graphlit-client 1.0.20260217005 → 1.0.20260218002

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/client.d.ts CHANGED
@@ -3,7 +3,8 @@ import type { NormalizedCacheObject } from "@apollo/client/core/index.js";
3
3
  import * as Types from "./generated/graphql-types.js";
4
4
  import { AgentOptions, AgentResult, StreamAgentOptions, ToolHandler } from "./types/agent.js";
5
5
  import { AgentStreamEvent } from "./types/ui-events.js";
6
- export type { AgentOptions, AgentResult, ArtifactCollector, StreamAgentOptions, ToolCallResult, UsageInfo, AgentError, } from "./types/agent.js";
6
+ export type { AgentOptions, AgentResult, ArtifactCollector, ContextStrategy, ContextManagementAction, StreamAgentOptions, ToolCallResult, UsageInfo, AgentError, } from "./types/agent.js";
7
+ export { TokenBudgetTracker, truncateToolResult, estimateTokens, isAccurateTokenCounting, } from "./helpers/context-management.js";
7
8
  export type { AgentStreamEvent } from "./types/ui-events.js";
8
9
  export interface RetryConfig {
9
10
  /** Maximum number of retry attempts (default: 5) */
@@ -803,12 +804,12 @@ declare class Graphlit {
803
804
  */
804
805
  queryConversationsGraph(filter?: Types.ConversationFilter, correlationId?: string): Promise<Types.QueryConversationsGraphQuery>;
805
806
  /**
806
- * Retrieves Conversations with clustering.
807
- * @param filter - The filter criteria to apply when retrieving Conversations, optional.
808
- * @param clusters - The clustering input parameters, optional.
809
- * @param correlationId - The tenant correlation identifier, optional.
810
- * @returns The Conversations with clusters.
811
- */
807
+ * Retrieves Conversations with clustering.
808
+ * @param filter - The filter criteria to apply when retrieving Conversations, optional.
809
+ * @param clusters - The clustering input parameters, optional.
810
+ * @param correlationId - The tenant correlation identifier, optional.
811
+ * @returns The Conversations with clusters.
812
+ */
812
813
  queryConversationsClusters(filter?: Types.ConversationFilter, clusters?: Types.EntityClustersInput, correlationId?: string): Promise<Types.QueryConversationsClustersQuery>;
813
814
  /**
814
815
  * Counts conversations based on the provided filter criteria.
package/dist/client.js CHANGED
@@ -6,6 +6,7 @@ import { RetryLink } from "@apollo/client/link/retry/index.js";
6
6
  import * as Types from "./generated/graphql-types.js";
7
7
  import * as Documents from "./generated/graphql-documents.js";
8
8
  import { getServiceType, getModelName, getModelEnum } from "./model-mapping.js";
9
+ import { TokenBudgetTracker, truncateToolResult, windowToolRounds, estimateTokens, DEFAULT_CONTEXT_STRATEGY, } from "./helpers/context-management.js";
9
10
  import { UIEventAdapter } from "./streaming/ui-event-adapter.js";
10
11
  import { formatMessagesForOpenAI, formatMessagesForAnthropic, formatMessagesForGoogle, formatMessagesForMistral, formatMessagesForBedrock, } from "./streaming/llm-formatters.js";
11
12
  import { streamWithOpenAI, streamWithAnthropic, streamWithGoogle, streamWithGroq, streamWithCerebras, streamWithCohere, streamWithMistral, streamWithBedrock, streamWithDeepseek, streamWithXai, } from "./streaming/providers.js";
@@ -124,7 +125,9 @@ catch (e) {
124
125
  console.log("[SDK Loading] Cerebras SDK not found:", e.message);
125
126
  }
126
127
  }
127
- const DEFAULT_MAX_TOOL_ROUNDS = 1000;
128
+ const DEFAULT_MAX_TOOL_ROUNDS = 100;
129
+ // Re-export context management utilities
130
+ export { TokenBudgetTracker, truncateToolResult, estimateTokens, isAccurateTokenCounting, } from "./helpers/context-management.js";
128
131
  // Helper function to validate GUID format
129
132
  function isValidGuid(guid) {
130
133
  if (!guid)
@@ -1534,12 +1537,12 @@ class Graphlit {
1534
1537
  });
1535
1538
  }
1536
1539
  /**
1537
- * Retrieves Conversations with clustering.
1538
- * @param filter - The filter criteria to apply when retrieving Conversations, optional.
1539
- * @param clusters - The clustering input parameters, optional.
1540
- * @param correlationId - The tenant correlation identifier, optional.
1541
- * @returns The Conversations with clusters.
1542
- */
1540
+ * Retrieves Conversations with clustering.
1541
+ * @param filter - The filter criteria to apply when retrieving Conversations, optional.
1542
+ * @param clusters - The clustering input parameters, optional.
1543
+ * @param correlationId - The tenant correlation identifier, optional.
1544
+ * @returns The Conversations with clusters.
1545
+ */
1543
1546
  async queryConversationsClusters(filter, clusters, correlationId) {
1544
1547
  return this.queryAndCheckError(Documents.QueryConversationsClusters, {
1545
1548
  filter: filter,
@@ -3131,7 +3134,10 @@ class Graphlit {
3131
3134
  * @returns The organizations.
3132
3135
  */
3133
3136
  async queryOrganizations(filter, correlationId) {
3134
- return this.queryAndCheckError(Documents.QueryOrganizations, { filter: filter, correlationId: correlationId });
3137
+ return this.queryAndCheckError(Documents.QueryOrganizations, {
3138
+ filter: filter,
3139
+ correlationId: correlationId,
3140
+ });
3135
3141
  }
3136
3142
  /**
3137
3143
  * Retrieves organizations with clustering information.
@@ -3352,7 +3358,10 @@ class Graphlit {
3352
3358
  * @returns The emotions.
3353
3359
  */
3354
3360
  async queryEmotions(filter, correlationId) {
3355
- return this.queryAndCheckError(Documents.QueryEmotions, { filter: filter, correlationId: correlationId });
3361
+ return this.queryAndCheckError(Documents.QueryEmotions, {
3362
+ filter: filter,
3363
+ correlationId: correlationId,
3364
+ });
3356
3365
  }
3357
3366
  /**
3358
3367
  * Counts emotions based on the provided filter criteria.
@@ -3518,7 +3527,10 @@ class Graphlit {
3518
3527
  * @returns The products.
3519
3528
  */
3520
3529
  async queryProducts(filter, correlationId) {
3521
- return this.queryAndCheckError(Documents.QueryProducts, { filter: filter, correlationId: correlationId });
3530
+ return this.queryAndCheckError(Documents.QueryProducts, {
3531
+ filter: filter,
3532
+ correlationId: correlationId,
3533
+ });
3522
3534
  }
3523
3535
  /**
3524
3536
  * Retrieves products with clustering information.
@@ -3712,7 +3724,10 @@ class Graphlit {
3712
3724
  * @returns The software.
3713
3725
  */
3714
3726
  async querySoftwares(filter, correlationId) {
3715
- return this.queryAndCheckError(Documents.QuerySoftwares, { filter: filter, correlationId: correlationId });
3727
+ return this.queryAndCheckError(Documents.QuerySoftwares, {
3728
+ filter: filter,
3729
+ correlationId: correlationId,
3730
+ });
3716
3731
  }
3717
3732
  /**
3718
3733
  * Retrieves software with clustering information.
@@ -3773,7 +3788,10 @@ class Graphlit {
3773
3788
  }
3774
3789
  /** Retrieves medical conditions based on filter criteria. */
3775
3790
  async queryMedicalConditions(filter, correlationId) {
3776
- return this.queryAndCheckError(Documents.QueryMedicalConditions, { filter: filter, correlationId: correlationId });
3791
+ return this.queryAndCheckError(Documents.QueryMedicalConditions, {
3792
+ filter: filter,
3793
+ correlationId: correlationId,
3794
+ });
3777
3795
  }
3778
3796
  /** Retrieves medical conditions with clustering information. */
3779
3797
  async queryMedicalConditionsClusters(filter, clusters, correlationId) {
@@ -3823,7 +3841,10 @@ class Graphlit {
3823
3841
  }
3824
3842
  /** Retrieves medical guidelines based on filter criteria. */
3825
3843
  async queryMedicalGuidelines(filter, correlationId) {
3826
- return this.queryAndCheckError(Documents.QueryMedicalGuidelines, { filter: filter, correlationId: correlationId });
3844
+ return this.queryAndCheckError(Documents.QueryMedicalGuidelines, {
3845
+ filter: filter,
3846
+ correlationId: correlationId,
3847
+ });
3827
3848
  }
3828
3849
  /** Retrieves medical guidelines with clustering information. */
3829
3850
  async queryMedicalGuidelinesClusters(filter, clusters, correlationId) {
@@ -3869,7 +3890,10 @@ class Graphlit {
3869
3890
  }
3870
3891
  /** Retrieves medical drugs based on filter criteria. */
3871
3892
  async queryMedicalDrugs(filter, correlationId) {
3872
- return this.queryAndCheckError(Documents.QueryMedicalDrugs, { filter: filter, correlationId: correlationId });
3893
+ return this.queryAndCheckError(Documents.QueryMedicalDrugs, {
3894
+ filter: filter,
3895
+ correlationId: correlationId,
3896
+ });
3873
3897
  }
3874
3898
  /** Retrieves medical drugs with clustering information. */
3875
3899
  async queryMedicalDrugsClusters(filter, clusters, correlationId) {
@@ -3923,7 +3947,10 @@ class Graphlit {
3923
3947
  }
3924
3948
  /** Retrieves medical indications based on filter criteria. */
3925
3949
  async queryMedicalIndications(filter, correlationId) {
3926
- return this.queryAndCheckError(Documents.QueryMedicalIndications, { filter: filter, correlationId: correlationId });
3950
+ return this.queryAndCheckError(Documents.QueryMedicalIndications, {
3951
+ filter: filter,
3952
+ correlationId: correlationId,
3953
+ });
3927
3954
  }
3928
3955
  /** Retrieves medical indications with clustering information. */
3929
3956
  async queryMedicalIndicationsClusters(filter, clusters, correlationId) {
@@ -3977,7 +4004,10 @@ class Graphlit {
3977
4004
  }
3978
4005
  /** Retrieves medical contraindications based on filter criteria. */
3979
4006
  async queryMedicalContraindications(filter, correlationId) {
3980
- return this.queryAndCheckError(Documents.QueryMedicalContraindications, { filter: filter, correlationId: correlationId });
4007
+ return this.queryAndCheckError(Documents.QueryMedicalContraindications, {
4008
+ filter: filter,
4009
+ correlationId: correlationId,
4010
+ });
3981
4011
  }
3982
4012
  /** Retrieves medical contraindications with clustering information. */
3983
4013
  async queryMedicalContraindicationsClusters(filter, clusters, correlationId) {
@@ -4024,7 +4054,10 @@ class Graphlit {
4024
4054
  }
4025
4055
  /** Retrieves medical tests based on filter criteria. */
4026
4056
  async queryMedicalTests(filter, correlationId) {
4027
- return this.queryAndCheckError(Documents.QueryMedicalTests, { filter: filter, correlationId: correlationId });
4057
+ return this.queryAndCheckError(Documents.QueryMedicalTests, {
4058
+ filter: filter,
4059
+ correlationId: correlationId,
4060
+ });
4028
4061
  }
4029
4062
  /** Retrieves medical tests with clustering information. */
4030
4063
  async queryMedicalTestsClusters(filter, clusters, correlationId) {
@@ -4074,7 +4107,10 @@ class Graphlit {
4074
4107
  }
4075
4108
  /** Retrieves medical devices based on filter criteria. */
4076
4109
  async queryMedicalDevices(filter, correlationId) {
4077
- return this.queryAndCheckError(Documents.QueryMedicalDevices, { filter: filter, correlationId: correlationId });
4110
+ return this.queryAndCheckError(Documents.QueryMedicalDevices, {
4111
+ filter: filter,
4112
+ correlationId: correlationId,
4113
+ });
4078
4114
  }
4079
4115
  /** Retrieves medical devices with clustering information. */
4080
4116
  async queryMedicalDevicesClusters(filter, clusters, correlationId) {
@@ -4124,7 +4160,10 @@ class Graphlit {
4124
4160
  }
4125
4161
  /** Retrieves medical procedures based on filter criteria. */
4126
4162
  async queryMedicalProcedures(filter, correlationId) {
4127
- return this.queryAndCheckError(Documents.QueryMedicalProcedures, { filter: filter, correlationId: correlationId });
4163
+ return this.queryAndCheckError(Documents.QueryMedicalProcedures, {
4164
+ filter: filter,
4165
+ correlationId: correlationId,
4166
+ });
4128
4167
  }
4129
4168
  /** Retrieves medical procedures with clustering information. */
4130
4169
  async queryMedicalProceduresClusters(filter, clusters, correlationId) {
@@ -4174,7 +4213,10 @@ class Graphlit {
4174
4213
  }
4175
4214
  /** Retrieves medical studies based on filter criteria. */
4176
4215
  async queryMedicalStudies(filter, correlationId) {
4177
- return this.queryAndCheckError(Documents.QueryMedicalStudies, { filter: filter, correlationId: correlationId });
4216
+ return this.queryAndCheckError(Documents.QueryMedicalStudies, {
4217
+ filter: filter,
4218
+ correlationId: correlationId,
4219
+ });
4178
4220
  }
4179
4221
  /** Retrieves medical studies with clustering information. */
4180
4222
  async queryMedicalStudiesClusters(filter, clusters, correlationId) {
@@ -4224,7 +4266,10 @@ class Graphlit {
4224
4266
  }
4225
4267
  /** Retrieves medical drug classes based on filter criteria. */
4226
4268
  async queryMedicalDrugClasses(filter, correlationId) {
4227
- return this.queryAndCheckError(Documents.QueryMedicalDrugClasses, { filter: filter, correlationId: correlationId });
4269
+ return this.queryAndCheckError(Documents.QueryMedicalDrugClasses, {
4270
+ filter: filter,
4271
+ correlationId: correlationId,
4272
+ });
4228
4273
  }
4229
4274
  /** Retrieves medical drug classes with clustering information. */
4230
4275
  async queryMedicalDrugClassesClusters(filter, clusters, correlationId) {
@@ -4274,7 +4319,10 @@ class Graphlit {
4274
4319
  }
4275
4320
  /** Retrieves medical therapies based on filter criteria. */
4276
4321
  async queryMedicalTherapies(filter, correlationId) {
4277
- return this.queryAndCheckError(Documents.QueryMedicalTherapies, { filter: filter, correlationId: correlationId });
4322
+ return this.queryAndCheckError(Documents.QueryMedicalTherapies, {
4323
+ filter: filter,
4324
+ correlationId: correlationId,
4325
+ });
4278
4326
  }
4279
4327
  /** Retrieves medical therapies with clustering information. */
4280
4328
  async queryMedicalTherapiesClusters(filter, clusters, correlationId) {
@@ -4612,13 +4660,17 @@ class Graphlit {
4612
4660
  let totalTokens = currentMessage?.tokens || 0;
4613
4661
  const toolStartTime = Date.now();
4614
4662
  let toolTime = 0;
4663
+ // Context strategy for tool result truncation
4664
+ const strategy = options?.contextStrategy ?? {};
4665
+ const toolResultTokenLimit = strategy.toolResultTokenLimit ??
4666
+ DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit;
4615
4667
  while (currentMessage.toolCalls?.length &&
4616
4668
  rounds < maxRounds &&
4617
4669
  !abortController.signal.aborted) {
4618
4670
  rounds++;
4619
- // Execute tools
4671
+ // Execute tools (with truncation)
4620
4672
  const toolExecStart = Date.now();
4621
- const toolResults = await this.executeToolsForPromptAgent(currentMessage.toolCalls.filter((tc) => tc !== null), toolHandlers || {}, allToolCalls, abortController.signal);
4673
+ const toolResults = await this.executeToolsForPromptAgent(currentMessage.toolCalls.filter((tc) => tc !== null), toolHandlers || {}, allToolCalls, abortController.signal, toolResultTokenLimit);
4622
4674
  toolTime += Date.now() - toolExecStart;
4623
4675
  if (abortController.signal.aborted) {
4624
4676
  throw new Error("Operation timed out");
@@ -4701,7 +4753,9 @@ class Graphlit {
4701
4753
  // Swallow errors from the previous call so a failed message doesn't
4702
4754
  // permanently block the queue for this conversation.
4703
4755
  // Check the abort signal before starting work so ESC while queued is instant.
4704
- const next = previous.catch(() => { }).then(() => {
4756
+ const next = previous
4757
+ .catch(() => { })
4758
+ .then(() => {
4705
4759
  if (abortSignal?.aborted)
4706
4760
  throw new Error("Operation aborted");
4707
4761
  return work();
@@ -4862,7 +4916,7 @@ class Graphlit {
4862
4916
  modelService: serviceType,
4863
4917
  });
4864
4918
  // Start the streaming conversation
4865
- await this.executeStreamingAgent(prompt, actualConversationId, fullSpec, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona);
4919
+ await this.executeStreamingAgent(prompt, actualConversationId, fullSpec, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona, options?.contextStrategy);
4866
4920
  }, abortSignal);
4867
4921
  }
4868
4922
  catch (error) {
@@ -4908,9 +4962,10 @@ class Graphlit {
4908
4962
  /**
4909
4963
  * Execute the streaming agent workflow with tool calling loop
4910
4964
  */
4911
- async executeStreamingAgent(prompt, conversationId, specification, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona) {
4965
+ async executeStreamingAgent(prompt, conversationId, specification, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona, contextStrategy) {
4912
4966
  let currentRound = 0;
4913
4967
  let fullMessage = "";
4968
+ const contextActions = [];
4914
4969
  // Collects artifact content IDs from tool handlers (e.g. code_execution).
4915
4970
  // Handlers register async ingestion promises; we await all of them before
4916
4971
  // completeConversation so the IDs are available without blocking the LLM.
@@ -4965,8 +5020,30 @@ class Graphlit {
4965
5020
  console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
4966
5021
  }
4967
5022
  }
5023
+ // Initialize context management
5024
+ const budgetTracker = details
5025
+ ? TokenBudgetTracker.fromDetails(details)
5026
+ : undefined;
5027
+ // Merge: caller overrides > server-side specification strategy > defaults
5028
+ const callerStrategy = contextStrategy ?? {};
5029
+ const serverStrategy = specification.strategy;
5030
+ const toolResultTokenLimit = callerStrategy.toolResultTokenLimit ??
5031
+ serverStrategy?.toolResultTokenLimit ??
5032
+ DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit;
5033
+ const toolRoundLimit = callerStrategy.toolRoundLimit ??
5034
+ serverStrategy?.toolRoundLimit ??
5035
+ DEFAULT_CONTEXT_STRATEGY.toolRoundLimit;
5036
+ const rebudgetThreshold = callerStrategy.rebudgetThreshold ??
5037
+ serverStrategy?.toolBudgetThreshold ??
5038
+ DEFAULT_CONTEXT_STRATEGY.rebudgetThreshold;
5039
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING && budgetTracker) {
5040
+ console.log(`📊 [Context Management] Initialized budget tracker: ${budgetTracker.usagePercent}% used, ` +
5041
+ `${budgetTracker.remaining.toLocaleString()} tokens remaining. ` +
5042
+ `Strategy: toolResultLimit=${toolResultTokenLimit}, toolRoundLimit=${toolRoundLimit}, ` +
5043
+ `rebudgetThreshold=${rebudgetThreshold}`);
5044
+ }
4968
5045
  // Build message array with conversation history
4969
- const messages = [];
5046
+ let messages = [];
4970
5047
  // Add system prompt if specified
4971
5048
  if (specification.systemPrompt) {
4972
5049
  messages.push({
@@ -5033,6 +5110,41 @@ class Graphlit {
5033
5110
  if (abortSignal?.aborted) {
5034
5111
  throw new Error("Operation aborted");
5035
5112
  }
5113
+ // Context window management: check budget before sending to LLM
5114
+ if (budgetTracker && currentRound > 0) {
5115
+ if (budgetTracker.needsRebudget(rebudgetThreshold)) {
5116
+ const beforeUsage = budgetTracker.usagePercent;
5117
+ const beforeCount = messages.length;
5118
+ messages = windowToolRounds(messages, toolRoundLimit);
5119
+ budgetTracker.resetFromMessages(messages);
5120
+ const afterUsage = budgetTracker.usagePercent;
5121
+ const droppedRounds = Math.max(0, Math.floor((beforeCount - messages.length) / 2));
5122
+ if (droppedRounds > 0) {
5123
+ const action = {
5124
+ type: "windowed_tool_rounds",
5125
+ droppedRounds,
5126
+ keptRounds: toolRoundLimit,
5127
+ };
5128
+ contextActions.push(action);
5129
+ // Notify the UI
5130
+ uiAdapter.handleEvent({
5131
+ type: "context_management",
5132
+ action,
5133
+ usage: budgetTracker.getUsageSnapshot(),
5134
+ timestamp: new Date(),
5135
+ });
5136
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
5137
+ console.log(`📊 [Context Management] Windowed tool rounds: dropped ${droppedRounds} round(s), ` +
5138
+ `budget ${beforeUsage}% → ${afterUsage}% (${messages.length} messages)`);
5139
+ }
5140
+ }
5141
+ // Emit updated context window
5142
+ uiAdapter.handleEvent({
5143
+ type: "context_window",
5144
+ usage: budgetTracker.getUsageSnapshot(),
5145
+ });
5146
+ }
5147
+ }
5036
5148
  let toolCalls = [];
5037
5149
  let roundMessage = "";
5038
5150
  // Stream with appropriate provider
@@ -5299,6 +5411,12 @@ class Graphlit {
5299
5411
  timestamp: new Date().toISOString(),
5300
5412
  };
5301
5413
  messages.push(assistantMessage);
5414
+ // Track assistant message in budget (includes tool call arguments)
5415
+ if (budgetTracker) {
5416
+ const assistantTokens = estimateTokens(roundMessage) +
5417
+ toolCalls.reduce((sum, tc) => sum + estimateTokens(tc.arguments), 0);
5418
+ budgetTracker.addMessage("", assistantTokens);
5419
+ }
5302
5420
  // Execute tools and add responses
5303
5421
  for (const toolCall of toolCalls) {
5304
5422
  const handler = toolHandlers[toolCall.name];
@@ -5395,17 +5513,45 @@ class Graphlit {
5395
5513
  },
5396
5514
  result: result,
5397
5515
  });
5398
- // Add tool response to messages
5516
+ // Add tool response to messages (with truncation)
5517
+ const rawResult = typeof result === "string" ? result : JSON.stringify(result);
5518
+ const truncatedResult = truncateToolResult(rawResult, toolResultTokenLimit, toolCall.name);
5519
+ // Track truncation for observability
5520
+ if (truncatedResult.length < rawResult.length) {
5521
+ const action = {
5522
+ type: "truncated_tool_result",
5523
+ toolName: toolCall.name,
5524
+ originalTokens: estimateTokens(rawResult),
5525
+ truncatedTokens: estimateTokens(truncatedResult),
5526
+ };
5527
+ contextActions.push(action);
5528
+ if (budgetTracker) {
5529
+ uiAdapter.handleEvent({
5530
+ type: "context_management",
5531
+ action,
5532
+ usage: budgetTracker.getUsageSnapshot(),
5533
+ timestamp: new Date(),
5534
+ });
5535
+ }
5536
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
5537
+ console.log(`📊 [Context Management] Truncated tool result for ${toolCall.name}: ` +
5538
+ `${estimateTokens(rawResult)} → ${estimateTokens(truncatedResult)} tokens`);
5539
+ }
5540
+ }
5399
5541
  const toolMessage = {
5400
5542
  __typename: "ConversationMessage",
5401
5543
  role: Types.ConversationRoleTypes.Tool,
5402
- message: typeof result === "string" ? result : JSON.stringify(result),
5544
+ message: truncatedResult,
5403
5545
  toolCallId: toolCall.id,
5404
5546
  timestamp: new Date().toISOString(),
5405
5547
  };
5406
5548
  // Add tool name for Mistral compatibility
5407
5549
  toolMessage.toolName = toolCall.name;
5408
5550
  messages.push(toolMessage);
5551
+ // Track budget
5552
+ if (budgetTracker) {
5553
+ budgetTracker.addMessage(truncatedResult);
5554
+ }
5409
5555
  }
5410
5556
  catch (error) {
5411
5557
  const errorMessage = error instanceof Error ? error.message : "Unknown error";
@@ -5421,19 +5567,30 @@ class Graphlit {
5421
5567
  error: errorMessage,
5422
5568
  });
5423
5569
  // Add error response
5570
+ const errorText = `Error: ${errorMessage}`;
5424
5571
  const errorToolMessage = {
5425
5572
  __typename: "ConversationMessage",
5426
5573
  role: Types.ConversationRoleTypes.Tool,
5427
- message: `Error: ${errorMessage}`,
5574
+ message: errorText,
5428
5575
  toolCallId: toolCall.id,
5429
5576
  timestamp: new Date().toISOString(),
5430
5577
  };
5431
5578
  // Add tool name for Mistral compatibility
5432
5579
  errorToolMessage.toolName = toolCall.name;
5433
5580
  messages.push(errorToolMessage);
5581
+ if (budgetTracker) {
5582
+ budgetTracker.addMessage(errorText);
5583
+ }
5434
5584
  }
5435
5585
  }
5436
5586
  }
5587
+ // Emit context window usage after each tool round
5588
+ if (budgetTracker) {
5589
+ uiAdapter.handleEvent({
5590
+ type: "context_window",
5591
+ usage: budgetTracker.getUsageSnapshot(),
5592
+ });
5593
+ }
5437
5594
  currentRound++;
5438
5595
  }
5439
5596
  // Complete the conversation and get token count
@@ -5859,7 +6016,7 @@ class Graphlit {
5859
6016
  await streamWithXai(specification, messages, tools, xaiClient, (event) => uiAdapter.handleEvent(event), onComplete, abortSignal);
5860
6017
  }
5861
6018
  // Helper method to execute tools for promptAgent
5862
- async executeToolsForPromptAgent(toolCalls, toolHandlers, allToolCalls, signal) {
6019
+ async executeToolsForPromptAgent(toolCalls, toolHandlers, allToolCalls, signal, toolResultTokenLimit = DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit) {
5863
6020
  const responses = [];
5864
6021
  // Execute tools in parallel for better performance
5865
6022
  const toolPromises = toolCalls.map(async (toolCall) => {
@@ -5892,10 +6049,18 @@ class Graphlit {
5892
6049
  duration: Date.now() - startTime,
5893
6050
  };
5894
6051
  allToolCalls.push(toolResult);
6052
+ // Truncate oversized tool results before sending to server
6053
+ const rawContent = error ? error : result ? JSON.stringify(result) : "";
6054
+ const content = truncateToolResult(rawContent, toolResultTokenLimit, toolCall.name || "unknown");
6055
+ if (content.length < rawContent.length &&
6056
+ process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
6057
+ console.log(`📊 [Context Management] Truncated tool result for ${toolCall.name}: ` +
6058
+ `${estimateTokens(rawContent)} → ${estimateTokens(content)} tokens (promptAgent path)`);
6059
+ }
5895
6060
  // Response for API
5896
6061
  return {
5897
6062
  id: toolCall.id,
5898
- content: error ? error : result ? JSON.stringify(result) : "",
6063
+ content,
5899
6064
  };
5900
6065
  });
5901
6066
  const results = await Promise.all(toolPromises);
@@ -16098,6 +16098,9 @@ export const GetSpecification = gql `
16098
16098
  factExtractionLimit
16099
16099
  messagesWeight
16100
16100
  contentsWeight
16101
+ toolResultTokenLimit
16102
+ toolRoundLimit
16103
+ toolBudgetThreshold
16101
16104
  }
16102
16105
  promptStrategy {
16103
16106
  type
@@ -16492,6 +16495,9 @@ export const QuerySpecifications = gql `
16492
16495
  factExtractionLimit
16493
16496
  messagesWeight
16494
16497
  contentsWeight
16498
+ toolResultTokenLimit
16499
+ toolRoundLimit
16500
+ toolBudgetThreshold
16495
16501
  }
16496
16502
  promptStrategy {
16497
16503
  type
@@ -3755,6 +3755,12 @@ export type ConversationStrategy = {
3755
3755
  messageLimit?: Maybe<Scalars['Int']['output']>;
3756
3756
  /** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
3757
3757
  messagesWeight?: Maybe<Scalars['Float']['output']>;
3758
+ /** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
3759
+ toolBudgetThreshold?: Maybe<Scalars['Float']['output']>;
3760
+ /** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
3761
+ toolResultTokenLimit?: Maybe<Scalars['Int']['output']>;
3762
+ /** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
3763
+ toolRoundLimit?: Maybe<Scalars['Int']['output']>;
3758
3764
  /** The conversation strategy type. */
3759
3765
  type?: Maybe<ConversationStrategyTypes>;
3760
3766
  };
@@ -3782,6 +3788,12 @@ export type ConversationStrategyInput = {
3782
3788
  messageLimit?: InputMaybe<Scalars['Int']['input']>;
3783
3789
  /** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
3784
3790
  messagesWeight?: InputMaybe<Scalars['Float']['input']>;
3791
+ /** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
3792
+ toolBudgetThreshold?: InputMaybe<Scalars['Float']['input']>;
3793
+ /** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
3794
+ toolResultTokenLimit?: InputMaybe<Scalars['Int']['input']>;
3795
+ /** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
3796
+ toolRoundLimit?: InputMaybe<Scalars['Int']['input']>;
3785
3797
  /** The conversation strategy type. */
3786
3798
  type?: InputMaybe<ConversationStrategyTypes>;
3787
3799
  };
@@ -3816,6 +3828,12 @@ export type ConversationStrategyUpdateInput = {
3816
3828
  messageLimit?: InputMaybe<Scalars['Int']['input']>;
3817
3829
  /** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
3818
3830
  messagesWeight?: InputMaybe<Scalars['Float']['input']>;
3831
+ /** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
3832
+ toolBudgetThreshold?: InputMaybe<Scalars['Float']['input']>;
3833
+ /** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
3834
+ toolResultTokenLimit?: InputMaybe<Scalars['Int']['input']>;
3835
+ /** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
3836
+ toolRoundLimit?: InputMaybe<Scalars['Int']['input']>;
3819
3837
  /** The conversation strategy type. */
3820
3838
  type?: InputMaybe<ConversationStrategyTypes>;
3821
3839
  };
@@ -40284,6 +40302,9 @@ export type GetSpecificationQuery = {
40284
40302
  factExtractionLimit?: number | null;
40285
40303
  messagesWeight?: number | null;
40286
40304
  contentsWeight?: number | null;
40305
+ toolResultTokenLimit?: number | null;
40306
+ toolRoundLimit?: number | null;
40307
+ toolBudgetThreshold?: number | null;
40287
40308
  } | null;
40288
40309
  promptStrategy?: {
40289
40310
  __typename?: 'PromptStrategy';
@@ -40726,6 +40747,9 @@ export type QuerySpecificationsQuery = {
40726
40747
  factExtractionLimit?: number | null;
40727
40748
  messagesWeight?: number | null;
40728
40749
  contentsWeight?: number | null;
40750
+ toolResultTokenLimit?: number | null;
40751
+ toolRoundLimit?: number | null;
40752
+ toolBudgetThreshold?: number | null;
40729
40753
  } | null;
40730
40754
  promptStrategy?: {
40731
40755
  __typename?: 'PromptStrategy';
@@ -0,0 +1,91 @@
1
+ import * as Types from "../generated/graphql-types.js";
2
+ /** Returns `true` when js-tiktoken is installed and its o200k_base encoder was created at module load; `false` means `estimateTokens` uses the chars / 3.5 heuristic. */
3
+ export declare function isAccurateTokenCounting(): boolean;
4
+ /**
5
+ * Estimates how many tokens `text` occupies; an empty string yields 0.
6
+ *
7
+ * When js-tiktoken is installed, returns an accurate BPE token count (o200k_base encoding).
8
+ * Otherwise falls back to a conservative heuristic: `Math.ceil(text.length / 3.5)`, with length measured in UTF-16 code units.
9
+ */
10
+ export declare function estimateTokens(text: string): number;
11
+ /**
12
+ * Configuration for context window management during agentic tool loops (every field required).
13
+ * Values can be provided by the server (via ConversationStrategy) or set client-side; defaults live in DEFAULT_CONTEXT_STRATEGY.
14
+ */
15
+ export interface ContextStrategyConfig {
16
+ /** Max tokens for any single tool result. Results exceeding this are truncated. Default: 8192 */
17
+ toolResultTokenLimit: number;
18
+ /** Max tool call/response rounds to keep in context. Older rounds are dropped FIFO. Default: 10 */
19
+ toolRoundLimit: number;
20
+ /** Fraction of token budget (0.0-1.0) at which client-side windowing is triggered. Default: 0.75. NOTE(review): the generated GraphQL types call the analogous field `toolBudgetThreshold` - confirm the mapping. */
21
+ rebudgetThreshold: number;
22
+ }
23
+ export declare const DEFAULT_CONTEXT_STRATEGY: ContextStrategyConfig; // Defaults: toolResultTokenLimit 8192, toolRoundLimit 10, rebudgetThreshold 0.75.
24
+ /**
25
+ * Tracks token budget during streaming agent tool loops.
26
+ *
27
+ * Initialized from server-provided accurate token counts (via formatConversation details),
28
+ * then uses `estimateTokens` (tiktoken when loaded, otherwise chars / 3.5) for incremental additions that carry no server count.
29
+ */
30
+ export declare class TokenBudgetTracker {
31
+ private readonly tokenLimit; // model's full context limit
32
+ private readonly completionTokenLimit; // tokens reserved for the completion
33
+ private _usedTokens; // running total of prompt-side tokens
34
+ constructor(tokenLimit: number, completionTokenLimit: number, initialUsedTokens: number);
35
+ /**
36
+ * Create a tracker from formatConversation response details.
37
+ * Returns undefined if `tokenLimit` is missing, null or 0. `completionTokenLimit` defaults to 4096; initial usage is the sum of the messages' `tokens` (null entries and missing counts count as 0).
38
+ */
39
+ static fromDetails(details: {
40
+ tokenLimit?: number | null;
41
+ completionTokenLimit?: number | null;
42
+ messages?: Array<{
43
+ tokens?: number | null;
44
+ } | null> | null;
45
+ }): TokenBudgetTracker | undefined;
46
+ /** Total available token budget: floor((tokenLimit - completionTokenLimit) * 0.95). Can be zero or negative when the completion reserve is not smaller than the limit. */
47
+ get budget(): number;
48
+ /** Current estimated token usage */
49
+ get usedTokens(): number;
50
+ /** Remaining tokens before budget is exhausted (never below 0) */
51
+ get remaining(): number;
52
+ /** Current usage as a rounded percentage of `budget`. Not clamped, so it can exceed 100; reports 100 when `budget` is zero or negative. */
53
+ get usagePercent(): number;
54
+ /** Model's full context token limit (the raw tokenLimit, not the reduced budget) */
55
+ get maxTokens(): number;
56
+ /** Track addition of new message content: adds `serverTokenCount` when given, otherwise `estimateTokens(text)`. */
57
+ addMessage(text: string, serverTokenCount?: number): void;
58
+ /** Check if we need to trigger windowing/re-budgeting; `threshold` is a fraction (0.0-1.0) compared against `usagePercent / 100`. */
59
+ needsRebudget(threshold: number): boolean;
60
+ /** Reset tracker from a fresh set of messages (after windowing): each message contributes its non-zero `tokens`, else an estimate of its `message` text. */
61
+ resetFromMessages(messages: Array<{
62
+ message?: string | null;
63
+ tokens?: number | null;
64
+ }>): void;
65
+ /** Get current usage snapshot for emitting events. Note `maxTokens` is the full tokenLimit while `percentage` and `remainingTokens` are relative to `budget`. */
66
+ getUsageSnapshot(): {
67
+ usedTokens: number;
68
+ maxTokens: number;
69
+ percentage: number;
70
+ remainingTokens: number;
71
+ };
72
+ }
73
+ /**
74
+ * Truncates a tool result to fit within a token budget. Non-string results are serialized with JSON.stringify; an empty or unserializable-to-text result yields "", and text already within `maxTokens` is returned unchanged.
75
+ *
76
+ * Attempts to find a clean break point (JSON boundary or newline) in the second half of the allowed prefix.
77
+ * Appends a "[truncated by <toolName>: original ~N tokens, showing first ~M tokens]" marker so the LLM knows data was cut; the marker itself is not counted against `maxTokens`.
78
+ */
79
+ export declare function truncateToolResult(result: unknown, maxTokens: number, toolName: string): string;
80
+ /**
81
+ * Windows tool rounds to keep the messages array within budget.
82
+ *
83
+ * Preserves:
84
+ * - "Header" messages (everything before the first assistant message that carries tool calls: system prompt, conversation history, initial user message)
85
+ * - The most recent `keepRounds` tool rounds (a round starts at each assistant message after the header)
86
+ *
87
+ * Drops older tool rounds and inserts a system message noting what was removed. NOTE(review): `keepRounds` is expected to be a positive integer.
88
+ *
89
+ * @returns The windowed messages array, or the input array itself when it holds no more than `keepRounds` rounds
90
+ */
91
+ export declare function windowToolRounds(messages: Types.ConversationMessage[], keepRounds: number): Types.ConversationMessage[];
@@ -0,0 +1,223 @@
1
+ import { createRequire } from "node:module";
2
+ import * as Types from "../generated/graphql-types.js";
3
+ // ── Singleton tiktoken encoder (best-effort load, runs once at module import) ──
4
+ let encoder; // stays undefined unless the try block below completes
5
+ try {
6
+ const require = createRequire(import.meta.url); // CommonJS-style require so the optional package is resolved synchronously
7
+ const { Tiktoken } = require("js-tiktoken/lite");
8
+ const ranks = require("js-tiktoken/ranks/o200k_base");
9
+ encoder = new Tiktoken(ranks);
10
+ if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
11
+ console.debug("[graphlit-sdk] tiktoken encoder loaded (o200k_base) — accurate token counting enabled");
12
+ }
13
+ }
14
+ catch {
15
+ // Any failure above (typically js-tiktoken not installed) lands here: encoder stays undefined and estimation falls back to the heuristic
16
+ if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
17
+ console.debug("[graphlit-sdk] js-tiktoken not available — using heuristic token estimation (chars / 3.5)");
18
+ }
19
+ }
20
/**
 * Reports whether exact token counting is active.
 *
 * @returns `true` if the module-level js-tiktoken encoder was created when the
 *   module loaded, `false` if estimates rely on the chars / 3.5 heuristic.
 */
export function isAccurateTokenCounting() {
    const encoderMissing = encoder === undefined;
    return !encoderMissing;
}
24
/**
 * Estimates the number of tokens in a piece of text.
 *
 * With the js-tiktoken encoder loaded, the text is BPE-encoded (o200k_base)
 * and the exact token count is returned. Without it, the count is approximated
 * conservatively as one token per 3.5 UTF-16 code units, rounded up.
 *
 * @param text - Text to measure; empty or otherwise falsy input counts as 0.
 * @returns The (estimated) token count.
 */
export function estimateTokens(text) {
    if (!text) {
        return 0;
    }
    return encoder
        ? encoder.encode(text).length
        : Math.ceil(text.length / 3.5);
}
37
+ export const DEFAULT_CONTEXT_STRATEGY = { // Default context-management settings; presumably applied when neither server nor caller supplies a value - confirm in client.js
38
+ toolResultTokenLimit: 8192, // max estimated tokens kept from a single tool result
39
+ toolRoundLimit: 10, // max tool call/response rounds kept in context
40
+ rebudgetThreshold: 0.75, // fraction of the token budget at which windowing is triggered
41
+ };
42
/**
 * Running token-budget account for a streaming agent tool loop.
 *
 * Seeded with server-reported token counts (from formatConversation details).
 * Later additions use a server count when one is supplied and otherwise
 * `estimateTokens` (tiktoken when loaded, chars / 3.5 heuristic if not).
 */
export class TokenBudgetTracker {
    tokenLimit;
    completionTokenLimit;
    _usedTokens;
    /**
     * @param tokenLimit - The model's full context limit.
     * @param completionTokenLimit - Tokens reserved for the completion.
     * @param initialUsedTokens - Tokens already consumed by the prompt.
     */
    constructor(tokenLimit, completionTokenLimit, initialUsedTokens) {
        this.tokenLimit = tokenLimit;
        this.completionTokenLimit = completionTokenLimit;
        this._usedTokens = initialUsedTokens;
    }
    /**
     * Builds a tracker from formatConversation response details.
     *
     * @returns A tracker, or `undefined` when `tokenLimit` is missing/null/0.
     *   `completionTokenLimit` falls back to 4096; initial usage is the sum of
     *   the messages' `tokens` (null entries and missing counts add 0).
     */
    static fromDetails(details) {
        const { tokenLimit } = details;
        if (!tokenLimit) {
            return undefined;
        }
        let seededTokens = 0;
        details.messages?.forEach((entry) => {
            seededTokens += entry?.tokens ?? 0;
        });
        return new TokenBudgetTracker(tokenLimit, details.completionTokenLimit ?? 4096, seededTokens);
    }
    /** Prompt-side budget: 95% of (tokenLimit - completionTokenLimit), rounded down. */
    get budget() {
        const promptRoom = this.tokenLimit - this.completionTokenLimit;
        return Math.floor(promptRoom * 0.95);
    }
    /** Current estimated token usage. */
    get usedTokens() {
        return this._usedTokens;
    }
    /** Tokens left before the budget is exhausted; never negative. */
    get remaining() {
        const headroom = this.budget - this._usedTokens;
        return Math.max(0, headroom);
    }
    /**
     * Usage as a rounded percentage of `budget`. Not clamped, so it may exceed
     * 100; a zero or negative budget reports 100.
     */
    get usagePercent() {
        const available = this.budget;
        if (available <= 0) {
            return 100;
        }
        return Math.round((this._usedTokens / available) * 100);
    }
    /** The model's full context token limit (not the reduced budget). */
    get maxTokens() {
        return this.tokenLimit;
    }
    /** Adds `serverTokenCount` when provided, otherwise an estimate of `text`. */
    addMessage(text, serverTokenCount) {
        const addedTokens = serverTokenCount ?? estimateTokens(text);
        this._usedTokens += addedTokens;
    }
    /** True once usage reaches `threshold` (a 0.0-1.0 fraction) of the budget. */
    needsRebudget(threshold) {
        const triggerPercent = threshold * 100;
        return this.usagePercent >= triggerPercent;
    }
    /**
     * Recomputes usage from scratch (after windowing). Each message contributes
     * its non-zero `tokens`, or else an estimate of its `message` text.
     */
    resetFromMessages(messages) {
        let recounted = 0;
        messages.forEach((msg) => {
            recounted += msg.tokens ? msg.tokens : estimateTokens(msg.message ?? "");
        });
        this._usedTokens = recounted;
    }
    /**
     * Usage snapshot for event emission. `maxTokens` is the full token limit,
     * while `percentage` and `remainingTokens` are measured against `budget`.
     */
    getUsageSnapshot() {
        const usedTokens = this._usedTokens;
        const maxTokens = this.tokenLimit;
        const percentage = this.usagePercent;
        const remainingTokens = this.remaining;
        return { usedTokens, maxTokens, percentage, remainingTokens };
    }
}
117
/**
 * Truncates a tool result so that it fits within a token budget.
 *
 * Non-string results are serialized with JSON.stringify first. Text that is
 * empty returns "", and text already within `maxTokens` is returned unchanged.
 * Otherwise a prefix is kept, preferably ending at a clean break (a JSON
 * object/array boundary, or a newline for plain text) when one exists in the
 * second half of the allowed prefix, and a marker is appended so the LLM
 * knows data was cut. The marker itself is not counted against `maxTokens`.
 *
 * @param result - Tool output; strings are used as-is, anything else is JSON-serialized.
 * @param maxTokens - Maximum (estimated) tokens to keep from the result.
 * @param toolName - Name inserted into the truncation marker.
 * @returns The original text, or the truncated prefix followed by the marker.
 * @throws TypeError if `result` cannot be JSON-serialized (circular structure, BigInt).
 */
export function truncateToolResult(result, maxTokens, toolName) {
    const text = typeof result === "string" ? result : JSON.stringify(result);
    if (!text)
        return "";
    const estimatedTokens = estimateTokens(text);
    if (estimatedTokens <= maxTokens)
        return text;
    // When tiktoken is available, compute the actual chars-per-token ratio for
    // this specific text instead of using the hardcoded 3.5 heuristic.
    const charsPerToken = encoder && estimatedTokens > 0
        ? text.length / estimatedTokens
        : 3.5;
    let maxChars = Math.floor(maxTokens * charsPerToken);
    // Never cut between the two halves of a UTF-16 surrogate pair: a lone high
    // surrogate is invalid text and is mangled when serialized for the model.
    if (maxChars > 0 && maxChars < text.length) {
        const lastKept = text.charCodeAt(maxChars - 1);
        if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
            maxChars -= 1;
        }
    }
    let truncated = text.substring(0, maxChars);
    // Try to find a clean break point, but only if it keeps more than half of
    // the allowed prefix.
    if (text.startsWith("{") || text.startsWith("[")) {
        // For JSON, end just after the last complete object/array
        const lastComplete = Math.max(truncated.lastIndexOf("},"), truncated.lastIndexOf("}\n"), truncated.lastIndexOf("],"), truncated.lastIndexOf("]\n"));
        if (lastComplete > maxChars * 0.5) {
            truncated = truncated.substring(0, lastComplete + 1);
        }
    }
    else {
        // For plain text, break at the last newline
        const lastNewline = truncated.lastIndexOf("\n");
        if (lastNewline > maxChars * 0.5) {
            truncated = truncated.substring(0, lastNewline);
        }
    }
    const truncatedTokens = estimateTokens(truncated);
    return `${truncated}\n\n[truncated by ${toolName}: original ~${estimatedTokens} tokens, showing first ~${truncatedTokens} tokens]`;
}
155
/**
 * Locates where the agentic tool loop begins inside a message list.
 *
 * Everything before the returned index is "header" material (system prompt,
 * conversation history, initial user message); everything from it onward
 * belongs to tool rounds. The boundary is the first assistant message that
 * carries at least one tool call.
 *
 * @param messages - Conversation messages in order.
 * @returns Index of the first tool-calling assistant message, or
 *   `messages.length` when there is none.
 */
function findToolRoundStart(messages) {
    const firstToolCaller = messages.findIndex((candidate) => candidate.role === Types.ConversationRoleTypes.Assistant &&
        candidate.toolCalls &&
        candidate.toolCalls.length > 0);
    // No tool rounds found: the whole list is header
    return firstToolCaller === -1 ? messages.length : firstToolCaller;
}
172
/**
 * Splits the tool-loop portion of a conversation into rounds.
 *
 * The first message always opens the first round; after that, every assistant
 * message opens a new round and every other message joins the round currently
 * being built. In the expected input each round is therefore one assistant
 * message followed by its tool responses.
 *
 * @param toolMessages - Messages from the start of the tool loop onward.
 * @returns The rounds, in order; empty when there are no messages.
 */
function groupToolRounds(toolMessages) {
    const rounds = [];
    for (const msg of toolMessages) {
        const opensRound = rounds.length === 0 ||
            msg.role === Types.ConversationRoleTypes.Assistant;
        if (opensRound) {
            rounds.push([msg]);
        }
        else {
            rounds[rounds.length - 1].push(msg);
        }
    }
    return rounds;
}
195
/**
 * Windows tool rounds to keep the messages array within budget.
 *
 * Preserves:
 * - "Header" messages (system prompt, conversation history, initial user message)
 * - The most recent `keepRounds` tool rounds
 *
 * Drops older tool rounds and inserts a system message noting what was removed.
 *
 * `keepRounds` is normalized before use: fractional values are rounded down,
 * negative values are treated as 0 (drop every round), and a value that is not
 * a number leaves the messages untouched.
 *
 * @param messages - Full message list, header first, tool rounds after.
 * @param keepRounds - Number of most recent tool rounds to retain.
 * @returns The windowed messages array, or the input array itself when nothing
 *   needs to be dropped.
 */
export function windowToolRounds(messages, keepRounds) {
    const headerEnd = findToolRoundStart(messages);
    const header = messages.slice(0, headerEnd);
    const rounds = groupToolRounds(messages.slice(headerEnd));
    const requested = Math.floor(Number(keepRounds));
    const keep = Number.isNaN(requested) ? rounds.length : Math.max(0, requested);
    if (rounds.length <= keep)
        return messages;
    const droppedCount = rounds.length - keep;
    // Slice from the front by the dropped count: `slice(-keep)` would keep
    // EVERY round when keep is 0, because -0 is treated as index 0.
    const keptRounds = rounds.slice(droppedCount);
    // Summary marker so the LLM knows context was trimmed
    const summaryMessage = {
        __typename: "ConversationMessage",
        role: Types.ConversationRoleTypes.System,
        message: `[Context management: ${droppedCount} earlier tool calling round(s) were removed to stay within token limits. The most recent ${keep} round(s) are preserved below.]`,
        timestamp: new Date().toISOString(),
    };
    return [...header, summaryMessage, ...keptRounds.flat()];
}
@@ -62,6 +62,7 @@ export declare class UIEventAdapter {
62
62
  private emitMessageUpdate;
63
63
  private emitUIEvent;
64
64
  private handleContextWindow;
65
+ private handleContextManagement;
65
66
  private handleReasoningStart;
66
67
  private handleReasoningDelta;
67
68
  private handleReasoningEnd;
@@ -81,6 +81,9 @@ export class UIEventAdapter {
81
81
  case "context_window":
82
82
  this.handleContextWindow(event.usage);
83
83
  break;
84
+ case "context_management":
85
+ this.handleContextManagement(event);
86
+ break;
84
87
  case "reasoning_start":
85
88
  this.handleReasoningStart(event.format);
86
89
  break;
@@ -611,6 +614,17 @@ export class UIEventAdapter {
611
614
  timestamp: new Date(),
612
615
  });
613
616
  }
617
+ handleContextManagement(event) {
618
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
619
+ console.log(`📊 [UIEventAdapter] Context management: ${event.action.type}`);
620
+ }
621
+ this.emitUIEvent({
622
+ type: "context_management",
623
+ action: event.action,
624
+ usage: event.usage,
625
+ timestamp: event.timestamp,
626
+ });
627
+ }
614
628
  handleReasoningStart(format) {
615
629
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
616
630
  console.log(`🤔 [UIEventAdapter] Reasoning start - Format: ${format}`);
@@ -8,9 +8,18 @@ export interface ArtifactCollector {
8
8
  }[]>;
9
9
  }
10
10
  export type ToolHandler = (args: any, artifacts?: ArtifactCollector) => Promise<any>;
11
+ export interface ContextStrategy { // Optional context-management overrides, accepted as `contextStrategy` on AgentOptions and StreamAgentOptions
12
+ /** Max tokens for any single tool result. Results exceeding this are truncated. Default: 8192 */
13
+ toolResultTokenLimit?: number;
14
+ /** Max tool call/response rounds to keep in context. Older rounds are dropped FIFO. Default: 10 */
15
+ toolRoundLimit?: number;
16
+ /** Fraction of token budget (0.0-1.0) at which client-side windowing is triggered. Default: 0.75. NOTE(review): the generated GraphQL types name the analogous field `toolBudgetThreshold` - confirm the mapping. */
17
+ rebudgetThreshold?: number;
18
+ }
11
19
  export interface AgentOptions {
12
20
  maxToolRounds?: number;
13
21
  timeout?: number;
22
+ contextStrategy?: ContextStrategy;
14
23
  }
15
24
  export interface AgentMetrics {
16
25
  totalTime: number;
@@ -27,6 +36,16 @@ export interface ContextWindowUsage {
27
36
  percentage: number;
28
37
  remainingTokens: number;
29
38
  }
39
+ export type ContextManagementAction = { // Discriminated on `type`; one entry per context-management step, surfaced in AgentResult.contextActions and "context_management" events
40
+ type: "truncated_tool_result"; // a single tool result was cut down
41
+ toolName: string; // tool whose result was truncated
42
+ originalTokens: number; // token count before truncation (presumably an estimate - confirm at the emit site)
43
+ truncatedTokens: number; // token count after truncation
44
+ } | {
45
+ type: "windowed_tool_rounds"; // older tool rounds were dropped from context
46
+ droppedRounds: number; // rounds removed
47
+ keptRounds: number; // rounds still in context
48
+ };
30
49
  export interface AgentResult {
31
50
  message: string;
32
51
  conversationId: string;
@@ -36,6 +55,7 @@ export interface AgentResult {
36
55
  metrics?: AgentMetrics;
37
56
  usage?: UsageInfo;
38
57
  contextWindow?: ContextWindowUsage;
58
+ contextActions?: ContextManagementAction[];
39
59
  error?: AgentError;
40
60
  }
41
61
  export interface StreamAgentOptions {
@@ -44,6 +64,7 @@ export interface StreamAgentOptions {
44
64
  smoothingEnabled?: boolean;
45
65
  chunkingStrategy?: "character" | "word" | "sentence";
46
66
  smoothingDelay?: number;
67
+ contextStrategy?: ContextStrategy;
47
68
  }
48
69
  export interface ToolCallResult {
49
70
  id: string;
@@ -2,6 +2,7 @@
2
2
  * Internal types used by the streaming implementation
3
3
  * These are not exported to consumers of the library
4
4
  */
5
+ import { ContextManagementAction } from "./agent.js";
5
6
  /**
6
7
  * Low-level streaming events used internally by providers
7
8
  * These get transformed into AgentStreamEvent by UIEventAdapter
@@ -64,6 +65,16 @@ export type StreamEvent = {
64
65
  percentage: number;
65
66
  remainingTokens: number;
66
67
  };
68
+ } | {
69
+ type: "context_management";
70
+ action: ContextManagementAction;
71
+ usage: {
72
+ usedTokens: number;
73
+ maxTokens: number;
74
+ percentage: number;
75
+ remainingTokens: number;
76
+ };
77
+ timestamp: Date;
67
78
  } | {
68
79
  type: "reasoning_start";
69
80
  format: "thinking_tag" | "markdown" | "custom";
@@ -1,4 +1,5 @@
1
1
  import { ConversationMessage, ConversationToolCall } from "../generated/graphql-types.js";
2
+ import { ContextManagementAction } from "./agent.js";
2
3
  /**
3
4
  * Tool execution status for streaming
4
5
  */
@@ -44,6 +45,16 @@ export type AgentStreamEvent = {
44
45
  conversationId: string;
45
46
  timestamp: Date;
46
47
  } | ContextWindowEvent | {
48
+ type: "context_management";
49
+ action: ContextManagementAction;
50
+ usage: {
51
+ usedTokens: number;
52
+ maxTokens: number;
53
+ percentage: number;
54
+ remainingTokens: number;
55
+ };
56
+ timestamp: Date;
57
+ } | {
47
58
  type: "message_update";
48
59
  message: StreamingConversationMessage;
49
60
  isStreaming: boolean;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "graphlit-client",
3
- "version": "1.0.20260217005",
3
+ "version": "1.0.20260218002",
4
4
  "description": "Graphlit API Client for TypeScript",
5
5
  "type": "module",
6
6
  "main": "./dist/client.js",
@@ -73,6 +73,9 @@
73
73
  },
74
74
  "@aws-sdk/client-bedrock-runtime": {
75
75
  "optional": true
76
+ },
77
+ "js-tiktoken": {
78
+ "optional": true
76
79
  }
77
80
  },
78
81
  "optionalDependencies": {
@@ -81,6 +84,7 @@
81
84
  "@mistralai/mistralai": "^1.11.0",
82
85
  "cohere-ai": "^7.20.0",
83
86
  "groq-sdk": "^0.25.0",
87
+ "js-tiktoken": "^1.0.16",
84
88
  "openai": "^5.3.0"
85
89
  },
86
90
  "devDependencies": {