graphlit-client 1.0.20260217005 → 1.0.20260218002
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +8 -7
- package/dist/client.js +198 -33
- package/dist/generated/graphql-documents.js +6 -0
- package/dist/generated/graphql-types.d.ts +24 -0
- package/dist/helpers/context-management.d.ts +91 -0
- package/dist/helpers/context-management.js +223 -0
- package/dist/streaming/ui-event-adapter.d.ts +1 -0
- package/dist/streaming/ui-event-adapter.js +14 -0
- package/dist/types/agent.d.ts +21 -0
- package/dist/types/internal.d.ts +11 -0
- package/dist/types/ui-events.d.ts +11 -0
- package/package.json +5 -1
package/dist/client.d.ts
CHANGED
|
@@ -3,7 +3,8 @@ import type { NormalizedCacheObject } from "@apollo/client/core/index.js";
|
|
|
3
3
|
import * as Types from "./generated/graphql-types.js";
|
|
4
4
|
import { AgentOptions, AgentResult, StreamAgentOptions, ToolHandler } from "./types/agent.js";
|
|
5
5
|
import { AgentStreamEvent } from "./types/ui-events.js";
|
|
6
|
-
export type { AgentOptions, AgentResult, ArtifactCollector, StreamAgentOptions, ToolCallResult, UsageInfo, AgentError, } from "./types/agent.js";
|
|
6
|
+
export type { AgentOptions, AgentResult, ArtifactCollector, ContextStrategy, ContextManagementAction, StreamAgentOptions, ToolCallResult, UsageInfo, AgentError, } from "./types/agent.js";
|
|
7
|
+
export { TokenBudgetTracker, truncateToolResult, estimateTokens, isAccurateTokenCounting, } from "./helpers/context-management.js";
|
|
7
8
|
export type { AgentStreamEvent } from "./types/ui-events.js";
|
|
8
9
|
export interface RetryConfig {
|
|
9
10
|
/** Maximum number of retry attempts (default: 5) */
|
|
@@ -803,12 +804,12 @@ declare class Graphlit {
|
|
|
803
804
|
*/
|
|
804
805
|
queryConversationsGraph(filter?: Types.ConversationFilter, correlationId?: string): Promise<Types.QueryConversationsGraphQuery>;
|
|
805
806
|
/**
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
807
|
+
* Retrieves Conversations with clustering.
|
|
808
|
+
* @param filter - The filter criteria to apply when retrieving Conversations, optional.
|
|
809
|
+
* @param clusters - The clustering input parameters, optional.
|
|
810
|
+
* @param correlationId - The tenant correlation identifier, optional.
|
|
811
|
+
* @returns The Conversations with clusters.
|
|
812
|
+
*/
|
|
812
813
|
queryConversationsClusters(filter?: Types.ConversationFilter, clusters?: Types.EntityClustersInput, correlationId?: string): Promise<Types.QueryConversationsClustersQuery>;
|
|
813
814
|
/**
|
|
814
815
|
* Counts conversations based on the provided filter criteria.
|
package/dist/client.js
CHANGED
|
@@ -6,6 +6,7 @@ import { RetryLink } from "@apollo/client/link/retry/index.js";
|
|
|
6
6
|
import * as Types from "./generated/graphql-types.js";
|
|
7
7
|
import * as Documents from "./generated/graphql-documents.js";
|
|
8
8
|
import { getServiceType, getModelName, getModelEnum } from "./model-mapping.js";
|
|
9
|
+
import { TokenBudgetTracker, truncateToolResult, windowToolRounds, estimateTokens, DEFAULT_CONTEXT_STRATEGY, } from "./helpers/context-management.js";
|
|
9
10
|
import { UIEventAdapter } from "./streaming/ui-event-adapter.js";
|
|
10
11
|
import { formatMessagesForOpenAI, formatMessagesForAnthropic, formatMessagesForGoogle, formatMessagesForMistral, formatMessagesForBedrock, } from "./streaming/llm-formatters.js";
|
|
11
12
|
import { streamWithOpenAI, streamWithAnthropic, streamWithGoogle, streamWithGroq, streamWithCerebras, streamWithCohere, streamWithMistral, streamWithBedrock, streamWithDeepseek, streamWithXai, } from "./streaming/providers.js";
|
|
@@ -124,7 +125,9 @@ catch (e) {
|
|
|
124
125
|
console.log("[SDK Loading] Cerebras SDK not found:", e.message);
|
|
125
126
|
}
|
|
126
127
|
}
|
|
127
|
-
const DEFAULT_MAX_TOOL_ROUNDS =
|
|
128
|
+
const DEFAULT_MAX_TOOL_ROUNDS = 100;
|
|
129
|
+
// Re-export context management utilities
|
|
130
|
+
export { TokenBudgetTracker, truncateToolResult, estimateTokens, isAccurateTokenCounting, } from "./helpers/context-management.js";
|
|
128
131
|
// Helper function to validate GUID format
|
|
129
132
|
function isValidGuid(guid) {
|
|
130
133
|
if (!guid)
|
|
@@ -1534,12 +1537,12 @@ class Graphlit {
|
|
|
1534
1537
|
});
|
|
1535
1538
|
}
|
|
1536
1539
|
/**
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1540
|
+
* Retrieves Conversations with clustering.
|
|
1541
|
+
* @param filter - The filter criteria to apply when retrieving Conversations, optional.
|
|
1542
|
+
* @param clusters - The clustering input parameters, optional.
|
|
1543
|
+
* @param correlationId - The tenant correlation identifier, optional.
|
|
1544
|
+
* @returns The Conversations with clusters.
|
|
1545
|
+
*/
|
|
1543
1546
|
async queryConversationsClusters(filter, clusters, correlationId) {
|
|
1544
1547
|
return this.queryAndCheckError(Documents.QueryConversationsClusters, {
|
|
1545
1548
|
filter: filter,
|
|
@@ -3131,7 +3134,10 @@ class Graphlit {
|
|
|
3131
3134
|
* @returns The organizations.
|
|
3132
3135
|
*/
|
|
3133
3136
|
async queryOrganizations(filter, correlationId) {
|
|
3134
|
-
return this.queryAndCheckError(Documents.QueryOrganizations, {
|
|
3137
|
+
return this.queryAndCheckError(Documents.QueryOrganizations, {
|
|
3138
|
+
filter: filter,
|
|
3139
|
+
correlationId: correlationId,
|
|
3140
|
+
});
|
|
3135
3141
|
}
|
|
3136
3142
|
/**
|
|
3137
3143
|
* Retrieves organizations with clustering information.
|
|
@@ -3352,7 +3358,10 @@ class Graphlit {
|
|
|
3352
3358
|
* @returns The emotions.
|
|
3353
3359
|
*/
|
|
3354
3360
|
async queryEmotions(filter, correlationId) {
|
|
3355
|
-
return this.queryAndCheckError(Documents.QueryEmotions, {
|
|
3361
|
+
return this.queryAndCheckError(Documents.QueryEmotions, {
|
|
3362
|
+
filter: filter,
|
|
3363
|
+
correlationId: correlationId,
|
|
3364
|
+
});
|
|
3356
3365
|
}
|
|
3357
3366
|
/**
|
|
3358
3367
|
* Counts emotions based on the provided filter criteria.
|
|
@@ -3518,7 +3527,10 @@ class Graphlit {
|
|
|
3518
3527
|
* @returns The products.
|
|
3519
3528
|
*/
|
|
3520
3529
|
async queryProducts(filter, correlationId) {
|
|
3521
|
-
return this.queryAndCheckError(Documents.QueryProducts, {
|
|
3530
|
+
return this.queryAndCheckError(Documents.QueryProducts, {
|
|
3531
|
+
filter: filter,
|
|
3532
|
+
correlationId: correlationId,
|
|
3533
|
+
});
|
|
3522
3534
|
}
|
|
3523
3535
|
/**
|
|
3524
3536
|
* Retrieves products with clustering information.
|
|
@@ -3712,7 +3724,10 @@ class Graphlit {
|
|
|
3712
3724
|
* @returns The software.
|
|
3713
3725
|
*/
|
|
3714
3726
|
async querySoftwares(filter, correlationId) {
|
|
3715
|
-
return this.queryAndCheckError(Documents.QuerySoftwares, {
|
|
3727
|
+
return this.queryAndCheckError(Documents.QuerySoftwares, {
|
|
3728
|
+
filter: filter,
|
|
3729
|
+
correlationId: correlationId,
|
|
3730
|
+
});
|
|
3716
3731
|
}
|
|
3717
3732
|
/**
|
|
3718
3733
|
* Retrieves software with clustering information.
|
|
@@ -3773,7 +3788,10 @@ class Graphlit {
|
|
|
3773
3788
|
}
|
|
3774
3789
|
/** Retrieves medical conditions based on filter criteria. */
|
|
3775
3790
|
async queryMedicalConditions(filter, correlationId) {
|
|
3776
|
-
return this.queryAndCheckError(Documents.QueryMedicalConditions, {
|
|
3791
|
+
return this.queryAndCheckError(Documents.QueryMedicalConditions, {
|
|
3792
|
+
filter: filter,
|
|
3793
|
+
correlationId: correlationId,
|
|
3794
|
+
});
|
|
3777
3795
|
}
|
|
3778
3796
|
/** Retrieves medical conditions with clustering information. */
|
|
3779
3797
|
async queryMedicalConditionsClusters(filter, clusters, correlationId) {
|
|
@@ -3823,7 +3841,10 @@ class Graphlit {
|
|
|
3823
3841
|
}
|
|
3824
3842
|
/** Retrieves medical guidelines based on filter criteria. */
|
|
3825
3843
|
async queryMedicalGuidelines(filter, correlationId) {
|
|
3826
|
-
return this.queryAndCheckError(Documents.QueryMedicalGuidelines, {
|
|
3844
|
+
return this.queryAndCheckError(Documents.QueryMedicalGuidelines, {
|
|
3845
|
+
filter: filter,
|
|
3846
|
+
correlationId: correlationId,
|
|
3847
|
+
});
|
|
3827
3848
|
}
|
|
3828
3849
|
/** Retrieves medical guidelines with clustering information. */
|
|
3829
3850
|
async queryMedicalGuidelinesClusters(filter, clusters, correlationId) {
|
|
@@ -3869,7 +3890,10 @@ class Graphlit {
|
|
|
3869
3890
|
}
|
|
3870
3891
|
/** Retrieves medical drugs based on filter criteria. */
|
|
3871
3892
|
async queryMedicalDrugs(filter, correlationId) {
|
|
3872
|
-
return this.queryAndCheckError(Documents.QueryMedicalDrugs, {
|
|
3893
|
+
return this.queryAndCheckError(Documents.QueryMedicalDrugs, {
|
|
3894
|
+
filter: filter,
|
|
3895
|
+
correlationId: correlationId,
|
|
3896
|
+
});
|
|
3873
3897
|
}
|
|
3874
3898
|
/** Retrieves medical drugs with clustering information. */
|
|
3875
3899
|
async queryMedicalDrugsClusters(filter, clusters, correlationId) {
|
|
@@ -3923,7 +3947,10 @@ class Graphlit {
|
|
|
3923
3947
|
}
|
|
3924
3948
|
/** Retrieves medical indications based on filter criteria. */
|
|
3925
3949
|
async queryMedicalIndications(filter, correlationId) {
|
|
3926
|
-
return this.queryAndCheckError(Documents.QueryMedicalIndications, {
|
|
3950
|
+
return this.queryAndCheckError(Documents.QueryMedicalIndications, {
|
|
3951
|
+
filter: filter,
|
|
3952
|
+
correlationId: correlationId,
|
|
3953
|
+
});
|
|
3927
3954
|
}
|
|
3928
3955
|
/** Retrieves medical indications with clustering information. */
|
|
3929
3956
|
async queryMedicalIndicationsClusters(filter, clusters, correlationId) {
|
|
@@ -3977,7 +4004,10 @@ class Graphlit {
|
|
|
3977
4004
|
}
|
|
3978
4005
|
/** Retrieves medical contraindications based on filter criteria. */
|
|
3979
4006
|
async queryMedicalContraindications(filter, correlationId) {
|
|
3980
|
-
return this.queryAndCheckError(Documents.QueryMedicalContraindications, {
|
|
4007
|
+
return this.queryAndCheckError(Documents.QueryMedicalContraindications, {
|
|
4008
|
+
filter: filter,
|
|
4009
|
+
correlationId: correlationId,
|
|
4010
|
+
});
|
|
3981
4011
|
}
|
|
3982
4012
|
/** Retrieves medical contraindications with clustering information. */
|
|
3983
4013
|
async queryMedicalContraindicationsClusters(filter, clusters, correlationId) {
|
|
@@ -4024,7 +4054,10 @@ class Graphlit {
|
|
|
4024
4054
|
}
|
|
4025
4055
|
/** Retrieves medical tests based on filter criteria. */
|
|
4026
4056
|
async queryMedicalTests(filter, correlationId) {
|
|
4027
|
-
return this.queryAndCheckError(Documents.QueryMedicalTests, {
|
|
4057
|
+
return this.queryAndCheckError(Documents.QueryMedicalTests, {
|
|
4058
|
+
filter: filter,
|
|
4059
|
+
correlationId: correlationId,
|
|
4060
|
+
});
|
|
4028
4061
|
}
|
|
4029
4062
|
/** Retrieves medical tests with clustering information. */
|
|
4030
4063
|
async queryMedicalTestsClusters(filter, clusters, correlationId) {
|
|
@@ -4074,7 +4107,10 @@ class Graphlit {
|
|
|
4074
4107
|
}
|
|
4075
4108
|
/** Retrieves medical devices based on filter criteria. */
|
|
4076
4109
|
async queryMedicalDevices(filter, correlationId) {
|
|
4077
|
-
return this.queryAndCheckError(Documents.QueryMedicalDevices, {
|
|
4110
|
+
return this.queryAndCheckError(Documents.QueryMedicalDevices, {
|
|
4111
|
+
filter: filter,
|
|
4112
|
+
correlationId: correlationId,
|
|
4113
|
+
});
|
|
4078
4114
|
}
|
|
4079
4115
|
/** Retrieves medical devices with clustering information. */
|
|
4080
4116
|
async queryMedicalDevicesClusters(filter, clusters, correlationId) {
|
|
@@ -4124,7 +4160,10 @@ class Graphlit {
|
|
|
4124
4160
|
}
|
|
4125
4161
|
/** Retrieves medical procedures based on filter criteria. */
|
|
4126
4162
|
async queryMedicalProcedures(filter, correlationId) {
|
|
4127
|
-
return this.queryAndCheckError(Documents.QueryMedicalProcedures, {
|
|
4163
|
+
return this.queryAndCheckError(Documents.QueryMedicalProcedures, {
|
|
4164
|
+
filter: filter,
|
|
4165
|
+
correlationId: correlationId,
|
|
4166
|
+
});
|
|
4128
4167
|
}
|
|
4129
4168
|
/** Retrieves medical procedures with clustering information. */
|
|
4130
4169
|
async queryMedicalProceduresClusters(filter, clusters, correlationId) {
|
|
@@ -4174,7 +4213,10 @@ class Graphlit {
|
|
|
4174
4213
|
}
|
|
4175
4214
|
/** Retrieves medical studies based on filter criteria. */
|
|
4176
4215
|
async queryMedicalStudies(filter, correlationId) {
|
|
4177
|
-
return this.queryAndCheckError(Documents.QueryMedicalStudies, {
|
|
4216
|
+
return this.queryAndCheckError(Documents.QueryMedicalStudies, {
|
|
4217
|
+
filter: filter,
|
|
4218
|
+
correlationId: correlationId,
|
|
4219
|
+
});
|
|
4178
4220
|
}
|
|
4179
4221
|
/** Retrieves medical studies with clustering information. */
|
|
4180
4222
|
async queryMedicalStudiesClusters(filter, clusters, correlationId) {
|
|
@@ -4224,7 +4266,10 @@ class Graphlit {
|
|
|
4224
4266
|
}
|
|
4225
4267
|
/** Retrieves medical drug classes based on filter criteria. */
|
|
4226
4268
|
async queryMedicalDrugClasses(filter, correlationId) {
|
|
4227
|
-
return this.queryAndCheckError(Documents.QueryMedicalDrugClasses, {
|
|
4269
|
+
return this.queryAndCheckError(Documents.QueryMedicalDrugClasses, {
|
|
4270
|
+
filter: filter,
|
|
4271
|
+
correlationId: correlationId,
|
|
4272
|
+
});
|
|
4228
4273
|
}
|
|
4229
4274
|
/** Retrieves medical drug classes with clustering information. */
|
|
4230
4275
|
async queryMedicalDrugClassesClusters(filter, clusters, correlationId) {
|
|
@@ -4274,7 +4319,10 @@ class Graphlit {
|
|
|
4274
4319
|
}
|
|
4275
4320
|
/** Retrieves medical therapies based on filter criteria. */
|
|
4276
4321
|
async queryMedicalTherapies(filter, correlationId) {
|
|
4277
|
-
return this.queryAndCheckError(Documents.QueryMedicalTherapies, {
|
|
4322
|
+
return this.queryAndCheckError(Documents.QueryMedicalTherapies, {
|
|
4323
|
+
filter: filter,
|
|
4324
|
+
correlationId: correlationId,
|
|
4325
|
+
});
|
|
4278
4326
|
}
|
|
4279
4327
|
/** Retrieves medical therapies with clustering information. */
|
|
4280
4328
|
async queryMedicalTherapiesClusters(filter, clusters, correlationId) {
|
|
@@ -4612,13 +4660,17 @@ class Graphlit {
|
|
|
4612
4660
|
let totalTokens = currentMessage?.tokens || 0;
|
|
4613
4661
|
const toolStartTime = Date.now();
|
|
4614
4662
|
let toolTime = 0;
|
|
4663
|
+
// Context strategy for tool result truncation
|
|
4664
|
+
const strategy = options?.contextStrategy ?? {};
|
|
4665
|
+
const toolResultTokenLimit = strategy.toolResultTokenLimit ??
|
|
4666
|
+
DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit;
|
|
4615
4667
|
while (currentMessage.toolCalls?.length &&
|
|
4616
4668
|
rounds < maxRounds &&
|
|
4617
4669
|
!abortController.signal.aborted) {
|
|
4618
4670
|
rounds++;
|
|
4619
|
-
// Execute tools
|
|
4671
|
+
// Execute tools (with truncation)
|
|
4620
4672
|
const toolExecStart = Date.now();
|
|
4621
|
-
const toolResults = await this.executeToolsForPromptAgent(currentMessage.toolCalls.filter((tc) => tc !== null), toolHandlers || {}, allToolCalls, abortController.signal);
|
|
4673
|
+
const toolResults = await this.executeToolsForPromptAgent(currentMessage.toolCalls.filter((tc) => tc !== null), toolHandlers || {}, allToolCalls, abortController.signal, toolResultTokenLimit);
|
|
4622
4674
|
toolTime += Date.now() - toolExecStart;
|
|
4623
4675
|
if (abortController.signal.aborted) {
|
|
4624
4676
|
throw new Error("Operation timed out");
|
|
@@ -4701,7 +4753,9 @@ class Graphlit {
|
|
|
4701
4753
|
// Swallow errors from the previous call so a failed message doesn't
|
|
4702
4754
|
// permanently block the queue for this conversation.
|
|
4703
4755
|
// Check the abort signal before starting work so ESC while queued is instant.
|
|
4704
|
-
const next = previous
|
|
4756
|
+
const next = previous
|
|
4757
|
+
.catch(() => { })
|
|
4758
|
+
.then(() => {
|
|
4705
4759
|
if (abortSignal?.aborted)
|
|
4706
4760
|
throw new Error("Operation aborted");
|
|
4707
4761
|
return work();
|
|
@@ -4862,7 +4916,7 @@ class Graphlit {
|
|
|
4862
4916
|
modelService: serviceType,
|
|
4863
4917
|
});
|
|
4864
4918
|
// Start the streaming conversation
|
|
4865
|
-
await this.executeStreamingAgent(prompt, actualConversationId, fullSpec, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona);
|
|
4919
|
+
await this.executeStreamingAgent(prompt, actualConversationId, fullSpec, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona, options?.contextStrategy);
|
|
4866
4920
|
}, abortSignal);
|
|
4867
4921
|
}
|
|
4868
4922
|
catch (error) {
|
|
@@ -4908,9 +4962,10 @@ class Graphlit {
|
|
|
4908
4962
|
/**
|
|
4909
4963
|
* Execute the streaming agent workflow with tool calling loop
|
|
4910
4964
|
*/
|
|
4911
|
-
async executeStreamingAgent(prompt, conversationId, specification, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona) {
|
|
4965
|
+
async executeStreamingAgent(prompt, conversationId, specification, tools, toolHandlers, uiAdapter, maxRounds, abortSignal, mimeType, data, correlationId, persona, contextStrategy) {
|
|
4912
4966
|
let currentRound = 0;
|
|
4913
4967
|
let fullMessage = "";
|
|
4968
|
+
const contextActions = [];
|
|
4914
4969
|
// Collects artifact content IDs from tool handlers (e.g. code_execution).
|
|
4915
4970
|
// Handlers register async ingestion promises; we await all of them before
|
|
4916
4971
|
// completeConversation so the IDs are available without blocking the LLM.
|
|
@@ -4965,8 +5020,30 @@ class Graphlit {
|
|
|
4965
5020
|
console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
|
|
4966
5021
|
}
|
|
4967
5022
|
}
|
|
5023
|
+
// Initialize context management
|
|
5024
|
+
const budgetTracker = details
|
|
5025
|
+
? TokenBudgetTracker.fromDetails(details)
|
|
5026
|
+
: undefined;
|
|
5027
|
+
// Merge: caller overrides > server-side specification strategy > defaults
|
|
5028
|
+
const callerStrategy = contextStrategy ?? {};
|
|
5029
|
+
const serverStrategy = specification.strategy;
|
|
5030
|
+
const toolResultTokenLimit = callerStrategy.toolResultTokenLimit ??
|
|
5031
|
+
serverStrategy?.toolResultTokenLimit ??
|
|
5032
|
+
DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit;
|
|
5033
|
+
const toolRoundLimit = callerStrategy.toolRoundLimit ??
|
|
5034
|
+
serverStrategy?.toolRoundLimit ??
|
|
5035
|
+
DEFAULT_CONTEXT_STRATEGY.toolRoundLimit;
|
|
5036
|
+
const rebudgetThreshold = callerStrategy.rebudgetThreshold ??
|
|
5037
|
+
serverStrategy?.toolBudgetThreshold ??
|
|
5038
|
+
DEFAULT_CONTEXT_STRATEGY.rebudgetThreshold;
|
|
5039
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING && budgetTracker) {
|
|
5040
|
+
console.log(`📊 [Context Management] Initialized budget tracker: ${budgetTracker.usagePercent}% used, ` +
|
|
5041
|
+
`${budgetTracker.remaining.toLocaleString()} tokens remaining. ` +
|
|
5042
|
+
`Strategy: toolResultLimit=${toolResultTokenLimit}, toolRoundLimit=${toolRoundLimit}, ` +
|
|
5043
|
+
`rebudgetThreshold=${rebudgetThreshold}`);
|
|
5044
|
+
}
|
|
4968
5045
|
// Build message array with conversation history
|
|
4969
|
-
|
|
5046
|
+
let messages = [];
|
|
4970
5047
|
// Add system prompt if specified
|
|
4971
5048
|
if (specification.systemPrompt) {
|
|
4972
5049
|
messages.push({
|
|
@@ -5033,6 +5110,41 @@ class Graphlit {
|
|
|
5033
5110
|
if (abortSignal?.aborted) {
|
|
5034
5111
|
throw new Error("Operation aborted");
|
|
5035
5112
|
}
|
|
5113
|
+
// Context window management: check budget before sending to LLM
|
|
5114
|
+
if (budgetTracker && currentRound > 0) {
|
|
5115
|
+
if (budgetTracker.needsRebudget(rebudgetThreshold)) {
|
|
5116
|
+
const beforeUsage = budgetTracker.usagePercent;
|
|
5117
|
+
const beforeCount = messages.length;
|
|
5118
|
+
messages = windowToolRounds(messages, toolRoundLimit);
|
|
5119
|
+
budgetTracker.resetFromMessages(messages);
|
|
5120
|
+
const afterUsage = budgetTracker.usagePercent;
|
|
5121
|
+
const droppedRounds = Math.max(0, Math.floor((beforeCount - messages.length) / 2));
|
|
5122
|
+
if (droppedRounds > 0) {
|
|
5123
|
+
const action = {
|
|
5124
|
+
type: "windowed_tool_rounds",
|
|
5125
|
+
droppedRounds,
|
|
5126
|
+
keptRounds: toolRoundLimit,
|
|
5127
|
+
};
|
|
5128
|
+
contextActions.push(action);
|
|
5129
|
+
// Notify the UI
|
|
5130
|
+
uiAdapter.handleEvent({
|
|
5131
|
+
type: "context_management",
|
|
5132
|
+
action,
|
|
5133
|
+
usage: budgetTracker.getUsageSnapshot(),
|
|
5134
|
+
timestamp: new Date(),
|
|
5135
|
+
});
|
|
5136
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
|
5137
|
+
console.log(`📊 [Context Management] Windowed tool rounds: dropped ${droppedRounds} round(s), ` +
|
|
5138
|
+
`budget ${beforeUsage}% → ${afterUsage}% (${messages.length} messages)`);
|
|
5139
|
+
}
|
|
5140
|
+
}
|
|
5141
|
+
// Emit updated context window
|
|
5142
|
+
uiAdapter.handleEvent({
|
|
5143
|
+
type: "context_window",
|
|
5144
|
+
usage: budgetTracker.getUsageSnapshot(),
|
|
5145
|
+
});
|
|
5146
|
+
}
|
|
5147
|
+
}
|
|
5036
5148
|
let toolCalls = [];
|
|
5037
5149
|
let roundMessage = "";
|
|
5038
5150
|
// Stream with appropriate provider
|
|
@@ -5299,6 +5411,12 @@ class Graphlit {
|
|
|
5299
5411
|
timestamp: new Date().toISOString(),
|
|
5300
5412
|
};
|
|
5301
5413
|
messages.push(assistantMessage);
|
|
5414
|
+
// Track assistant message in budget (includes tool call arguments)
|
|
5415
|
+
if (budgetTracker) {
|
|
5416
|
+
const assistantTokens = estimateTokens(roundMessage) +
|
|
5417
|
+
toolCalls.reduce((sum, tc) => sum + estimateTokens(tc.arguments), 0);
|
|
5418
|
+
budgetTracker.addMessage("", assistantTokens);
|
|
5419
|
+
}
|
|
5302
5420
|
// Execute tools and add responses
|
|
5303
5421
|
for (const toolCall of toolCalls) {
|
|
5304
5422
|
const handler = toolHandlers[toolCall.name];
|
|
@@ -5395,17 +5513,45 @@ class Graphlit {
|
|
|
5395
5513
|
},
|
|
5396
5514
|
result: result,
|
|
5397
5515
|
});
|
|
5398
|
-
// Add tool response to messages
|
|
5516
|
+
// Add tool response to messages (with truncation)
|
|
5517
|
+
const rawResult = typeof result === "string" ? result : JSON.stringify(result);
|
|
5518
|
+
const truncatedResult = truncateToolResult(rawResult, toolResultTokenLimit, toolCall.name);
|
|
5519
|
+
// Track truncation for observability
|
|
5520
|
+
if (truncatedResult.length < rawResult.length) {
|
|
5521
|
+
const action = {
|
|
5522
|
+
type: "truncated_tool_result",
|
|
5523
|
+
toolName: toolCall.name,
|
|
5524
|
+
originalTokens: estimateTokens(rawResult),
|
|
5525
|
+
truncatedTokens: estimateTokens(truncatedResult),
|
|
5526
|
+
};
|
|
5527
|
+
contextActions.push(action);
|
|
5528
|
+
if (budgetTracker) {
|
|
5529
|
+
uiAdapter.handleEvent({
|
|
5530
|
+
type: "context_management",
|
|
5531
|
+
action,
|
|
5532
|
+
usage: budgetTracker.getUsageSnapshot(),
|
|
5533
|
+
timestamp: new Date(),
|
|
5534
|
+
});
|
|
5535
|
+
}
|
|
5536
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
|
5537
|
+
console.log(`📊 [Context Management] Truncated tool result for ${toolCall.name}: ` +
|
|
5538
|
+
`${estimateTokens(rawResult)} → ${estimateTokens(truncatedResult)} tokens`);
|
|
5539
|
+
}
|
|
5540
|
+
}
|
|
5399
5541
|
const toolMessage = {
|
|
5400
5542
|
__typename: "ConversationMessage",
|
|
5401
5543
|
role: Types.ConversationRoleTypes.Tool,
|
|
5402
|
-
message:
|
|
5544
|
+
message: truncatedResult,
|
|
5403
5545
|
toolCallId: toolCall.id,
|
|
5404
5546
|
timestamp: new Date().toISOString(),
|
|
5405
5547
|
};
|
|
5406
5548
|
// Add tool name for Mistral compatibility
|
|
5407
5549
|
toolMessage.toolName = toolCall.name;
|
|
5408
5550
|
messages.push(toolMessage);
|
|
5551
|
+
// Track budget
|
|
5552
|
+
if (budgetTracker) {
|
|
5553
|
+
budgetTracker.addMessage(truncatedResult);
|
|
5554
|
+
}
|
|
5409
5555
|
}
|
|
5410
5556
|
catch (error) {
|
|
5411
5557
|
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -5421,19 +5567,30 @@ class Graphlit {
|
|
|
5421
5567
|
error: errorMessage,
|
|
5422
5568
|
});
|
|
5423
5569
|
// Add error response
|
|
5570
|
+
const errorText = `Error: ${errorMessage}`;
|
|
5424
5571
|
const errorToolMessage = {
|
|
5425
5572
|
__typename: "ConversationMessage",
|
|
5426
5573
|
role: Types.ConversationRoleTypes.Tool,
|
|
5427
|
-
message:
|
|
5574
|
+
message: errorText,
|
|
5428
5575
|
toolCallId: toolCall.id,
|
|
5429
5576
|
timestamp: new Date().toISOString(),
|
|
5430
5577
|
};
|
|
5431
5578
|
// Add tool name for Mistral compatibility
|
|
5432
5579
|
errorToolMessage.toolName = toolCall.name;
|
|
5433
5580
|
messages.push(errorToolMessage);
|
|
5581
|
+
if (budgetTracker) {
|
|
5582
|
+
budgetTracker.addMessage(errorText);
|
|
5583
|
+
}
|
|
5434
5584
|
}
|
|
5435
5585
|
}
|
|
5436
5586
|
}
|
|
5587
|
+
// Emit context window usage after each tool round
|
|
5588
|
+
if (budgetTracker) {
|
|
5589
|
+
uiAdapter.handleEvent({
|
|
5590
|
+
type: "context_window",
|
|
5591
|
+
usage: budgetTracker.getUsageSnapshot(),
|
|
5592
|
+
});
|
|
5593
|
+
}
|
|
5437
5594
|
currentRound++;
|
|
5438
5595
|
}
|
|
5439
5596
|
// Complete the conversation and get token count
|
|
@@ -5859,7 +6016,7 @@ class Graphlit {
|
|
|
5859
6016
|
await streamWithXai(specification, messages, tools, xaiClient, (event) => uiAdapter.handleEvent(event), onComplete, abortSignal);
|
|
5860
6017
|
}
|
|
5861
6018
|
// Helper method to execute tools for promptAgent
|
|
5862
|
-
async executeToolsForPromptAgent(toolCalls, toolHandlers, allToolCalls, signal) {
|
|
6019
|
+
async executeToolsForPromptAgent(toolCalls, toolHandlers, allToolCalls, signal, toolResultTokenLimit = DEFAULT_CONTEXT_STRATEGY.toolResultTokenLimit) {
|
|
5863
6020
|
const responses = [];
|
|
5864
6021
|
// Execute tools in parallel for better performance
|
|
5865
6022
|
const toolPromises = toolCalls.map(async (toolCall) => {
|
|
@@ -5892,10 +6049,18 @@ class Graphlit {
|
|
|
5892
6049
|
duration: Date.now() - startTime,
|
|
5893
6050
|
};
|
|
5894
6051
|
allToolCalls.push(toolResult);
|
|
6052
|
+
// Truncate oversized tool results before sending to server
|
|
6053
|
+
const rawContent = error ? error : result ? JSON.stringify(result) : "";
|
|
6054
|
+
const content = truncateToolResult(rawContent, toolResultTokenLimit, toolCall.name || "unknown");
|
|
6055
|
+
if (content.length < rawContent.length &&
|
|
6056
|
+
process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
|
6057
|
+
console.log(`📊 [Context Management] Truncated tool result for ${toolCall.name}: ` +
|
|
6058
|
+
`${estimateTokens(rawContent)} → ${estimateTokens(content)} tokens (promptAgent path)`);
|
|
6059
|
+
}
|
|
5895
6060
|
// Response for API
|
|
5896
6061
|
return {
|
|
5897
6062
|
id: toolCall.id,
|
|
5898
|
-
content
|
|
6063
|
+
content,
|
|
5899
6064
|
};
|
|
5900
6065
|
});
|
|
5901
6066
|
const results = await Promise.all(toolPromises);
|
|
package/dist/generated/graphql-documents.js
CHANGED
|
@@ -16098,6 +16098,9 @@ export const GetSpecification = gql `
|
|
|
16098
16098
|
factExtractionLimit
|
|
16099
16099
|
messagesWeight
|
|
16100
16100
|
contentsWeight
|
|
16101
|
+
toolResultTokenLimit
|
|
16102
|
+
toolRoundLimit
|
|
16103
|
+
toolBudgetThreshold
|
|
16101
16104
|
}
|
|
16102
16105
|
promptStrategy {
|
|
16103
16106
|
type
|
|
@@ -16492,6 +16495,9 @@ export const QuerySpecifications = gql `
|
|
|
16492
16495
|
factExtractionLimit
|
|
16493
16496
|
messagesWeight
|
|
16494
16497
|
contentsWeight
|
|
16498
|
+
toolResultTokenLimit
|
|
16499
|
+
toolRoundLimit
|
|
16500
|
+
toolBudgetThreshold
|
|
16495
16501
|
}
|
|
16496
16502
|
promptStrategy {
|
|
16497
16503
|
type
|
|
package/dist/generated/graphql-types.d.ts
CHANGED
|
@@ -3755,6 +3755,12 @@ export type ConversationStrategy = {
|
|
|
3755
3755
|
messageLimit?: Maybe<Scalars['Int']['output']>;
|
|
3756
3756
|
/** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
|
|
3757
3757
|
messagesWeight?: Maybe<Scalars['Float']['output']>;
|
|
3758
|
+
/** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
|
|
3759
|
+
toolBudgetThreshold?: Maybe<Scalars['Float']['output']>;
|
|
3760
|
+
/** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
|
|
3761
|
+
toolResultTokenLimit?: Maybe<Scalars['Int']['output']>;
|
|
3762
|
+
/** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
|
|
3763
|
+
toolRoundLimit?: Maybe<Scalars['Int']['output']>;
|
|
3758
3764
|
/** The conversation strategy type. */
|
|
3759
3765
|
type?: Maybe<ConversationStrategyTypes>;
|
|
3760
3766
|
};
|
|
@@ -3782,6 +3788,12 @@ export type ConversationStrategyInput = {
|
|
|
3782
3788
|
messageLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3783
3789
|
/** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
|
|
3784
3790
|
messagesWeight?: InputMaybe<Scalars['Float']['input']>;
|
|
3791
|
+
/** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
|
|
3792
|
+
toolBudgetThreshold?: InputMaybe<Scalars['Float']['input']>;
|
|
3793
|
+
/** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
|
|
3794
|
+
toolResultTokenLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3795
|
+
/** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
|
|
3796
|
+
toolRoundLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3785
3797
|
/** The conversation strategy type. */
|
|
3786
3798
|
type?: InputMaybe<ConversationStrategyTypes>;
|
|
3787
3799
|
};
|
|
@@ -3816,6 +3828,12 @@ export type ConversationStrategyUpdateInput = {
|
|
|
3816
3828
|
messageLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3817
3829
|
/** The weight of conversation messages within prompt context, in range [0.0 - 1.0]. */
|
|
3818
3830
|
messagesWeight?: InputMaybe<Scalars['Float']['input']>;
|
|
3831
|
+
/** The fraction of token budget at which tool round windowing is triggered, in range [0.0 - 1.0]. */
|
|
3832
|
+
toolBudgetThreshold?: InputMaybe<Scalars['Float']['input']>;
|
|
3833
|
+
/** The maximum number of tokens for a single tool result. Results exceeding this limit are truncated. */
|
|
3834
|
+
toolResultTokenLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3835
|
+
/** The maximum number of tool call/response rounds to keep in context. Older rounds are dropped. */
|
|
3836
|
+
toolRoundLimit?: InputMaybe<Scalars['Int']['input']>;
|
|
3819
3837
|
/** The conversation strategy type. */
|
|
3820
3838
|
type?: InputMaybe<ConversationStrategyTypes>;
|
|
3821
3839
|
};
|
|
@@ -40284,6 +40302,9 @@ export type GetSpecificationQuery = {
|
|
|
40284
40302
|
factExtractionLimit?: number | null;
|
|
40285
40303
|
messagesWeight?: number | null;
|
|
40286
40304
|
contentsWeight?: number | null;
|
|
40305
|
+
toolResultTokenLimit?: number | null;
|
|
40306
|
+
toolRoundLimit?: number | null;
|
|
40307
|
+
toolBudgetThreshold?: number | null;
|
|
40287
40308
|
} | null;
|
|
40288
40309
|
promptStrategy?: {
|
|
40289
40310
|
__typename?: 'PromptStrategy';
|
|
@@ -40726,6 +40747,9 @@ export type QuerySpecificationsQuery = {
|
|
|
40726
40747
|
factExtractionLimit?: number | null;
|
|
40727
40748
|
messagesWeight?: number | null;
|
|
40728
40749
|
contentsWeight?: number | null;
|
|
40750
|
+
toolResultTokenLimit?: number | null;
|
|
40751
|
+
toolRoundLimit?: number | null;
|
|
40752
|
+
toolBudgetThreshold?: number | null;
|
|
40729
40753
|
} | null;
|
|
40730
40754
|
promptStrategy?: {
|
|
40731
40755
|
__typename?: 'PromptStrategy';
|
|
package/dist/helpers/context-management.d.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import * as Types from "../generated/graphql-types.js";
|
|
2
|
+
/**
 * Reports whether exact token counting is active.
 *
 * @returns `true` when js-tiktoken is installed and its encoder loaded
 *   successfully; `false` otherwise.
 */
export declare function isAccurateTokenCounting(): boolean;
|
|
4
|
+
/**
 * Estimates how many tokens a piece of text occupies.
 *
 * With js-tiktoken installed the result is an accurate BPE token count
 * (o200k_base encoding). Without it, a conservative heuristic is used instead:
 * chars / 3.5.
 *
 * @param text - The text to measure.
 * @returns The exact or estimated token count.
 */
export declare function estimateTokens(text: string): number;
|
|
11
|
+
/**
 * Settings that govern context-window management while an agent runs its
 * tool loop. The values may come from the server (via ConversationStrategy)
 * or be supplied client-side.
 */
export interface ContextStrategyConfig {
    /**
     * Token ceiling for a single tool result; anything larger is truncated.
     * Default: 8192
     */
    toolResultTokenLimit: number;
    /**
     * How many tool call/response rounds stay in context; older rounds are
     * dropped first-in, first-out. Default: 10
     */
    toolRoundLimit: number;
    /**
     * Fraction of the token budget at which client-side windowing kicks in.
     * Default: 0.75
     */
    rebudgetThreshold: number;
}
|
|
23
|
+
/** Built-in context strategy values, exported as a ready-made ContextStrategyConfig. */
export declare const DEFAULT_CONTEXT_STRATEGY: ContextStrategyConfig;
|
|
24
|
+
/**
 * Tracks token budget during streaming agent tool loops.
 *
 * Seeded from server-provided token counts (via formatConversation details).
 * Content added afterwards is counted with a server-supplied token count when
 * one is passed, otherwise with `estimateTokens` (an accurate BPE count when
 * js-tiktoken is installed, a chars / 3.5 heuristic when it is not).
 */
export declare class TokenBudgetTracker {
    private readonly tokenLimit;
    private readonly completionTokenLimit;
    private _usedTokens;
    /**
     * @param tokenLimit - The model's full context token limit.
     * @param completionTokenLimit - Tokens set aside for the completion; subtracted from the limit when computing `budget`.
     * @param initialUsedTokens - Token usage to start counting from.
     */
    constructor(tokenLimit: number, completionTokenLimit: number, initialUsedTokens: number);
    /**
     * Create a tracker from formatConversation response details.
     *
     * `completionTokenLimit` falls back to 4096 when absent, and the initial
     * usage is the sum of the `tokens` values of the supplied messages.
     *
     * @returns The tracker, or `undefined` when `tokenLimit` is missing or zero.
     */
    static fromDetails(details: {
        tokenLimit?: number | null;
        completionTokenLimit?: number | null;
        messages?: Array<{
            tokens?: number | null;
        } | null> | null;
    }): TokenBudgetTracker | undefined;
    /** Total available token budget (tokenLimit - completionTokenLimit, at 95% ceiling) */
    get budget(): number;
    /** Current estimated token usage */
    get usedTokens(): number;
    /** Remaining tokens before `budget` is exhausted; never negative */
    get remaining(): number;
    /**
     * Current usage as a whole-number percentage of `budget`.
     * The value is not clamped, so it exceeds 100 once usage passes the
     * budget; it is reported as 100 when the budget is zero or negative.
     */
    get usagePercent(): number;
    /** Model's full context token limit (not reduced by the completion reserve) */
    get maxTokens(): number;
    /**
     * Track addition of new message content.
     *
     * @param text - The added content; only measured when no server count is given.
     * @param serverTokenCount - Token count reported by the server, used as-is when provided.
     */
    addMessage(text: string, serverTokenCount?: number): void;
    /**
     * Check if we need to trigger windowing/re-budgeting.
     *
     * @param threshold - Fraction of `budget` (0.0 - 1.0) at which re-budgeting is due.
     */
    needsRebudget(threshold: number): boolean;
    /**
     * Reset tracker from a fresh set of messages (after windowing).
     * A message's `tokens` value is used when present and non-zero; otherwise
     * its `message` text is measured with `estimateTokens`.
     */
    resetFromMessages(messages: Array<{
        message?: string | null;
        tokens?: number | null;
    }>): void;
    /**
     * Get current usage snapshot for emitting events.
     *
     * Note the mixed bases: `maxTokens` is the model's full token limit, while
     * `percentage` and `remainingTokens` are measured against `budget`.
     */
    getUsageSnapshot(): {
        usedTokens: number;
        maxTokens: number;
        percentage: number;
        remainingTokens: number;
    };
}
|
|
73
|
+
/**
 * Shortens a tool result so that it fits a token budget.
 *
 * Non-string results are serialized with JSON first. When the text is over
 * budget it is cut, preferably at a clean break point (a JSON boundary or a
 * newline), and a "[truncated by ...]" marker is appended so the LLM knows
 * data was removed.
 *
 * @param result - The raw tool result.
 * @param maxTokens - Token budget for the result.
 * @param toolName - Name written into the truncation marker.
 * @returns The original text when it fits, otherwise the shortened text plus marker.
 */
export declare function truncateToolResult(result: unknown, maxTokens: number, toolName: string): string;
|
|
80
|
+
/**
 * Trims older tool rounds out of a messages array.
 *
 * Kept as-is:
 * - the "header" messages (system prompt, conversation history, initial user message)
 * - the newest `keepRounds` tool rounds
 *
 * Everything older is dropped, and a system message describing the removal is
 * inserted in its place.
 *
 * @param messages - The full message list, header first.
 * @param keepRounds - Number of most recent tool rounds to retain.
 * @returns The windowed messages array
 */
export declare function windowToolRounds(messages: Types.ConversationMessage[], keepRounds: number): Types.ConversationMessage[];
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import * as Types from "../generated/graphql-types.js";
|
|
3
|
+
// ── Optional tiktoken encoder, created once at module load ──────────────────
// `encoder` stays undefined when js-tiktoken cannot be loaded; token counting
// then relies on the chars / 3.5 heuristic.
let encoder;
try {
    // js-tiktoken is an optional dependency, so it is resolved at runtime with
    // a require function anchored at this module's URL.
    const loadOptional = createRequire(import.meta.url);
    const tiktokenLite = loadOptional("js-tiktoken/lite");
    const o200kRanks = loadOptional("js-tiktoken/ranks/o200k_base");
    encoder = new tiktokenLite.Tiktoken(o200kRanks);
    if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
        console.debug("[graphlit-sdk] tiktoken encoder loaded (o200k_base) — accurate token counting enabled");
    }
}
catch {
    // Best-effort load: any failure simply leaves the heuristic in place.
    if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
        console.debug("[graphlit-sdk] js-tiktoken not available — using heuristic token estimation (chars / 3.5)");
    }
}
|
|
20
|
+
/** Returns `true` when js-tiktoken is installed and the encoder loaded successfully. */
|
|
21
|
+
export function isAccurateTokenCounting() {
|
|
22
|
+
return encoder !== undefined;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Token estimation.
|
|
26
|
+
*
|
|
27
|
+
* When js-tiktoken is installed, returns an accurate BPE token count (o200k_base encoding).
|
|
28
|
+
* Otherwise falls back to a conservative heuristic: chars / 3.5.
|
|
29
|
+
*/
|
|
30
|
+
export function estimateTokens(text) {
|
|
31
|
+
if (!text)
|
|
32
|
+
return 0;
|
|
33
|
+
if (encoder)
|
|
34
|
+
return encoder.encode(text).length;
|
|
35
|
+
return Math.ceil(text.length / 3.5);
|
|
36
|
+
}
|
|
37
|
+
/** Context strategy values used as the built-in defaults. */
export const DEFAULT_CONTEXT_STRATEGY = {
    // Token ceiling for a single tool result.
    toolResultTokenLimit: 8192,
    // Number of tool call/response rounds kept in context.
    toolRoundLimit: 10,
    // Fraction of the token budget at which windowing is triggered.
    rebudgetThreshold: 0.75,
};
|
|
42
|
+
/**
|
|
43
|
+
* Tracks token budget during streaming agent tool loops.
|
|
44
|
+
*
|
|
45
|
+
* Initialized from server-provided accurate token counts (via formatConversation details),
|
|
46
|
+
* then uses character-based heuristic estimation for incremental additions during the loop.
|
|
47
|
+
*/
|
|
48
|
+
export class TokenBudgetTracker {
|
|
49
|
+
tokenLimit;
|
|
50
|
+
completionTokenLimit;
|
|
51
|
+
_usedTokens;
|
|
52
|
+
constructor(tokenLimit, completionTokenLimit, initialUsedTokens) {
|
|
53
|
+
this.tokenLimit = tokenLimit;
|
|
54
|
+
this.completionTokenLimit = completionTokenLimit;
|
|
55
|
+
this._usedTokens = initialUsedTokens;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Create a tracker from formatConversation response details.
|
|
59
|
+
* Returns undefined if the details lack token information.
|
|
60
|
+
*/
|
|
61
|
+
static fromDetails(details) {
|
|
62
|
+
if (!details.tokenLimit)
|
|
63
|
+
return undefined;
|
|
64
|
+
const tokenLimit = details.tokenLimit;
|
|
65
|
+
const completionTokenLimit = details.completionTokenLimit ?? 4096;
|
|
66
|
+
const usedTokens = details.messages?.reduce((sum, msg) => sum + (msg?.tokens ?? 0), 0) ?? 0;
|
|
67
|
+
return new TokenBudgetTracker(tokenLimit, completionTokenLimit, usedTokens);
|
|
68
|
+
}
|
|
69
|
+
/** Total available token budget (tokenLimit - completionTokenLimit, at 95% ceiling) */
|
|
70
|
+
get budget() {
|
|
71
|
+
return Math.floor((this.tokenLimit - this.completionTokenLimit) * 0.95);
|
|
72
|
+
}
|
|
73
|
+
/** Current estimated token usage */
|
|
74
|
+
get usedTokens() {
|
|
75
|
+
return this._usedTokens;
|
|
76
|
+
}
|
|
77
|
+
/** Remaining tokens before budget is exhausted */
|
|
78
|
+
get remaining() {
|
|
79
|
+
return Math.max(0, this.budget - this._usedTokens);
|
|
80
|
+
}
|
|
81
|
+
/** Current usage as a percentage (0-100) */
|
|
82
|
+
get usagePercent() {
|
|
83
|
+
if (this.budget <= 0)
|
|
84
|
+
return 100;
|
|
85
|
+
return Math.round((this._usedTokens / this.budget) * 100);
|
|
86
|
+
}
|
|
87
|
+
/** Model's full context token limit */
|
|
88
|
+
get maxTokens() {
|
|
89
|
+
return this.tokenLimit;
|
|
90
|
+
}
|
|
91
|
+
/** Track addition of new message content */
|
|
92
|
+
addMessage(text, serverTokenCount) {
|
|
93
|
+
this._usedTokens += serverTokenCount ?? estimateTokens(text);
|
|
94
|
+
}
|
|
95
|
+
/** Check if we need to trigger windowing/re-budgeting */
|
|
96
|
+
needsRebudget(threshold) {
|
|
97
|
+
return this.usagePercent >= threshold * 100;
|
|
98
|
+
}
|
|
99
|
+
/** Reset tracker from a fresh set of messages (after windowing) */
|
|
100
|
+
resetFromMessages(messages) {
|
|
101
|
+
this._usedTokens = messages.reduce((sum, msg) => {
|
|
102
|
+
if (msg.tokens)
|
|
103
|
+
return sum + msg.tokens;
|
|
104
|
+
return sum + estimateTokens(msg.message ?? "");
|
|
105
|
+
}, 0);
|
|
106
|
+
}
|
|
107
|
+
/** Get current usage snapshot for emitting events */
|
|
108
|
+
getUsageSnapshot() {
|
|
109
|
+
return {
|
|
110
|
+
usedTokens: this._usedTokens,
|
|
111
|
+
maxTokens: this.tokenLimit,
|
|
112
|
+
percentage: this.usagePercent,
|
|
113
|
+
remainingTokens: this.remaining,
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* Truncates a tool result to fit within a token budget.
|
|
119
|
+
*
|
|
120
|
+
* Attempts to find a clean break point (JSON boundary or newline).
|
|
121
|
+
* Appends a [truncated] marker so the LLM knows data was cut.
|
|
122
|
+
*/
|
|
123
|
+
export function truncateToolResult(result, maxTokens, toolName) {
|
|
124
|
+
const text = typeof result === "string" ? result : JSON.stringify(result);
|
|
125
|
+
if (!text)
|
|
126
|
+
return "";
|
|
127
|
+
const estimatedTokens = estimateTokens(text);
|
|
128
|
+
if (estimatedTokens <= maxTokens)
|
|
129
|
+
return text;
|
|
130
|
+
// When tiktoken is available, compute the actual chars-per-token ratio for
|
|
131
|
+
// this specific text instead of using the hardcoded 3.5 heuristic.
|
|
132
|
+
const charsPerToken = encoder && estimatedTokens > 0
|
|
133
|
+
? text.length / estimatedTokens
|
|
134
|
+
: 3.5;
|
|
135
|
+
const maxChars = Math.floor(maxTokens * charsPerToken);
|
|
136
|
+
let truncated = text.substring(0, maxChars);
|
|
137
|
+
// Try to find a clean break point
|
|
138
|
+
if (text.startsWith("{") || text.startsWith("[")) {
|
|
139
|
+
// For JSON, try to close at a valid boundary
|
|
140
|
+
const lastComplete = Math.max(truncated.lastIndexOf("},"), truncated.lastIndexOf("}\n"), truncated.lastIndexOf("],"), truncated.lastIndexOf("]\n"));
|
|
141
|
+
if (lastComplete > maxChars * 0.5) {
|
|
142
|
+
truncated = truncated.substring(0, lastComplete + 1);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
// For plain text, break at newline
|
|
147
|
+
const lastNewline = truncated.lastIndexOf("\n");
|
|
148
|
+
if (lastNewline > maxChars * 0.5) {
|
|
149
|
+
truncated = truncated.substring(0, lastNewline);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
const truncatedTokens = estimateTokens(truncated);
|
|
153
|
+
return `${truncated}\n\n[truncated by ${toolName}: original ~${estimatedTokens} tokens, showing first ~${truncatedTokens} tokens]`;
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Identifies the boundary between "header" messages (system prompt, conversation history,
|
|
157
|
+
* initial user message) and "tool round" messages (assistant+tool pairs from the agentic loop).
|
|
158
|
+
*
|
|
159
|
+
* Tool rounds start at the first assistant message that has tool calls.
|
|
160
|
+
*/
|
|
161
|
+
function findToolRoundStart(messages) {
|
|
162
|
+
for (let i = 0; i < messages.length; i++) {
|
|
163
|
+
const msg = messages[i];
|
|
164
|
+
if (msg.role === Types.ConversationRoleTypes.Assistant &&
|
|
165
|
+
msg.toolCalls &&
|
|
166
|
+
msg.toolCalls.length > 0) {
|
|
167
|
+
return i;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return messages.length; // No tool rounds found
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Groups tool-round messages into logical rounds.
|
|
174
|
+
* Each round = one assistant message (with tool calls) + all subsequent tool response messages.
|
|
175
|
+
*/
|
|
176
|
+
function groupToolRounds(toolMessages) {
|
|
177
|
+
const rounds = [];
|
|
178
|
+
let currentRound = [];
|
|
179
|
+
for (const msg of toolMessages) {
|
|
180
|
+
if (msg.role === Types.ConversationRoleTypes.Assistant &&
|
|
181
|
+
currentRound.length > 0) {
|
|
182
|
+
// New assistant message starts a new round
|
|
183
|
+
rounds.push(currentRound);
|
|
184
|
+
currentRound = [msg];
|
|
185
|
+
}
|
|
186
|
+
else {
|
|
187
|
+
currentRound.push(msg);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
if (currentRound.length > 0) {
|
|
191
|
+
rounds.push(currentRound);
|
|
192
|
+
}
|
|
193
|
+
return rounds;
|
|
194
|
+
}
|
|
195
|
+
/**
|
|
196
|
+
* Windows tool rounds to keep the messages array within budget.
|
|
197
|
+
*
|
|
198
|
+
* Preserves:
|
|
199
|
+
* - "Header" messages (system prompt, conversation history, initial user message)
|
|
200
|
+
* - The most recent `keepRounds` tool rounds
|
|
201
|
+
*
|
|
202
|
+
* Drops older tool rounds and inserts a system message noting what was removed.
|
|
203
|
+
*
|
|
204
|
+
* @returns The windowed messages array
|
|
205
|
+
*/
|
|
206
|
+
export function windowToolRounds(messages, keepRounds) {
|
|
207
|
+
const headerEnd = findToolRoundStart(messages);
|
|
208
|
+
const header = messages.slice(0, headerEnd);
|
|
209
|
+
const toolMessages = messages.slice(headerEnd);
|
|
210
|
+
const rounds = groupToolRounds(toolMessages);
|
|
211
|
+
if (rounds.length <= keepRounds)
|
|
212
|
+
return messages;
|
|
213
|
+
const keptRounds = rounds.slice(-keepRounds);
|
|
214
|
+
const droppedCount = rounds.length - keepRounds;
|
|
215
|
+
// Summary marker so the LLM knows context was trimmed
|
|
216
|
+
const summaryMessage = {
|
|
217
|
+
__typename: "ConversationMessage",
|
|
218
|
+
role: Types.ConversationRoleTypes.System,
|
|
219
|
+
message: `[Context management: ${droppedCount} earlier tool calling round(s) were removed to stay within token limits. The most recent ${keepRounds} round(s) are preserved below.]`,
|
|
220
|
+
timestamp: new Date().toISOString(),
|
|
221
|
+
};
|
|
222
|
+
return [...header, summaryMessage, ...keptRounds.flat()];
|
|
223
|
+
}
|
|
@@ -81,6 +81,9 @@ export class UIEventAdapter {
|
|
|
81
81
|
case "context_window":
|
|
82
82
|
this.handleContextWindow(event.usage);
|
|
83
83
|
break;
|
|
84
|
+
case "context_management":
|
|
85
|
+
this.handleContextManagement(event);
|
|
86
|
+
break;
|
|
84
87
|
case "reasoning_start":
|
|
85
88
|
this.handleReasoningStart(event.format);
|
|
86
89
|
break;
|
|
@@ -611,6 +614,17 @@ export class UIEventAdapter {
|
|
|
611
614
|
timestamp: new Date(),
|
|
612
615
|
});
|
|
613
616
|
}
|
|
617
|
+
handleContextManagement(event) {
|
|
618
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
|
619
|
+
console.log(`📊 [UIEventAdapter] Context management: ${event.action.type}`);
|
|
620
|
+
}
|
|
621
|
+
this.emitUIEvent({
|
|
622
|
+
type: "context_management",
|
|
623
|
+
action: event.action,
|
|
624
|
+
usage: event.usage,
|
|
625
|
+
timestamp: event.timestamp,
|
|
626
|
+
});
|
|
627
|
+
}
|
|
614
628
|
handleReasoningStart(format) {
|
|
615
629
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
|
616
630
|
console.log(`🤔 [UIEventAdapter] Reasoning start - Format: ${format}`);
|
package/dist/types/agent.d.ts
CHANGED
|
@@ -8,9 +8,18 @@ export interface ArtifactCollector {
|
|
|
8
8
|
}[]>;
|
|
9
9
|
}
|
|
10
10
|
export type ToolHandler = (args: any, artifacts?: ArtifactCollector) => Promise<any>;
|
|
11
|
+
/**
 * Caller-supplied context-management settings. Every field is optional; the
 * default noted on each field applies when it is left out.
 */
export interface ContextStrategy {
    /**
     * Token ceiling for a single tool result; anything larger is truncated.
     * Default: 8192
     */
    toolResultTokenLimit?: number;
    /**
     * How many tool call/response rounds stay in context; older rounds are
     * dropped first-in, first-out. Default: 10
     */
    toolRoundLimit?: number;
    /**
     * Fraction of the token budget (0.0-1.0) at which client-side windowing
     * kicks in. Default: 0.75
     */
    rebudgetThreshold?: number;
}
|
|
11
19
|
/** Optional settings for an agent call. */
export interface AgentOptions {
    // NOTE(review): presumably caps the number of tool-calling rounds — confirm against the client.
    maxToolRounds?: number;
    // NOTE(review): unit is not visible here (likely milliseconds) — confirm against the client.
    timeout?: number;
    /** Context-management settings for this call; see ContextStrategy for the fields. */
    contextStrategy?: ContextStrategy;
}
|
|
15
24
|
export interface AgentMetrics {
|
|
16
25
|
totalTime: number;
|
|
@@ -27,6 +36,16 @@ export interface ContextWindowUsage {
|
|
|
27
36
|
percentage: number;
|
|
28
37
|
remainingTokens: number;
|
|
29
38
|
}
|
|
39
|
+
/**
 * One context-management step, discriminated by `type`. Carried as the
 * `action` of "context_management" events and collected in
 * `AgentResult.contextActions`.
 */
export type ContextManagementAction =
    | {
        /** A single tool result was shortened. */
        type: "truncated_tool_result";
        /** Name of the tool whose result was shortened. */
        toolName: string;
        /** Token count before truncation. */
        originalTokens: number;
        /** Token count after truncation. */
        truncatedTokens: number;
    }
    | {
        /** Older tool rounds were removed from context. */
        type: "windowed_tool_rounds";
        /** Number of rounds removed. */
        droppedRounds: number;
        /** Number of rounds left in context. */
        keptRounds: number;
    };
|
|
30
49
|
export interface AgentResult {
|
|
31
50
|
message: string;
|
|
32
51
|
conversationId: string;
|
|
@@ -36,6 +55,7 @@ export interface AgentResult {
|
|
|
36
55
|
metrics?: AgentMetrics;
|
|
37
56
|
usage?: UsageInfo;
|
|
38
57
|
contextWindow?: ContextWindowUsage;
|
|
58
|
+
contextActions?: ContextManagementAction[];
|
|
39
59
|
error?: AgentError;
|
|
40
60
|
}
|
|
41
61
|
export interface StreamAgentOptions {
|
|
@@ -44,6 +64,7 @@ export interface StreamAgentOptions {
|
|
|
44
64
|
smoothingEnabled?: boolean;
|
|
45
65
|
chunkingStrategy?: "character" | "word" | "sentence";
|
|
46
66
|
smoothingDelay?: number;
|
|
67
|
+
contextStrategy?: ContextStrategy;
|
|
47
68
|
}
|
|
48
69
|
export interface ToolCallResult {
|
|
49
70
|
id: string;
|
package/dist/types/internal.d.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Internal types used by the streaming implementation
|
|
3
3
|
* These are not exported to consumers of the library
|
|
4
4
|
*/
|
|
5
|
+
import { ContextManagementAction } from "./agent.js";
|
|
5
6
|
/**
|
|
6
7
|
* Low-level streaming events used internally by providers
|
|
7
8
|
* These get transformed into AgentStreamEvent by UIEventAdapter
|
|
@@ -64,6 +65,16 @@ export type StreamEvent = {
|
|
|
64
65
|
percentage: number;
|
|
65
66
|
remainingTokens: number;
|
|
66
67
|
};
|
|
68
|
+
} | {
|
|
69
|
+
type: "context_management";
|
|
70
|
+
action: ContextManagementAction;
|
|
71
|
+
usage: {
|
|
72
|
+
usedTokens: number;
|
|
73
|
+
maxTokens: number;
|
|
74
|
+
percentage: number;
|
|
75
|
+
remainingTokens: number;
|
|
76
|
+
};
|
|
77
|
+
timestamp: Date;
|
|
67
78
|
} | {
|
|
68
79
|
type: "reasoning_start";
|
|
69
80
|
format: "thinking_tag" | "markdown" | "custom";
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ConversationMessage, ConversationToolCall } from "../generated/graphql-types.js";
|
|
2
|
+
import { ContextManagementAction } from "./agent.js";
|
|
2
3
|
/**
|
|
3
4
|
* Tool execution status for streaming
|
|
4
5
|
*/
|
|
@@ -44,6 +45,16 @@ export type AgentStreamEvent = {
|
|
|
44
45
|
conversationId: string;
|
|
45
46
|
timestamp: Date;
|
|
46
47
|
} | ContextWindowEvent | {
|
|
48
|
+
type: "context_management";
|
|
49
|
+
action: ContextManagementAction;
|
|
50
|
+
usage: {
|
|
51
|
+
usedTokens: number;
|
|
52
|
+
maxTokens: number;
|
|
53
|
+
percentage: number;
|
|
54
|
+
remainingTokens: number;
|
|
55
|
+
};
|
|
56
|
+
timestamp: Date;
|
|
57
|
+
} | {
|
|
47
58
|
type: "message_update";
|
|
48
59
|
message: StreamingConversationMessage;
|
|
49
60
|
isStreaming: boolean;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "graphlit-client",
|
|
3
|
-
"version": "1.0.20260217005",
|
|
3
|
+
"version": "1.0.20260218002",
|
|
4
4
|
"description": "Graphlit API Client for TypeScript",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/client.js",
|
|
@@ -73,6 +73,9 @@
|
|
|
73
73
|
},
|
|
74
74
|
"@aws-sdk/client-bedrock-runtime": {
|
|
75
75
|
"optional": true
|
|
76
|
+
},
|
|
77
|
+
"js-tiktoken": {
|
|
78
|
+
"optional": true
|
|
76
79
|
}
|
|
77
80
|
},
|
|
78
81
|
"optionalDependencies": {
|
|
@@ -81,6 +84,7 @@
|
|
|
81
84
|
"@mistralai/mistralai": "^1.11.0",
|
|
82
85
|
"cohere-ai": "^7.20.0",
|
|
83
86
|
"groq-sdk": "^0.25.0",
|
|
87
|
+
"js-tiktoken": "^1.0.16",
|
|
84
88
|
"openai": "^5.3.0"
|
|
85
89
|
},
|
|
86
90
|
"devDependencies": {
|