@mastra/memory 1.18.1 → 1.18.2-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/dist/{chunk-LCRYFBV3.cjs → chunk-4AQHFADP.cjs} +48 -3
- package/dist/chunk-4AQHFADP.cjs.map +1 -0
- package/dist/{chunk-KGYJHNI6.js → chunk-LCALB7W6.js} +48 -3
- package/dist/chunk-LCALB7W6.js.map +1 -0
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/assets/SOURCE_MAP.json +29 -29
- package/dist/index.cjs +13 -13
- package/dist/index.js +4 -4
- package/dist/{observational-memory-5UNUGOE5.cjs → observational-memory-7M2T5EOV.cjs} +26 -26
- package/dist/{observational-memory-5UNUGOE5.cjs.map → observational-memory-7M2T5EOV.cjs.map} +1 -1
- package/dist/{observational-memory-7PMPJCPD.js → observational-memory-SYNXJVL4.js} +3 -3
- package/dist/{observational-memory-7PMPJCPD.js.map → observational-memory-SYNXJVL4.js.map} +1 -1
- package/dist/processors/index.cjs +24 -24
- package/dist/processors/index.js +1 -1
- package/dist/processors/observational-memory/token-counter.d.ts.map +1 -1
- package/package.json +5 -5
- package/dist/chunk-KGYJHNI6.js.map +0 -1
- package/dist/chunk-LCRYFBV3.cjs.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,42 @@
|
|
|
1
1
|
# @mastra/memory
|
|
2
2
|
|
|
3
|
+
## 1.18.2-alpha.0
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Fixed Observational Memory missing the observation threshold for messages with large file parts. Previously `TokenCounter`'s local/sync counting path only stringified the file descriptor (`type`, `mimeType`, `filename`) for non-image files, so a 100KB PDF looked like ~8 tokens to OM and the conversation kept replaying the full unobserved history past every reasonable threshold. ([#16562](https://github.com/mastra-ai/mastra/pull/16562))
|
|
8
|
+
|
|
9
|
+
`TokenCounter` now estimates non-image file part tokens locally from the attachment's byte size and mime type using a per-provider heuristic, mirroring the existing local image-token estimator:
|
|
10
|
+
- Anthropic PDFs ≈ `bytes / 3` (floor 1500)
|
|
11
|
+
- Google PDFs ≈ `bytes / 20` (floor 258)
|
|
12
|
+
- OpenAI / unknown PDFs ≈ `bytes / 4` (floor 500)
|
|
13
|
+
- Text-ish mime types (`text/*`, JSON, XML, YAML) ≈ `bytes / 4`
|
|
14
|
+
- Other binary ≈ `bytes / 4`
|
|
15
|
+
|
|
16
|
+
URL-only file parts (no body to size) keep the previous descriptor-only local estimate. `countMessagesAsync()` continues to prefer provider token-count endpoints for supported providers; this change only improves the local fallback used when no provider endpoint is available.
|
|
17
|
+
|
|
18
|
+
```ts
|
|
19
|
+
// Before: this PDF counted as ~8 tokens locally regardless of size, so OM never triggered.
|
|
20
|
+
const part = {
|
|
21
|
+
type: 'file',
|
|
22
|
+
data: largePdfBase64,
|
|
23
|
+
mimeType: 'application/pdf',
|
|
24
|
+
filename: 'report.pdf',
|
|
25
|
+
};
|
|
26
|
+
// counter.countMessage(message) ≈ 8
|
|
27
|
+
|
|
28
|
+
// After: estimated locally from byte size on the active provider.
|
|
29
|
+
// counter.countMessage(message) ≈ tens of thousands of tokens
|
|
30
|
+
// → OM threshold trips as expected.
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
The internal token-estimate cache version was bumped, which invalidates persisted estimates from older `@mastra/memory` releases on the next read; entries are recomputed automatically.
|
|
34
|
+
|
|
35
|
+
Fixes https://github.com/mastra-ai/mastra/issues/16522
|
|
36
|
+
|
|
37
|
+
- Updated dependencies [[`bad08e9`](https://github.com/mastra-ai/mastra/commit/bad08e99c5291884c3ac76743c78c74f53a302c2)]:
|
|
38
|
+
- @mastra/core@1.35.0-alpha.1
|
|
39
|
+
|
|
3
40
|
## 1.18.1
|
|
4
41
|
|
|
5
42
|
### Patch Changes
|
|
@@ -5539,7 +5539,7 @@ var IMAGE_FILE_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
|
5539
5539
|
"heif",
|
|
5540
5540
|
"avif"
|
|
5541
5541
|
]);
|
|
5542
|
-
var TOKEN_ESTIMATE_CACHE_VERSION =
|
|
5542
|
+
var TOKEN_ESTIMATE_CACHE_VERSION = 7;
|
|
5543
5543
|
var CLIENT_TOKEN_ESTIMATE_SOURCE = "client";
|
|
5544
5544
|
var DEFAULT_IMAGE_ESTIMATOR = {
|
|
5545
5545
|
baseTokens: 85,
|
|
@@ -6050,6 +6050,42 @@ function estimateAnthropicImageTokens(dimensions, sourceStats) {
|
|
|
6050
6050
|
}
|
|
6051
6051
|
return 1600;
|
|
6052
6052
|
}
|
|
6053
|
+
/**
 * Estimates the token cost of a non-image file attachment from its byte size.
 *
 * PDFs use a per-provider bytes-per-token ratio with a minimum floor:
 *   - "google":    ceil(bytes / 20), never below 258
 *   - "anthropic": ceil(bytes / 3),  never below 1500
 *   - any other:   ceil(bytes / 4),  never below 500
 * Every other mime type (text-like or binary alike) uses ceil(bytes / 4),
 * never below 1.
 *
 * @param {string | null | undefined} provider - Provider id; only "google" and
 *   "anthropic" are special-cased, anything else takes the generic ratio.
 * @param {string | null | undefined} mimeType - Attachment mime type. Casing,
 *   surrounding whitespace and parameters after ";" are ignored.
 * @param {number} sizeBytes - Attachment size in bytes.
 * @returns {number} A finite, positive integer token estimate.
 */
function estimateFileTokensFromBytes(provider, mimeType, sizeBytes) {
  // Guard against non-string mime types so normalization can never throw.
  const rawMime = typeof mimeType === "string" ? mimeType : "";
  const normalizedMime = rawMime.toLowerCase().split(";", 1)[0].trim();

  // Math.max(floor, NaN) is NaN, and Infinity would pass straight through, so a
  // bad size would poison every total built on this estimate. Treat non-finite
  // or negative sizes as zero bytes and let the per-type floor apply.
  const safeBytes = Number.isFinite(sizeBytes) && sizeBytes > 0 ? sizeBytes : 0;

  if (normalizedMime === "application/pdf") {
    if (provider === "google") return Math.max(258, Math.ceil(safeBytes / 20));
    if (provider === "anthropic") return Math.max(1500, Math.ceil(safeBytes / 3));
    return Math.max(500, Math.ceil(safeBytes / 4));
  }

  // Text-like types (text/*, JSON, XML, YAML) and opaque binaries share the
  // same ~4 bytes per token ratio, so no separate branch is needed.
  return Math.max(1, Math.ceil(safeBytes / 4));
}
|
|
6065
|
+
/**
 * Builds a byte-size-based token estimate for a non-image file part.
 *
 * The part's `data` value is passed through `resolveImageSourceStats`; when
 * that yields no `sizeBytes`, the function returns `undefined` so the caller
 * can choose a different estimate.
 *
 * @param {object | undefined} modelContext - Model context; read for the
 *   provider id (via `resolveProviderId`) and `modelId`.
 * @param {object} part - The file message part being measured.
 * @returns {{ tokens: number, cachePayload: string } | undefined} The estimate
 *   plus a JSON string describing the inputs it was derived from, or
 *   `undefined` when no byte size is available.
 */
function estimateNonImageFileTokens(modelContext, part) {
  const stats = resolveImageSourceStats(getObjectValue(part, "data"));
  if (typeof stats.sizeBytes === "undefined") {
    return undefined;
  }

  const providerId = resolveProviderId(modelContext);
  const resolvedModelId = modelContext?.modelId ?? null;
  const resolvedMimeType = getAttachmentMimeType(part, "application/octet-stream");
  const resolvedFilename = getAttachmentFilename(part) ?? null;

  // Key order is kept stable because the payload is serialized to a string.
  const payload = {
    kind: "non-image-file",
    provider: providerId ?? "fallback",
    modelId: resolvedModelId,
    estimator: "bytes",
    source: stats.source,
    sizeBytes: stats.sizeBytes,
    mimeType: resolvedMimeType,
    filename: resolvedFilename
  };

  return {
    tokens: estimateFileTokensFromBytes(providerId, resolvedMimeType, stats.sizeBytes),
    cachePayload: JSON.stringify(payload)
  };
}
|
|
6053
6089
|
function estimateGoogleImageTokens(modelContext, part, dimensions) {
|
|
6054
6090
|
if (isGoogleGemini3Model(modelContext)) {
|
|
6055
6091
|
const mediaResolution = resolveGoogleMediaResolution(part);
|
|
@@ -6477,6 +6513,15 @@ var TokenCounter = class _TokenCounter {
|
|
|
6477
6513
|
return this.readOrPersistFixedPartEstimate(part, "image-like-file", estimate.cachePayload, estimate.tokens);
|
|
6478
6514
|
}
|
|
6479
6515
|
if (part.type === "file") {
|
|
6516
|
+
const byteEstimate = estimateNonImageFileTokens(this.getModelContext(), part);
|
|
6517
|
+
if (byteEstimate) {
|
|
6518
|
+
return this.readOrPersistFixedPartEstimate(
|
|
6519
|
+
part,
|
|
6520
|
+
"non-image-file",
|
|
6521
|
+
byteEstimate.cachePayload,
|
|
6522
|
+
byteEstimate.tokens
|
|
6523
|
+
);
|
|
6524
|
+
}
|
|
6480
6525
|
return this.readOrPersistPartEstimate(part, "file-descriptor", serializeNonImageFilePartForTokenCounting(part));
|
|
6481
6526
|
}
|
|
6482
6527
|
return void 0;
|
|
@@ -9692,5 +9737,5 @@ exports.stripEphemeralAnchorIds = stripEphemeralAnchorIds;
|
|
|
9692
9737
|
exports.stripObservationGroups = stripObservationGroups;
|
|
9693
9738
|
exports.truncateStringByTokens = truncateStringByTokens;
|
|
9694
9739
|
exports.wrapInObservationGroup = wrapInObservationGroup;
|
|
9695
|
-
//# sourceMappingURL=chunk-LCRYFBV3.cjs.map
|
|
9696
|
-
//# sourceMappingURL=chunk-LCRYFBV3.cjs.map
|
|
9740
|
+
//# sourceMappingURL=chunk-4AQHFADP.cjs.map
|
|
9741
|
+
//# sourceMappingURL=chunk-4AQHFADP.cjs.map
|