claude-brain 0.30.2 → 0.30.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +29 -29
- package/package.json +7 -3
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/scripts/postinstall.mjs +531 -531
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +210 -205
- package/src/cli/auto-setup.ts +75 -75
- package/src/cli/auto-start.ts +266 -266
- package/src/cli/bin.ts +264 -264
- package/src/cli/commands/autostart.ts +90 -90
- package/src/cli/commands/chroma.ts +578 -577
- package/src/cli/commands/export-training.ts +70 -70
- package/src/cli/commands/export.ts +130 -130
- package/src/cli/commands/git-hook.ts +183 -183
- package/src/cli/commands/hooks.ts +217 -217
- package/src/cli/commands/init.ts +123 -123
- package/src/cli/commands/install-mcp.ts +122 -111
- package/src/cli/commands/models.ts +979 -979
- package/src/cli/commands/pack.ts +200 -200
- package/src/cli/commands/refresh.ts +344 -339
- package/src/cli/commands/reindex.ts +120 -120
- package/src/cli/commands/serve.ts +466 -463
- package/src/cli/commands/start.ts +44 -44
- package/src/cli/commands/status.ts +220 -203
- package/src/cli/commands/uninstall-mcp.ts +45 -41
- package/src/cli/commands/update.ts +130 -124
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/code-intelligence/indexer.ts +352 -352
- package/src/code-intelligence/linker.ts +178 -178
- package/src/code-intelligence/parser.ts +484 -484
- package/src/code-intelligence/query.ts +291 -291
- package/src/code-intelligence/schema.ts +83 -83
- package/src/code-intelligence/types.ts +95 -95
- package/src/config/defaults.ts +52 -52
- package/src/config/home.ts +56 -56
- package/src/config/index.ts +5 -5
- package/src/config/loader.ts +192 -192
- package/src/config/schema.ts +446 -415
- package/src/config/validator.ts +182 -182
- package/src/context/assembler.ts +407 -400
- package/src/context/index.ts +79 -79
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +122 -121
- package/src/health/index.ts +233 -232
- package/src/hooks/brain-hook.ts +134 -131
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/claude-code-mastery.md +112 -112
- package/src/hooks/context-hook.ts +260 -245
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +211 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +306 -288
- package/src/hooks/interceptor-hook.ts +204 -201
- package/src/hooks/passive-classifier.ts +397 -397
- package/src/hooks/queue.ts +160 -129
- package/src/hooks/session-tracker.ts +312 -312
- package/src/hooks/types.ts +52 -52
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +7 -7
- package/src/intelligence/hf-downloader.ts +222 -222
- package/src/intelligence/hf-manifest.json +78 -78
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/inference-router.ts +762 -762
- package/src/intelligence/model-manager.ts +263 -245
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +213 -207
- package/src/intelligence/prediction/index.ts +7 -7
- package/src/intelligence/prediction/recommender.ts +276 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
- package/src/intelligence/reasoning/index.ts +7 -7
- package/src/intelligence/temporal/evolution.ts +193 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +272 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/intelligence/tokenizer.ts +118 -118
- package/src/knowledge/entity-extractor.ts +447 -443
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +166 -166
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +211 -192
- package/src/memory/chroma/collection-manager.ts +92 -92
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +177 -175
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +319 -315
- package/src/memory/chroma/store.ts +755 -747
- package/src/memory/compression.ts +121 -121
- package/src/memory/consolidation/archiver.ts +162 -165
- package/src/memory/consolidation/merger.ts +182 -186
- package/src/memory/consolidation/scorer.ts +136 -136
- package/src/memory/database.ts +9 -0
- package/src/memory/dual-write.ts +145 -0
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +347 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/fts5-search.ts +692 -633
- package/src/memory/index.ts +943 -1060
- package/src/memory/migrations/add-fts5.ts +118 -108
- package/src/memory/patterns.ts +438 -438
- package/src/memory/pruning.ts +60 -60
- package/src/memory/schema.ts +88 -88
- package/src/memory/store.ts +911 -787
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -297
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +221 -221
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +221 -221
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +165 -165
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +203 -203
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +252 -252
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +189 -188
- package/src/retrieval/reranker/model.ts +99 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +454 -454
- package/src/routing/handlers/exploration-handler.ts +369 -0
- package/src/routing/handlers/index.ts +19 -0
- package/src/routing/handlers/memory-handler.ts +273 -0
- package/src/routing/handlers/mutation-handler.ts +241 -0
- package/src/routing/handlers/recall-handler.ts +642 -0
- package/src/routing/handlers/shared.ts +515 -0
- package/src/routing/handlers/types.ts +48 -0
- package/src/routing/intent-classifier.ts +552 -552
- package/src/routing/response-filter.ts +399 -391
- package/src/routing/router.ts +245 -2193
- package/src/routing/search-engine.ts +521 -514
- package/src/routing/types.ts +104 -94
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/auto-updater.ts +283 -276
- package/src/server/handlers/call-tool.ts +159 -159
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/auto-remember.ts +165 -165
- package/src/server/handlers/tools/brain.ts +86 -86
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/get-code-standards.ts +123 -123
- package/src/server/handlers/tools/get-corrections.ts +152 -152
- package/src/server/handlers/tools/get-patterns.ts +156 -156
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/index.ts +30 -30
- package/src/server/handlers/tools/init-project.ts +756 -756
- package/src/server/handlers/tools/list-projects.ts +126 -126
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +132 -132
- package/src/server/handlers/tools/record-correction.ts +131 -131
- package/src/server/handlers/tools/remember-decision.ts +168 -168
- package/src/server/handlers/tools/schemas.ts +179 -179
- package/src/server/handlers/tools/search-code.ts +122 -122
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/http-api.ts +215 -1229
- package/src/server/mcp-proxy.ts +85 -84
- package/src/server/mcp-server.ts +285 -284
- package/src/server/middleware/auth.ts +39 -0
- package/src/server/middleware/error-handler.ts +37 -0
- package/src/server/middleware/rate-limit.ts +53 -0
- package/src/server/middleware/validate.ts +42 -0
- package/src/server/pid-manager.ts +137 -136
- package/src/server/providers/resources.ts +581 -581
- package/src/server/routes/code.ts +228 -0
- package/src/server/routes/context.ts +26 -0
- package/src/server/routes/health.ts +19 -0
- package/src/server/routes/helpers.ts +100 -0
- package/src/server/routes/hooks.ts +197 -0
- package/src/server/routes/mcp.ts +47 -0
- package/src/server/routes/memory.ts +397 -0
- package/src/server/routes/models.ts +96 -0
- package/src/server/routes/projects.ts +89 -0
- package/src/server/routes/types.ts +21 -0
- package/src/server/schemas/api-schemas.ts +202 -0
- package/src/server/services.ts +720 -720
- package/src/server/utils/memory-indicator.ts +84 -84
- package/src/server/utils/response-formatter.ts +129 -129
- package/src/server/web-viewer.ts +1145 -1115
- package/src/setup/index.ts +38 -38
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.ts +666 -666
- package/src/tools/types.ts +412 -412
- package/src/training/data-store.ts +320 -298
- package/src/training/retrain-pipeline.ts +399 -394
- package/src/utils/error-handler.ts +136 -136
- package/src/utils/index.ts +58 -58
- package/src/utils/kill-port.ts +55 -53
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/safe-path.ts +43 -0
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/index.ts +4 -3
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +4 -1
- package/src/vault/reader.ts +44 -1
- package/src/vault/watcher.ts +24 -1
- package/src/vault/writer.ts +487 -413
- package/skills/persistent-memory/SKILL.md +0 -148
- package/skills/persistent-memory/references/tool-reference.md +0 -90
|
@@ -1,118 +1,118 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tokenizer — SLM Upgrade Phase 4B
|
|
3
|
-
* GPT-2 BPE tokenizer for ONNX model inference.
|
|
4
|
-
*
|
|
5
|
-
* Strategy:
|
|
6
|
-
* 1. Try to dynamically import `tiktoken` (JS package)
|
|
7
|
-
* 2. Fall back to a simple whitespace tokenizer with hash-based IDs
|
|
8
|
-
*
|
|
9
|
-
* The tokenizer pads/truncates sequences to a fixed length and
|
|
10
|
-
* returns both input_ids and attention_mask arrays.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import type { Logger } from 'pino'
|
|
14
|
-
|
|
15
|
-
export interface TokenizerOutput {
|
|
16
|
-
inputIds: number[]
|
|
17
|
-
attentionMask: number[]
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export interface Tokenizer {
|
|
21
|
-
encode(text: string, maxLength: number): TokenizerOutput
|
|
22
|
-
decode(tokenIds: number[]): string
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
/** Singleton cache */
|
|
26
|
-
let cachedTokenizer: Tokenizer | null = null
|
|
27
|
-
|
|
28
|
-
/**
|
|
29
|
-
* Get a tokenizer instance. Tries tiktoken first, falls back to hash-based.
|
|
30
|
-
*/
|
|
31
|
-
export async function getTokenizer(logger?: Logger): Promise<Tokenizer> {
|
|
32
|
-
if (cachedTokenizer) return cachedTokenizer
|
|
33
|
-
|
|
34
|
-
// Try tiktoken (JS binding for GPT-2 BPE)
|
|
35
|
-
try {
|
|
36
|
-
const tiktoken = await import('tiktoken')
|
|
37
|
-
const enc = tiktoken.encoding_for_model('gpt2')
|
|
38
|
-
|
|
39
|
-
cachedTokenizer = {
|
|
40
|
-
encode(text: string, maxLength: number): TokenizerOutput {
|
|
41
|
-
const tokens = Array.from(enc.encode(text))
|
|
42
|
-
|
|
43
|
-
// Truncate if necessary
|
|
44
|
-
const truncated = tokens.slice(0, maxLength)
|
|
45
|
-
|
|
46
|
-
// Pad to maxLength
|
|
47
|
-
const inputIds = new Array(maxLength).fill(0)
|
|
48
|
-
const attentionMask = new Array(maxLength).fill(0)
|
|
49
|
-
|
|
50
|
-
for (let i = 0; i < truncated.length; i++) {
|
|
51
|
-
inputIds[i] = truncated[i]
|
|
52
|
-
attentionMask[i] = 1
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
return { inputIds, attentionMask }
|
|
56
|
-
},
|
|
57
|
-
decode(tokenIds: number[]): string {
|
|
58
|
-
// Filter out padding (0) tokens
|
|
59
|
-
const filtered = tokenIds.filter(id => id !== 0)
|
|
60
|
-
return new TextDecoder().decode(enc.decode(new Uint32Array(filtered)))
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
logger?.debug('Using tiktoken GPT-2 tokenizer')
|
|
65
|
-
return cachedTokenizer
|
|
66
|
-
} catch {
|
|
67
|
-
// tiktoken not available
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
// Fallback: simple whitespace tokenizer with hash-based IDs
|
|
71
|
-
logger?.warn('tiktoken not available — using fallback hash-based tokenizer (reduced accuracy)')
|
|
72
|
-
|
|
73
|
-
cachedTokenizer = {
|
|
74
|
-
encode(text: string, maxLength: number): TokenizerOutput {
|
|
75
|
-
// Split on whitespace and punctuation, filter empties
|
|
76
|
-
const tokens = text
|
|
77
|
-
.toLowerCase()
|
|
78
|
-
.split(/(\s+|[.,!?;:'"()\[\]{}<>\/\\@#$%^&*+=~`|_-]+)/)
|
|
79
|
-
.filter(t => t.trim().length > 0)
|
|
80
|
-
|
|
81
|
-
// Hash each token to a stable ID in [1, 50256] range (GPT-2 vocab size)
|
|
82
|
-
const VOCAB_SIZE = 50256
|
|
83
|
-
const tokenIds = tokens.map(t => {
|
|
84
|
-
let hash = 5381
|
|
85
|
-
for (let i = 0; i < t.length; i++) {
|
|
86
|
-
hash = ((hash << 5) + hash + t.charCodeAt(i)) & 0x7fffffff
|
|
87
|
-
}
|
|
88
|
-
return (hash % (VOCAB_SIZE - 1)) + 1 // avoid 0 (used for padding)
|
|
89
|
-
})
|
|
90
|
-
|
|
91
|
-
// Truncate
|
|
92
|
-
const truncated = tokenIds.slice(0, maxLength)
|
|
93
|
-
|
|
94
|
-
// Pad to maxLength
|
|
95
|
-
const inputIds = new Array(maxLength).fill(0)
|
|
96
|
-
const attentionMask = new Array(maxLength).fill(0)
|
|
97
|
-
|
|
98
|
-
for (let i = 0; i < truncated.length; i++) {
|
|
99
|
-
inputIds[i] = truncated[i]
|
|
100
|
-
attentionMask[i] = 1
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
return { inputIds, attentionMask }
|
|
104
|
-
},
|
|
105
|
-
decode(_tokenIds: number[]): string {
|
|
106
|
-
// Hash-based tokenizer is one-way; decode is not possible.
|
|
107
|
-
// Compression will fall back to returning original text.
|
|
108
|
-
return ''
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
return cachedTokenizer
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/** Reset cached tokenizer (for testing) */
|
|
116
|
-
export function _resetTokenizerForTesting(): void {
|
|
117
|
-
cachedTokenizer = null
|
|
118
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Tokenizer — SLM Upgrade Phase 4B
|
|
3
|
+
* GPT-2 BPE tokenizer for ONNX model inference.
|
|
4
|
+
*
|
|
5
|
+
* Strategy:
|
|
6
|
+
* 1. Try to dynamically import `tiktoken` (JS package)
|
|
7
|
+
* 2. Fall back to a simple whitespace tokenizer with hash-based IDs
|
|
8
|
+
*
|
|
9
|
+
* The tokenizer pads/truncates sequences to a fixed length and
|
|
10
|
+
* returns both input_ids and attention_mask arrays.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Logger } from 'pino'
|
|
14
|
+
|
|
15
|
+
export interface TokenizerOutput {
|
|
16
|
+
inputIds: number[]
|
|
17
|
+
attentionMask: number[]
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface Tokenizer {
|
|
21
|
+
encode(text: string, maxLength: number): TokenizerOutput
|
|
22
|
+
decode(tokenIds: number[]): string
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/** Singleton cache */
|
|
26
|
+
let cachedTokenizer: Tokenizer | null = null
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Get a tokenizer instance. Tries tiktoken first, falls back to hash-based.
|
|
30
|
+
*/
|
|
31
|
+
export async function getTokenizer(logger?: Logger): Promise<Tokenizer> {
|
|
32
|
+
if (cachedTokenizer) return cachedTokenizer
|
|
33
|
+
|
|
34
|
+
// Try tiktoken (JS binding for GPT-2 BPE)
|
|
35
|
+
try {
|
|
36
|
+
const tiktoken = await import('tiktoken')
|
|
37
|
+
const enc = tiktoken.encoding_for_model('gpt2')
|
|
38
|
+
|
|
39
|
+
cachedTokenizer = {
|
|
40
|
+
encode(text: string, maxLength: number): TokenizerOutput {
|
|
41
|
+
const tokens = Array.from(enc.encode(text))
|
|
42
|
+
|
|
43
|
+
// Truncate if necessary
|
|
44
|
+
const truncated = tokens.slice(0, maxLength)
|
|
45
|
+
|
|
46
|
+
// Pad to maxLength
|
|
47
|
+
const inputIds = new Array(maxLength).fill(0)
|
|
48
|
+
const attentionMask = new Array(maxLength).fill(0)
|
|
49
|
+
|
|
50
|
+
for (let i = 0; i < truncated.length; i++) {
|
|
51
|
+
inputIds[i] = truncated[i]
|
|
52
|
+
attentionMask[i] = 1
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return { inputIds, attentionMask }
|
|
56
|
+
},
|
|
57
|
+
decode(tokenIds: number[]): string {
|
|
58
|
+
// Filter out padding (0) tokens
|
|
59
|
+
const filtered = tokenIds.filter(id => id !== 0)
|
|
60
|
+
return new TextDecoder().decode(enc.decode(new Uint32Array(filtered)))
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
logger?.debug('Using tiktoken GPT-2 tokenizer')
|
|
65
|
+
return cachedTokenizer
|
|
66
|
+
} catch {
|
|
67
|
+
// tiktoken not available
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Fallback: simple whitespace tokenizer with hash-based IDs
|
|
71
|
+
logger?.warn('tiktoken not available — using fallback hash-based tokenizer (reduced accuracy)')
|
|
72
|
+
|
|
73
|
+
cachedTokenizer = {
|
|
74
|
+
encode(text: string, maxLength: number): TokenizerOutput {
|
|
75
|
+
// Split on whitespace and punctuation, filter empties
|
|
76
|
+
const tokens = text
|
|
77
|
+
.toLowerCase()
|
|
78
|
+
.split(/(\s+|[.,!?;:'"()\[\]{}<>\/\\@#$%^&*+=~`|_-]+)/)
|
|
79
|
+
.filter(t => t.trim().length > 0)
|
|
80
|
+
|
|
81
|
+
// Hash each token to a stable ID in [1, 50256] range (GPT-2 vocab size)
|
|
82
|
+
const VOCAB_SIZE = 50256
|
|
83
|
+
const tokenIds = tokens.map(t => {
|
|
84
|
+
let hash = 5381
|
|
85
|
+
for (let i = 0; i < t.length; i++) {
|
|
86
|
+
hash = ((hash << 5) + hash + t.charCodeAt(i)) & 0x7fffffff
|
|
87
|
+
}
|
|
88
|
+
return (hash % (VOCAB_SIZE - 1)) + 1 // avoid 0 (used for padding)
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
// Truncate
|
|
92
|
+
const truncated = tokenIds.slice(0, maxLength)
|
|
93
|
+
|
|
94
|
+
// Pad to maxLength
|
|
95
|
+
const inputIds = new Array(maxLength).fill(0)
|
|
96
|
+
const attentionMask = new Array(maxLength).fill(0)
|
|
97
|
+
|
|
98
|
+
for (let i = 0; i < truncated.length; i++) {
|
|
99
|
+
inputIds[i] = truncated[i]
|
|
100
|
+
attentionMask[i] = 1
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return { inputIds, attentionMask }
|
|
104
|
+
},
|
|
105
|
+
decode(_tokenIds: number[]): string {
|
|
106
|
+
// Hash-based tokenizer is one-way; decode is not possible.
|
|
107
|
+
// Compression will fall back to returning original text.
|
|
108
|
+
return ''
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return cachedTokenizer
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/** Reset cached tokenizer (for testing) */
|
|
116
|
+
export function _resetTokenizerForTesting(): void {
|
|
117
|
+
cachedTokenizer = null
|
|
118
|
+
}
|