claudecode-rlm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/dist/config.d.ts +176 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +103 -0
- package/dist/config.js.map +1 -0
- package/dist/graph/index.d.ts +10 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +10 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/ingestion.d.ts +68 -0
- package/dist/graph/ingestion.d.ts.map +1 -0
- package/dist/graph/ingestion.js +417 -0
- package/dist/graph/ingestion.js.map +1 -0
- package/dist/graph/storage.d.ts +51 -0
- package/dist/graph/storage.d.ts.map +1 -0
- package/dist/graph/storage.js +552 -0
- package/dist/graph/storage.js.map +1 -0
- package/dist/graph/traversal.d.ts +54 -0
- package/dist/graph/traversal.d.ts.map +1 -0
- package/dist/graph/traversal.js +255 -0
- package/dist/graph/traversal.js.map +1 -0
- package/dist/graph/types.d.ts +152 -0
- package/dist/graph/types.d.ts.map +1 -0
- package/dist/graph/types.js +94 -0
- package/dist/graph/types.js.map +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +190 -0
- package/dist/index.js.map +1 -0
- package/dist/plugin-types.d.ts +96 -0
- package/dist/plugin-types.d.ts.map +1 -0
- package/dist/plugin-types.js +17 -0
- package/dist/plugin-types.js.map +1 -0
- package/dist/search/enhanced.d.ts +95 -0
- package/dist/search/enhanced.d.ts.map +1 -0
- package/dist/search/enhanced.js +194 -0
- package/dist/search/enhanced.js.map +1 -0
- package/dist/search/index.d.ts +8 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +8 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/patterns.d.ts +38 -0
- package/dist/search/patterns.d.ts.map +1 -0
- package/dist/search/patterns.js +124 -0
- package/dist/search/patterns.js.map +1 -0
- package/dist/tools/graph-query.d.ts +14 -0
- package/dist/tools/graph-query.d.ts.map +1 -0
- package/dist/tools/graph-query.js +203 -0
- package/dist/tools/graph-query.js.map +1 -0
- package/dist/tools/index.d.ts +8 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +8 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/memory.d.ts +20 -0
- package/dist/tools/memory.d.ts.map +1 -0
- package/dist/tools/memory.js +181 -0
- package/dist/tools/memory.js.map +1 -0
- package/package.json +66 -0
- package/src/config.ts +111 -0
- package/src/graph/index.ts +10 -0
- package/src/graph/ingestion.ts +528 -0
- package/src/graph/storage.ts +639 -0
- package/src/graph/traversal.ts +348 -0
- package/src/graph/types.ts +144 -0
- package/src/index.ts +238 -0
- package/src/plugin-types.ts +107 -0
- package/src/search/enhanced.ts +264 -0
- package/src/search/index.ts +23 -0
- package/src/search/patterns.ts +139 -0
- package/src/tools/graph-query.ts +257 -0
- package/src/tools/index.ts +8 -0
- package/src/tools/memory.ts +208 -0
package/package.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/package.json",
|
|
3
|
+
"name": "claudecode-rlm",
|
|
4
|
+
"version": "1.0.0",
|
|
5
|
+
"description": "Advanced RLM (Recursive Language Model) plugin for Claude Code - Knowledge graph-based context storage with 74x faster reads",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"author": "Michael Thornton <tekcin@yahoo.com>",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "https://github.com/tekcin/claudecode-rlm"
|
|
12
|
+
},
|
|
13
|
+
"homepage": "https://github.com/tekcin/claudecode-rlm#readme",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/tekcin/claudecode-rlm/issues"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"claude-code",
|
|
19
|
+
"claudecode",
|
|
20
|
+
"plugin",
|
|
21
|
+
"rlm",
|
|
22
|
+
"context",
|
|
23
|
+
"knowledge-graph",
|
|
24
|
+
"ai",
|
|
25
|
+
"llm",
|
|
26
|
+
"anthropic"
|
|
27
|
+
],
|
|
28
|
+
"main": "./dist/index.js",
|
|
29
|
+
"types": "./dist/index.d.ts",
|
|
30
|
+
"exports": {
|
|
31
|
+
".": {
|
|
32
|
+
"import": "./dist/index.js",
|
|
33
|
+
"types": "./dist/index.d.ts"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"files": [
|
|
37
|
+
"dist",
|
|
38
|
+
"src"
|
|
39
|
+
],
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsc",
|
|
42
|
+
"dev": "tsc --watch",
|
|
43
|
+
"typecheck": "tsc --noEmit",
|
|
44
|
+
"test": "node test/benchmark.cjs",
|
|
45
|
+
"test:optimized": "node test/benchmark-optimized.cjs",
|
|
46
|
+
"prepublishOnly": "npm run build"
|
|
47
|
+
},
|
|
48
|
+
"dependencies": {
|
|
49
|
+
"zod": "^3.23.0"
|
|
50
|
+
},
|
|
51
|
+
"peerDependencies": {
|
|
52
|
+
"@anthropic-ai/claude-code": ">=1.0.0"
|
|
53
|
+
},
|
|
54
|
+
"peerDependenciesMeta": {
|
|
55
|
+
"@anthropic-ai/claude-code": {
|
|
56
|
+
"optional": true
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"@types/node": "^22.0.0",
|
|
61
|
+
"typescript": "^5.6.0"
|
|
62
|
+
},
|
|
63
|
+
"engines": {
|
|
64
|
+
"node": ">=20.0.0"
|
|
65
|
+
}
|
|
66
|
+
}
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plugin configuration schema.
|
|
3
|
+
*
|
|
4
|
+
* Configuration is loaded from .claude/claudecode.jsonc under the
|
|
5
|
+
* "claudecode-rlm" key.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { z } from "zod"
|
|
9
|
+
|
|
10
|
+
/**
 * Graph configuration schema.
 *
 * Controls whether the knowledge-graph features run at all and how far
 * a single search is allowed to walk the graph.
 */
export const GraphConfigSchema = z.object({
  /** Whether graph features are enabled */
  enabled: z.boolean().default(true),
  /** Whether to automatically ingest archived context into graph */
  auto_ingest: z.boolean().default(true),
  /** Maximum nodes to traverse in a search */
  // NOTE(review): plain z.number() — negative or fractional values pass
  // validation; consider .int().positive() if that is not intended.
  max_traversal_nodes: z.number().default(50),
  /** Maximum depth for graph traversal */
  // NOTE(review): same — unconstrained number; confirm intent.
  max_traversal_depth: z.number().default(3),
})
|
|
23
|
+
|
|
24
|
+
/**
 * Enhanced search configuration schema.
 *
 * Tunable weights and thresholds for the scoring pipeline in
 * search/enhanced.ts.
 */
export const EnhancedSearchConfigSchema = z.object({
  /** Use additional reference patterns from PinkyClawd */
  additional_patterns: z.boolean().default(true),
  /** Weight for recency in scoring (0-1) */
  recency_weight: z.number().min(0).max(1).default(0.3),
  /** Weight for entity matches in scoring (0-1) */
  entity_boost: z.number().min(0).max(1).default(0.2),
  /** Maximum age in days for recency scoring */
  // NOTE(review): unbounded — a zero or negative age would likely break
  // recency math downstream; consider .positive().
  max_age_days: z.number().default(30),
  /** Minimum score threshold for results */
  min_score_threshold: z.number().default(0.3),
})
|
|
39
|
+
|
|
40
|
+
/**
 * Task detection configuration schema.
 */
export const TaskDetectionConfigSchema = z.object({
  /** Archive context when task completion is detected */
  auto_archive_on_completion: z.boolean().default(true),
})
|
|
47
|
+
|
|
48
|
+
/**
 * Full plugin configuration schema.
 *
 * Top-level shape stored under the "claudecode-rlm" key. Every section
 * carries `.default({})`, so `PluginConfigSchema.parse({})` yields a
 * fully populated configuration.
 */
export const PluginConfigSchema = z.object({
  /** Graph-based context storage */
  graph: GraphConfigSchema.default({}),
  /** Enhanced search settings */
  enhanced_search: EnhancedSearchConfigSchema.default({}),
  /** Task detection settings */
  task_detection: TaskDetectionConfigSchema.default({}),
})

/** Validated plugin configuration (inferred from the schema). */
export type PluginConfig = z.infer<typeof PluginConfigSchema>
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Default configuration.
|
|
64
|
+
*/
|
|
65
|
+
export const DEFAULT_CONFIG: PluginConfig = {
|
|
66
|
+
graph: {
|
|
67
|
+
enabled: true,
|
|
68
|
+
auto_ingest: true,
|
|
69
|
+
max_traversal_nodes: 50,
|
|
70
|
+
max_traversal_depth: 3,
|
|
71
|
+
},
|
|
72
|
+
enhanced_search: {
|
|
73
|
+
additional_patterns: true,
|
|
74
|
+
recency_weight: 0.3,
|
|
75
|
+
entity_boost: 0.2,
|
|
76
|
+
max_age_days: 30,
|
|
77
|
+
min_score_threshold: 0.3,
|
|
78
|
+
},
|
|
79
|
+
task_detection: {
|
|
80
|
+
auto_archive_on_completion: true,
|
|
81
|
+
},
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Parse and validate configuration.
|
|
86
|
+
*/
|
|
87
|
+
export function parseConfig(config: unknown): PluginConfig {
|
|
88
|
+
try {
|
|
89
|
+
return PluginConfigSchema.parse(config ?? {})
|
|
90
|
+
} catch {
|
|
91
|
+
// Return defaults on parse error
|
|
92
|
+
return DEFAULT_CONFIG
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Merge partial config with defaults.
|
|
98
|
+
*/
|
|
99
|
+
export function mergeConfig(partial: Partial<PluginConfig>): PluginConfig {
|
|
100
|
+
return {
|
|
101
|
+
graph: { ...DEFAULT_CONFIG.graph, ...partial.graph },
|
|
102
|
+
enhanced_search: {
|
|
103
|
+
...DEFAULT_CONFIG.enhanced_search,
|
|
104
|
+
...partial.enhanced_search,
|
|
105
|
+
},
|
|
106
|
+
task_detection: {
|
|
107
|
+
...DEFAULT_CONFIG.task_detection,
|
|
108
|
+
...partial.task_detection,
|
|
109
|
+
},
|
|
110
|
+
}
|
|
111
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph module exports.
|
|
3
|
+
*
|
|
4
|
+
* RLM-Graph: Knowledge graph-based context storage and retrieval.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Node/edge/entity type definitions and enums.
export * from "./types.js"
// Node and edge storage backend.
export { GraphStorage } from "./storage.js"
// Content -> graph ingestion pipeline (entity extraction, chunking).
export { EntityExtractor, ContentChunker, GraphIngester } from "./ingestion.js"
// Graph traversal and search.
export { GraphTraverser, GraphSearcher } from "./traversal.js"
|
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph ingestion: Entity extraction and content chunking.
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for processing text content:
|
|
5
|
+
* - EntityExtractor: Identifies named entities and code elements
|
|
6
|
+
* - ContentChunker: Splits content into hierarchical chunks
|
|
7
|
+
* - GraphIngester: Converts content into graph structure
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import * as crypto from "crypto"
|
|
11
|
+
import {
|
|
12
|
+
type GraphNode,
|
|
13
|
+
type GraphEdge,
|
|
14
|
+
type Entity,
|
|
15
|
+
NodeType,
|
|
16
|
+
RelationType,
|
|
17
|
+
EntityType,
|
|
18
|
+
} from "./types.js"
|
|
19
|
+
import { GraphStorage } from "./storage.js"
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Stopwords to filter from keyword extraction.
|
|
23
|
+
*/
|
|
24
|
+
const STOPWORDS = new Set([
|
|
25
|
+
// Basic articles and prepositions
|
|
26
|
+
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
|
|
27
|
+
"have", "has", "had", "do", "does", "did", "will", "would", "could",
|
|
28
|
+
"should", "may", "might", "must", "shall", "can", "need", "dare",
|
|
29
|
+
"ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
|
|
30
|
+
"from", "as", "into", "through", "during", "before", "after", "above",
|
|
31
|
+
"below", "between", "under", "again", "further", "then", "once", "here",
|
|
32
|
+
"there", "when", "where", "why", "how", "all", "each", "few", "more",
|
|
33
|
+
"most", "other", "some", "such", "no", "nor", "not", "only", "own",
|
|
34
|
+
"same", "so", "than", "too", "very", "just", "and", "but", "if", "or",
|
|
35
|
+
"because", "until", "while", "about", "against",
|
|
36
|
+
// Pronouns
|
|
37
|
+
"i", "me", "my", "myself", "we", "our", "ours", "ourselves",
|
|
38
|
+
"you", "your", "yours", "yourself", "yourselves",
|
|
39
|
+
"he", "him", "his", "himself", "she", "her", "hers", "herself",
|
|
40
|
+
"it", "its", "itself", "they", "them", "their", "theirs", "themselves",
|
|
41
|
+
"what", "which", "who", "whom", "this", "that", "these", "those",
|
|
42
|
+
// Common verbs
|
|
43
|
+
"am", "having", "doing", "going", "coming", "getting", "making",
|
|
44
|
+
"taking", "using", "trying", "saying", "seeing", "wanting", "needing",
|
|
45
|
+
"knowing", "thinking", "looking", "giving", "finding", "telling",
|
|
46
|
+
"asking", "working", "seeming", "feeling", "leaving", "calling",
|
|
47
|
+
// Fillers
|
|
48
|
+
"please", "thanks", "okay", "ok", "yes", "yeah", "no", "nope",
|
|
49
|
+
"right", "sure", "great", "good", "nice", "cool", "fine", "alright",
|
|
50
|
+
"hello", "hi", "hey", "bye", "goodbye",
|
|
51
|
+
// RLM-specific
|
|
52
|
+
"remember", "recall", "earlier", "previously", "discussed", "mentioned",
|
|
53
|
+
"said", "decided", "talked", "worked", "think", "know", "like",
|
|
54
|
+
])
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Extracts entities from text content.
|
|
58
|
+
*/
|
|
59
|
+
export namespace EntityExtractor {
|
|
60
|
+
// Code-related regex patterns
|
|
61
|
+
const CODE_PATTERNS = [
|
|
62
|
+
// Classes ending with Error, Exception, Handler, etc.
|
|
63
|
+
/\b([A-Z][a-zA-Z0-9]*(?:Error|Exception|Handler|Manager|Service|Controller|Factory|Builder))\b/g,
|
|
64
|
+
// Function calls: functionName(
|
|
65
|
+
/\b([a-z_][a-z0-9_]*)\s*\(/g,
|
|
66
|
+
// Constants: MAX_TIMEOUT, DEBUG_MODE
|
|
67
|
+
/\b([A-Z_][A-Z0-9_]{2,})\b/g,
|
|
68
|
+
// Inline code: `variable`
|
|
69
|
+
/`([^`]+)`/g,
|
|
70
|
+
// Type definitions: class User, interface Config
|
|
71
|
+
/(?:class|interface|type|struct)\s+([A-Z][a-zA-Z0-9]*)/g,
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Extract entities from text.
|
|
76
|
+
*/
|
|
77
|
+
export function extract(text: string): Entity[] {
|
|
78
|
+
const entities = new Map<string, EntityType>()
|
|
79
|
+
|
|
80
|
+
// Extract capitalized terms (potential proper nouns)
|
|
81
|
+
const capitalizedPattern = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b/g
|
|
82
|
+
let match: RegExpExecArray | null
|
|
83
|
+
while ((match = capitalizedPattern.exec(text)) !== null) {
|
|
84
|
+
const term = match[1]
|
|
85
|
+
if (!STOPWORDS.has(term.toLowerCase()) && term.length > 2) {
|
|
86
|
+
entities.set(term, EntityType.PROPER_NOUN)
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// Extract code elements
|
|
91
|
+
for (const pattern of CODE_PATTERNS) {
|
|
92
|
+
pattern.lastIndex = 0
|
|
93
|
+
while ((match = pattern.exec(text)) !== null) {
|
|
94
|
+
const term = match[1]
|
|
95
|
+
if (term && !STOPWORDS.has(term.toLowerCase()) && term.length > 1) {
|
|
96
|
+
entities.set(term, EntityType.CODE_ELEMENT)
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Extract markdown links
|
|
102
|
+
const linkPattern = /\[([^\]]+)\]\([^)]+\)/g
|
|
103
|
+
while ((match = linkPattern.exec(text)) !== null) {
|
|
104
|
+
const linkText = match[1]
|
|
105
|
+
if (!STOPWORDS.has(linkText.toLowerCase())) {
|
|
106
|
+
entities.set(linkText, EntityType.REFERENCE)
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Extract file paths
|
|
111
|
+
const pathPattern = /(?:^|\s)([./]?(?:[a-zA-Z0-9_-]+\/)+[a-zA-Z0-9_.-]+)/g
|
|
112
|
+
while ((match = pathPattern.exec(text)) !== null) {
|
|
113
|
+
entities.set(match[1], EntityType.FILE_PATH)
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return Array.from(entities.entries()).map(([name, type]) => ({
|
|
117
|
+
name,
|
|
118
|
+
type,
|
|
119
|
+
}))
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Extract top keywords based on frequency.
|
|
124
|
+
*/
|
|
125
|
+
export function extractKeywords(text: string, topN: number = 10): string[] {
|
|
126
|
+
const wordPattern = /\b[a-zA-Z]{3,}\b/g
|
|
127
|
+
const wordCounts = new Map<string, number>()
|
|
128
|
+
|
|
129
|
+
let match: RegExpExecArray | null
|
|
130
|
+
while ((match = wordPattern.exec(text)) !== null) {
|
|
131
|
+
const word = match[0].toLowerCase()
|
|
132
|
+
if (!STOPWORDS.has(word)) {
|
|
133
|
+
wordCounts.set(word, (wordCounts.get(word) || 0) + 1)
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
return Array.from(wordCounts.entries())
|
|
138
|
+
.sort((a, b) => b[1] - a[1])
|
|
139
|
+
.slice(0, topN)
|
|
140
|
+
.map(([word]) => word)
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Splits content into hierarchical chunks.
|
|
146
|
+
*/
|
|
147
|
+
export namespace ContentChunker {
|
|
148
|
+
const DEFAULT_SECTION_MIN_CHARS = 500
|
|
149
|
+
const DEFAULT_CHUNK_TARGET_CHARS = 300
|
|
150
|
+
const DEFAULT_CHUNK_OVERLAP = 50
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* Split content into sections.
|
|
154
|
+
*/
|
|
155
|
+
export function splitIntoSections(
|
|
156
|
+
content: string,
|
|
157
|
+
minChars: number = DEFAULT_SECTION_MIN_CHARS
|
|
158
|
+
): Array<{ title: string; content: string }> {
|
|
159
|
+
const sections: Array<{ title: string; content: string }> = []
|
|
160
|
+
|
|
161
|
+
// Try to split by markdown headers
|
|
162
|
+
const headerPattern = /(?:^|\n)(#{1,3})\s+(.+?)(?:\n|$)/g
|
|
163
|
+
const matches: Array<{ level: number; title: string; index: number; end: number }> = []
|
|
164
|
+
|
|
165
|
+
let match: RegExpExecArray | null
|
|
166
|
+
while ((match = headerPattern.exec(content)) !== null) {
|
|
167
|
+
matches.push({
|
|
168
|
+
level: match[1].length,
|
|
169
|
+
title: match[2].trim(),
|
|
170
|
+
index: match.index,
|
|
171
|
+
end: headerPattern.lastIndex,
|
|
172
|
+
})
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (matches.length > 0) {
|
|
176
|
+
for (let i = 0; i < matches.length; i++) {
|
|
177
|
+
const start = matches[i].end
|
|
178
|
+
const end = i + 1 < matches.length ? matches[i + 1].index : content.length
|
|
179
|
+
const sectionContent = content.slice(start, end).trim()
|
|
180
|
+
|
|
181
|
+
if (sectionContent) {
|
|
182
|
+
sections.push({
|
|
183
|
+
title: matches[i].title,
|
|
184
|
+
content: sectionContent,
|
|
185
|
+
})
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
} else {
|
|
189
|
+
// Fall back to paragraph-based splitting
|
|
190
|
+
const paragraphs = content.split("\n\n")
|
|
191
|
+
let currentSection: string[] = []
|
|
192
|
+
let currentLength = 0
|
|
193
|
+
|
|
194
|
+
for (const para of paragraphs) {
|
|
195
|
+
currentSection.push(para)
|
|
196
|
+
currentLength += para.length
|
|
197
|
+
|
|
198
|
+
if (currentLength >= minChars) {
|
|
199
|
+
const sectionText = currentSection.join("\n\n")
|
|
200
|
+
const firstLine = sectionText.split("\n")[0].slice(0, 50)
|
|
201
|
+
sections.push({
|
|
202
|
+
title: firstLine,
|
|
203
|
+
content: sectionText,
|
|
204
|
+
})
|
|
205
|
+
currentSection = []
|
|
206
|
+
currentLength = 0
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if (currentSection.length > 0) {
|
|
211
|
+
const sectionText = currentSection.join("\n\n")
|
|
212
|
+
const firstLine = sectionText.split("\n")[0].slice(0, 50)
|
|
213
|
+
sections.push({
|
|
214
|
+
title: firstLine,
|
|
215
|
+
content: sectionText,
|
|
216
|
+
})
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
return sections
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Split content into overlapping chunks.
|
|
225
|
+
*/
|
|
226
|
+
export function splitIntoChunks(
|
|
227
|
+
content: string,
|
|
228
|
+
targetChars: number = DEFAULT_CHUNK_TARGET_CHARS,
|
|
229
|
+
overlap: number = DEFAULT_CHUNK_OVERLAP
|
|
230
|
+
): string[] {
|
|
231
|
+
if (content.length <= targetChars) {
|
|
232
|
+
return [content]
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
const chunks: string[] = []
|
|
236
|
+
const sentences = content.split(/(?<=[.!?])\s+/)
|
|
237
|
+
let currentChunk: string[] = []
|
|
238
|
+
let currentLength = 0
|
|
239
|
+
|
|
240
|
+
for (const sentence of sentences) {
|
|
241
|
+
const sentenceLen = sentence.length
|
|
242
|
+
|
|
243
|
+
if (currentLength + sentenceLen > targetChars && currentChunk.length > 0) {
|
|
244
|
+
chunks.push(currentChunk.join(" "))
|
|
245
|
+
|
|
246
|
+
// Keep overlap
|
|
247
|
+
const overlapChunk: string[] = []
|
|
248
|
+
let overlapLen = 0
|
|
249
|
+
for (let i = currentChunk.length - 1; i >= 0; i--) {
|
|
250
|
+
if (overlapLen + currentChunk[i].length <= overlap) {
|
|
251
|
+
overlapChunk.unshift(currentChunk[i])
|
|
252
|
+
overlapLen += currentChunk[i].length
|
|
253
|
+
} else {
|
|
254
|
+
break
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
currentChunk = overlapChunk
|
|
259
|
+
currentLength = overlapLen
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
currentChunk.push(sentence)
|
|
263
|
+
currentLength += sentenceLen
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if (currentChunk.length > 0) {
|
|
267
|
+
chunks.push(currentChunk.join(" "))
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return chunks
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
 * Ingests content into the knowledge graph.
 *
 * Builds a DOCUMENT -> SECTION -> CHUNK hierarchy (HAS_SECTION and
 * HAS_CHUNK edges), chains sibling chunks in reading order with FOLLOWS
 * edges, and links each chunk to extracted ENTITY nodes via MENTIONS.
 * All nodes/edges are written through GraphStorage as they are created.
 */
export namespace GraphIngester {
  /**
   * Generate a unique node ID.
   *
   * Format: `<type>_<8-char uuid>_<8-char md5 of content>`. The random
   * UUID segment keeps IDs unique even for identical content; md5 is a
   * short content fingerprint here, not a security measure.
   */
  function generateNodeID(nodeType: string, content: string): string {
    const uuid = crypto.randomUUID().slice(0, 8)
    const contentHash = crypto
      .createHash("md5")
      .update(content)
      .digest("hex")
      .slice(0, 8)
    return `${nodeType}_${uuid}_${contentHash}`
  }

  /**
   * Context block interface (compatible with Claude Code RLM types).
   */
  export interface ContextBlock {
    id: string
    sessionID: string
    content: string
    // Token count for the content (estimated by some callers)
    tokens: number
    summary?: string
    taskID?: string
    taskDescription?: string
    // Unix epoch milliseconds (Date.now())
    createdAt: number
  }

  /**
   * Ingest a context block into the graph.
   *
   * Creates the DOCUMENT node (its `content` is the summary, or the
   * first 200 chars of the body when no summary exists), then ingests
   * each section and links it with a HAS_SECTION edge.
   *
   * @returns The created DOCUMENT node.
   */
  export function ingestContextBlock(block: ContextBlock): GraphNode {
    const now = Date.now()

    // Create document node
    const docNode: GraphNode = {
      id: generateNodeID("doc", block.id),
      sessionID: block.sessionID,
      type: NodeType.DOCUMENT,
      content: block.summary || block.content.slice(0, 200),
      metadata: {
        blockID: block.id,
        taskID: block.taskID,
        taskDescription: block.taskDescription,
        tokens: block.tokens,
      },
      createdAt: now,
    }

    GraphStorage.addNode(docNode)

    // Split into sections
    const sections = ContentChunker.splitIntoSections(block.content)

    for (const section of sections) {
      const sectionNode = ingestSection(
        docNode.id,
        block.sessionID,
        section.title,
        section.content,
        now
      )

      // Link document -> section
      const edge: GraphEdge = {
        sourceID: docNode.id,
        targetID: sectionNode.id,
        relationship: RelationType.HAS_SECTION,
        weight: 1.0,
        metadata: {},
        createdAt: now,
      }
      GraphStorage.addEdgeWithSession(block.sessionID, edge)
    }

    return docNode
  }

  /**
   * Ingest a section and its chunks.
   *
   * The SECTION node stores the title as `content`; only the first 500
   * chars of the section body are retained in metadata.fullContent.
   * Chunks are linked section -> chunk (HAS_CHUNK) and chained with
   * FOLLOWS edges in order.
   *
   * NOTE(review): `parentID` is currently unused — the doc -> section
   * edge is created by the caller (ingestContextBlock). Confirm whether
   * it was intended for anything else before removing it.
   */
  function ingestSection(
    parentID: string,
    sessionID: string,
    title: string,
    content: string,
    timestamp: number
  ): GraphNode {
    const sectionNode: GraphNode = {
      id: generateNodeID("sec", content),
      sessionID,
      type: NodeType.SECTION,
      content: title,
      metadata: { fullContent: content.slice(0, 500) },
      createdAt: timestamp,
    }

    GraphStorage.addNode(sectionNode)

    // Split into chunks
    const chunks = ContentChunker.splitIntoChunks(content)
    let prevChunkID: string | null = null

    for (const chunkContent of chunks) {
      const chunkNode = ingestChunk(sessionID, chunkContent, timestamp)

      // Link section -> chunk
      const edge: GraphEdge = {
        sourceID: sectionNode.id,
        targetID: chunkNode.id,
        relationship: RelationType.HAS_CHUNK,
        weight: 1.0,
        metadata: {},
        createdAt: timestamp,
      }
      GraphStorage.addEdgeWithSession(sessionID, edge)

      // Link previous chunk -> current chunk (temporal order)
      if (prevChunkID) {
        const followsEdge: GraphEdge = {
          sourceID: prevChunkID,
          targetID: chunkNode.id,
          relationship: RelationType.FOLLOWS,
          weight: 1.0,
          metadata: {},
          createdAt: timestamp,
        }
        GraphStorage.addEdgeWithSession(sessionID, followsEdge)
      }

      prevChunkID = chunkNode.id
    }

    return sectionNode
  }

  /**
   * Ingest a chunk and extract entities.
   *
   * Stores the chunk verbatim, then links it to each distinct entity
   * found in its text with a MENTIONS edge. The per-chunk entity cache
   * avoids redundant getOrCreateEntity lookups within one chunk.
   */
  function ingestChunk(
    sessionID: string,
    content: string,
    timestamp: number
  ): GraphNode {
    const chunkNode: GraphNode = {
      id: generateNodeID("chunk", content),
      sessionID,
      type: NodeType.CHUNK,
      content,
      metadata: {},
      createdAt: timestamp,
    }

    GraphStorage.addNode(chunkNode)

    // Extract and link entities
    const entities = EntityExtractor.extract(content)
    const entityCache = new Map<string, string>()

    for (const entity of entities) {
      let entityID: string

      if (entityCache.has(entity.name)) {
        entityID = entityCache.get(entity.name)!
      } else {
        entityID = getOrCreateEntity(sessionID, entity.name, entity.type, timestamp)
        entityCache.set(entity.name, entityID)
      }

      // Link chunk -> entity
      const edge: GraphEdge = {
        sourceID: chunkNode.id,
        targetID: entityID,
        relationship: RelationType.MENTIONS,
        weight: 1.0,
        metadata: {},
        createdAt: timestamp,
      }
      GraphStorage.addEdgeWithSession(sessionID, edge)
    }

    return chunkNode
  }

  /**
   * Get existing entity or create new one.
   *
   * NOTE(review): deduplication relies on searchNodes (limit 1) ranking
   * an exact-content match first; if the top hit is only a partial
   * match, a duplicate entity node is created for `name`. Verify
   * searchNodes ranking semantics, or search with a higher limit.
   */
  function getOrCreateEntity(
    sessionID: string,
    name: string,
    entityType: EntityType,
    timestamp: number
  ): string {
    // Search for existing entity with same name
    const existing = GraphStorage.searchNodes(
      sessionID,
      name,
      NodeType.ENTITY,
      1
    )

    if (existing.length > 0 && existing[0].content === name) {
      return existing[0].id
    }

    // Create new entity
    const entityNode: GraphNode = {
      id: generateNodeID("ent", name),
      sessionID,
      type: NodeType.ENTITY,
      content: name,
      metadata: { entityType },
      createdAt: timestamp,
    }

    GraphStorage.addNode(entityNode)
    return entityNode.id
  }

  /**
   * Ingest messages into the graph.
   *
   * Concatenates the messages as "ROLE: content" paragraphs into a
   * synthetic ContextBlock and ingests it. Token count is a rough
   * chars/4 estimate; the summary is the first 200 chars.
   *
   * @returns The DOCUMENT node, or null when `messages` is empty.
   */
  export function ingestMessages(
    sessionID: string,
    messages: Array<{
      role: string
      content: string
    }>
  ): GraphNode | null {
    if (messages.length === 0) {
      return null
    }

    // Build content from messages
    const contentParts = messages.map(
      (msg) => `${msg.role.toUpperCase()}: ${msg.content}`
    )
    const content = contentParts.join("\n\n")

    // Create a temporary context block
    const block: ContextBlock = {
      id: `msg_${crypto.randomUUID().slice(0, 12)}`,
      sessionID,
      content,
      summary: content.slice(0, 200),
      tokens: Math.ceil(content.length / 4),
      createdAt: Date.now(),
    }

    return ingestContextBlock(block)
  }
}
|