claude-brain 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +191 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +11 -11
- package/bunfig.toml +8 -8
- package/package.json +80 -80
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/src/automation/auto-context.ts +240 -240
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/index.ts +11 -11
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +205 -205
- package/src/cli/auto-setup.ts +82 -82
- package/src/cli/bin.ts +202 -202
- package/src/cli/commands/chroma.ts +573 -573
- package/src/cli/commands/git-hook.ts +189 -189
- package/src/cli/commands/hooks.ts +213 -213
- package/src/cli/commands/init.ts +122 -122
- package/src/cli/commands/install-mcp.ts +92 -92
- package/src/cli/commands/pack.ts +197 -197
- package/src/cli/commands/serve.ts +167 -167
- package/src/cli/commands/start.ts +42 -42
- package/src/cli/commands/uninstall-mcp.ts +41 -41
- package/src/cli/commands/update.ts +121 -121
- package/src/cli/diagnose.ts +4 -4
- package/src/cli/health-check.ts +4 -4
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/setup.ts +4 -4
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/config/defaults.ts +50 -50
- package/src/config/home.ts +55 -55
- package/src/config/index.ts +7 -7
- package/src/config/loader.ts +166 -166
- package/src/config/migration.ts +76 -76
- package/src/config/schema.ts +360 -360
- package/src/config/validator.ts +184 -184
- package/src/config/watcher.ts +86 -86
- package/src/context/assembler.ts +398 -398
- package/src/context/cache-manager.ts +101 -101
- package/src/context/formatter.ts +84 -84
- package/src/context/hierarchy.ts +85 -85
- package/src/context/index.ts +83 -83
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/types.ts +252 -252
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +123 -123
- package/src/health/index.ts +229 -229
- package/src/hooks/brain-hook.ts +112 -112
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +207 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +191 -194
- package/src/hooks/passive-classifier.ts +366 -366
- package/src/hooks/queue.ts +129 -129
- package/src/hooks/session-tracker.ts +275 -275
- package/src/hooks/types.ts +47 -47
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/affinity.ts +162 -162
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +13 -13
- package/src/intelligence/cross-project/transfer.ts +201 -201
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +207 -207
- package/src/intelligence/prediction/context-anticipator.ts +198 -198
- package/src/intelligence/prediction/decision-predictor.ts +184 -184
- package/src/intelligence/prediction/index.ts +13 -13
- package/src/intelligence/prediction/recommender.ts +268 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +247 -247
- package/src/intelligence/reasoning/counterfactual.ts +248 -248
- package/src/intelligence/reasoning/index.ts +13 -13
- package/src/intelligence/reasoning/synthesizer.ts +169 -169
- package/src/intelligence/temporal/evolution.ts +197 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +259 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/knowledge/entity-extractor.ts +416 -416
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +168 -168
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +174 -174
- package/src/memory/chroma/collection-manager.ts +94 -94
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +153 -153
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +315 -315
- package/src/memory/chroma/store.ts +741 -741
- package/src/memory/consolidation/archiver.ts +164 -164
- package/src/memory/consolidation/merger.ts +186 -186
- package/src/memory/consolidation/scorer.ts +138 -138
- package/src/memory/context-builder.ts +236 -236
- package/src/memory/database.ts +169 -169
- package/src/memory/embedding-utils.ts +156 -156
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +351 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/index.ts +582 -582
- package/src/memory/knowledge-extractor.ts +455 -455
- package/src/memory/learning.ts +378 -378
- package/src/memory/patterns.ts +396 -396
- package/src/memory/schema.ts +88 -88
- package/src/memory/search.ts +309 -309
- package/src/memory/store.ts +787 -787
- package/src/memory/types.ts +121 -121
- package/src/orchestrator/coordinator.ts +272 -272
- package/src/orchestrator/decision-logger.ts +228 -228
- package/src/orchestrator/event-emitter.ts +198 -198
- package/src/orchestrator/event-queue.ts +184 -184
- package/src/orchestrator/handlers/base-handler.ts +70 -70
- package/src/orchestrator/handlers/context-handler.ts +73 -73
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/orchestrator/handlers/index.ts +10 -10
- package/src/orchestrator/handlers/status-handler.ts +131 -131
- package/src/orchestrator/handlers/task-handler.ts +171 -171
- package/src/orchestrator/index.ts +275 -275
- package/src/orchestrator/task-parser.ts +284 -284
- package/src/orchestrator/types.ts +98 -98
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -300
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +223 -223
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +223 -223
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +163 -163
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +198 -198
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +236 -236
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +188 -188
- package/src/retrieval/reranker/model.ts +95 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +428 -428
- package/src/routing/intent-classifier.ts +436 -436
- package/src/routing/response-filter.ts +258 -254
- package/src/routing/router.ts +1322 -1314
- package/src/routing/search-engine.ts +475 -475
- package/src/routing/types.ts +94 -84
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/handlers/call-tool.ts +156 -156
- package/src/server/handlers/index.ts +9 -9
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/analyze-decision-evolution.ts +151 -151
- package/src/server/handlers/tools/auto-remember.ts +200 -200
- package/src/server/handlers/tools/brain.ts +85 -85
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/detect-trends.ts +144 -144
- package/src/server/handlers/tools/find-cross-project-patterns.ts +168 -168
- package/src/server/handlers/tools/get-activity-log.ts +194 -194
- package/src/server/handlers/tools/get-code-standards.ts +124 -124
- package/src/server/handlers/tools/get-corrections.ts +154 -154
- package/src/server/handlers/tools/get-decision-timeline.ts +172 -172
- package/src/server/handlers/tools/get-episode.ts +103 -103
- package/src/server/handlers/tools/get-patterns.ts +158 -158
- package/src/server/handlers/tools/get-phase12-status.ts +63 -63
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/get-recommendations.ts +145 -145
- package/src/server/handlers/tools/index.ts +31 -31
- package/src/server/handlers/tools/init-project.ts +757 -757
- package/src/server/handlers/tools/list-episodes.ts +90 -90
- package/src/server/handlers/tools/list-projects.ts +125 -125
- package/src/server/handlers/tools/rate-memory.ts +101 -101
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +126 -126
- package/src/server/handlers/tools/record-correction.ts +125 -125
- package/src/server/handlers/tools/remember-decision.ts +153 -153
- package/src/server/handlers/tools/schemas.ts +253 -253
- package/src/server/handlers/tools/search-knowledge-graph.ts +102 -102
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/handlers/tools/what-if-analysis.ts +135 -135
- package/src/server/http-api.ts +693 -693
- package/src/server/index.ts +40 -40
- package/src/server/mcp-server.ts +283 -283
- package/src/server/providers/index.ts +7 -7
- package/src/server/providers/prompts.ts +327 -327
- package/src/server/providers/resources.ts +622 -622
- package/src/server/services.ts +468 -468
- package/src/server/types.ts +39 -39
- package/src/server/utils/error-handler.ts +155 -155
- package/src/server/utils/index.ts +13 -13
- package/src/server/utils/memory-indicator.ts +83 -83
- package/src/server/utils/request-context.ts +122 -122
- package/src/server/utils/response-formatter.ts +129 -124
- package/src/server/utils/validators.ts +210 -210
- package/src/setup/index.ts +48 -48
- package/src/setup/wizard.ts +461 -461
- package/src/tools/index.ts +24 -24
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.test.ts +30 -30
- package/src/tools/schemas.ts +617 -617
- package/src/tools/types.ts +412 -412
- package/src/utils/circuit-breaker.ts +130 -130
- package/src/utils/cleanup.ts +34 -34
- package/src/utils/error-handler.ts +132 -132
- package/src/utils/error-messages.ts +60 -60
- package/src/utils/fallback.ts +45 -45
- package/src/utils/index.ts +54 -54
- package/src/utils/logger-utils.ts +80 -80
- package/src/utils/logger.ts +88 -88
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/retry.ts +94 -94
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/frontmatter.ts +264 -264
- package/src/vault/index.ts +318 -318
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +422 -422
- package/src/vault/reader.ts +264 -264
- package/src/vault/templates.ts +186 -186
- package/src/vault/types.ts +73 -73
- package/src/vault/watcher.ts +277 -277
- package/src/vault/writer.ts +413 -413
- package/tsconfig.json +30 -30
|
@@ -1,455 +1,455 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Knowledge Extractor
|
|
3
|
-
* Phase 12: Advanced Memory & Intelligent Automation
|
|
4
|
-
*
|
|
5
|
-
* Automatically extracts knowledge from conversations and stores it
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { Logger } from 'pino'
|
|
9
|
-
import type { MemoryManager } from './index'
|
|
10
|
-
|
|
11
|
-
export interface ExtractedKnowledge {
|
|
12
|
-
id: string
|
|
13
|
-
type: 'fact' | 'preference' | 'constraint' | 'goal' | 'definition'
|
|
14
|
-
content: string
|
|
15
|
-
confidence: number
|
|
16
|
-
source: string
|
|
17
|
-
project?: string
|
|
18
|
-
timestamp: Date
|
|
19
|
-
metadata?: Record<string, unknown>
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export interface ExtractionResult {
|
|
23
|
-
knowledge: ExtractedKnowledge[]
|
|
24
|
-
stored: number
|
|
25
|
-
skipped: number
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export class KnowledgeExtractor {
|
|
29
|
-
private logger: Logger
|
|
30
|
-
private memory: MemoryManager
|
|
31
|
-
|
|
32
|
-
// Pattern definitions for different knowledge types
|
|
33
|
-
private readonly FACT_PATTERNS = [
|
|
34
|
-
/(\w+(?:\s+\w+)*)\s+is\s+(?:a\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
35
|
-
/(\w+(?:\s+\w+)*)\s+uses?\s+(\w+(?:\s+\w+)*)/gi,
|
|
36
|
-
/(\w+(?:\s+\w+)*)\s+requires?\s+(\w+(?:\s+\w+)*)/gi,
|
|
37
|
-
/(\w+(?:\s+\w+)*)\s+depends?\s+on\s+(\w+(?:\s+\w+)*)/gi,
|
|
38
|
-
/(\w+(?:\s+\w+)*)\s+contains?\s+(\w+(?:\s+\w+)*)/gi
|
|
39
|
-
]
|
|
40
|
-
|
|
41
|
-
private readonly PREFERENCE_PATTERNS = [
|
|
42
|
-
/prefer(?:s)?\s+(?:to\s+)?(?:use\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
43
|
-
/like(?:s)?\s+(?:to\s+)?(?:use\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
44
|
-
/always\s+use(?:s)?\s+(\w+(?:\s+\w+)*)/gi,
|
|
45
|
-
/should\s+use\s+(\w+(?:\s+\w+)*)/gi,
|
|
46
|
-
/better\s+to\s+use\s+(\w+(?:\s+\w+)*)/gi,
|
|
47
|
-
/recommend(?:s)?\s+(\w+(?:\s+\w+)*)/gi
|
|
48
|
-
]
|
|
49
|
-
|
|
50
|
-
private readonly CONSTRAINT_PATTERNS = [
|
|
51
|
-
/must\s+(?:not\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
52
|
-
/cannot\s+(\w+(?:\s+\w+)*)/gi,
|
|
53
|
-
/required\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
54
|
-
/not\s+allowed\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
55
|
-
/never\s+(\w+(?:\s+\w+)*)/gi,
|
|
56
|
-
/forbidden\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
57
|
-
/(?:it'?s\s+)?mandatory\s+(?:to\s+)?(\w+(?:\s+\w+)*)/gi
|
|
58
|
-
]
|
|
59
|
-
|
|
60
|
-
private readonly GOAL_PATTERNS = [
|
|
61
|
-
/(?:want|need)\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
62
|
-
/goal\s+is\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
63
|
-
/aim(?:s)?\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
64
|
-
/objective\s+is\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
65
|
-
/plan(?:ning)?\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
66
|
-
/intend(?:s)?\s+to\s+(\w+(?:\s+\w+)*)/gi
|
|
67
|
-
]
|
|
68
|
-
|
|
69
|
-
private readonly DEFINITION_PATTERNS = [
|
|
70
|
-
/(\w+(?:\s+\w+)*)\s+(?:is\s+)?defined\s+as\s+(\w+(?:\s+\w+)*)/gi,
|
|
71
|
-
/(\w+(?:\s+\w+)*)\s+means\s+(\w+(?:\s+\w+)*)/gi,
|
|
72
|
-
/(?:by\s+)?(\w+(?:\s+\w+)*)\s+(?:we\s+)?mean\s+(\w+(?:\s+\w+)*)/gi
|
|
73
|
-
]
|
|
74
|
-
|
|
75
|
-
constructor(logger: Logger, memory: MemoryManager) {
|
|
76
|
-
this.logger = logger.child({ component: 'knowledge-extractor' })
|
|
77
|
-
this.memory = memory
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/**
|
|
81
|
-
* Extract knowledge from conversation text
|
|
82
|
-
*/
|
|
83
|
-
async extractFromConversation(
|
|
84
|
-
text: string,
|
|
85
|
-
project?: string
|
|
86
|
-
): Promise<ExtractionResult> {
|
|
87
|
-
const knowledge: ExtractedKnowledge[] = []
|
|
88
|
-
|
|
89
|
-
// Skip very short text
|
|
90
|
-
if (text.length < 20) {
|
|
91
|
-
return { knowledge: [], stored: 0, skipped: 0 }
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Extract different types of knowledge
|
|
95
|
-
knowledge.push(...this.extractFacts(text, project))
|
|
96
|
-
knowledge.push(...this.extractPreferences(text, project))
|
|
97
|
-
knowledge.push(...this.extractConstraints(text, project))
|
|
98
|
-
knowledge.push(...this.extractGoals(text, project))
|
|
99
|
-
knowledge.push(...this.extractDefinitions(text, project))
|
|
100
|
-
|
|
101
|
-
// Deduplicate by content
|
|
102
|
-
const uniqueKnowledge = this.deduplicateKnowledge(knowledge)
|
|
103
|
-
|
|
104
|
-
// Store high-confidence knowledge
|
|
105
|
-
let stored = 0
|
|
106
|
-
let skipped = 0
|
|
107
|
-
|
|
108
|
-
for (const item of uniqueKnowledge) {
|
|
109
|
-
if (item.confidence >= 0.6) {
|
|
110
|
-
const success = await this.storeKnowledge(item)
|
|
111
|
-
if (success) {
|
|
112
|
-
stored++
|
|
113
|
-
} else {
|
|
114
|
-
skipped++
|
|
115
|
-
}
|
|
116
|
-
} else {
|
|
117
|
-
skipped++
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
this.logger.info(
|
|
122
|
-
{ extractedCount: uniqueKnowledge.length, stored, skipped },
|
|
123
|
-
'Knowledge extracted from conversation'
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
return { knowledge: uniqueKnowledge, stored, skipped }
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Extract factual statements
|
|
131
|
-
*/
|
|
132
|
-
private extractFacts(text: string, project?: string): ExtractedKnowledge[] {
|
|
133
|
-
const facts: ExtractedKnowledge[] = []
|
|
134
|
-
|
|
135
|
-
for (const pattern of this.FACT_PATTERNS) {
|
|
136
|
-
// Reset pattern state
|
|
137
|
-
pattern.lastIndex = 0
|
|
138
|
-
|
|
139
|
-
let match
|
|
140
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
141
|
-
const content = match[0].trim()
|
|
142
|
-
|
|
143
|
-
// Skip very short or very long matches
|
|
144
|
-
if (content.length < 10 || content.length > 200) continue
|
|
145
|
-
|
|
146
|
-
// Skip matches that are too generic
|
|
147
|
-
if (this.isGenericMatch(content)) continue
|
|
148
|
-
|
|
149
|
-
facts.push({
|
|
150
|
-
id: this.generateId(),
|
|
151
|
-
type: 'fact',
|
|
152
|
-
content,
|
|
153
|
-
confidence: this.calculateConfidence(content, 'fact'),
|
|
154
|
-
source: 'pattern-match',
|
|
155
|
-
project,
|
|
156
|
-
timestamp: new Date()
|
|
157
|
-
})
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
return facts
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
/**
|
|
165
|
-
* Extract user preferences
|
|
166
|
-
*/
|
|
167
|
-
private extractPreferences(text: string, project?: string): ExtractedKnowledge[] {
|
|
168
|
-
const preferences: ExtractedKnowledge[] = []
|
|
169
|
-
|
|
170
|
-
for (const pattern of this.PREFERENCE_PATTERNS) {
|
|
171
|
-
pattern.lastIndex = 0
|
|
172
|
-
|
|
173
|
-
let match
|
|
174
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
175
|
-
const content = match[0].trim()
|
|
176
|
-
|
|
177
|
-
if (content.length < 10 || content.length > 200) continue
|
|
178
|
-
if (this.isGenericMatch(content)) continue
|
|
179
|
-
|
|
180
|
-
preferences.push({
|
|
181
|
-
id: this.generateId(),
|
|
182
|
-
type: 'preference',
|
|
183
|
-
content,
|
|
184
|
-
confidence: this.calculateConfidence(content, 'preference'),
|
|
185
|
-
source: 'preference-pattern',
|
|
186
|
-
project,
|
|
187
|
-
timestamp: new Date()
|
|
188
|
-
})
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
return preferences
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
/**
|
|
196
|
-
* Extract constraints
|
|
197
|
-
*/
|
|
198
|
-
private extractConstraints(text: string, project?: string): ExtractedKnowledge[] {
|
|
199
|
-
const constraints: ExtractedKnowledge[] = []
|
|
200
|
-
|
|
201
|
-
for (const pattern of this.CONSTRAINT_PATTERNS) {
|
|
202
|
-
pattern.lastIndex = 0
|
|
203
|
-
|
|
204
|
-
let match
|
|
205
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
206
|
-
const content = match[0].trim()
|
|
207
|
-
|
|
208
|
-
if (content.length < 8 || content.length > 200) continue
|
|
209
|
-
if (this.isGenericMatch(content)) continue
|
|
210
|
-
|
|
211
|
-
constraints.push({
|
|
212
|
-
id: this.generateId(),
|
|
213
|
-
type: 'constraint',
|
|
214
|
-
content,
|
|
215
|
-
confidence: this.calculateConfidence(content, 'constraint'),
|
|
216
|
-
source: 'constraint-pattern',
|
|
217
|
-
project,
|
|
218
|
-
timestamp: new Date()
|
|
219
|
-
})
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
return constraints
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
/**
|
|
227
|
-
* Extract goals
|
|
228
|
-
*/
|
|
229
|
-
private extractGoals(text: string, project?: string): ExtractedKnowledge[] {
|
|
230
|
-
const goals: ExtractedKnowledge[] = []
|
|
231
|
-
|
|
232
|
-
for (const pattern of this.GOAL_PATTERNS) {
|
|
233
|
-
pattern.lastIndex = 0
|
|
234
|
-
|
|
235
|
-
let match
|
|
236
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
237
|
-
const content = match[0].trim()
|
|
238
|
-
|
|
239
|
-
if (content.length < 10 || content.length > 200) continue
|
|
240
|
-
if (this.isGenericMatch(content)) continue
|
|
241
|
-
|
|
242
|
-
goals.push({
|
|
243
|
-
id: this.generateId(),
|
|
244
|
-
type: 'goal',
|
|
245
|
-
content,
|
|
246
|
-
confidence: this.calculateConfidence(content, 'goal'),
|
|
247
|
-
source: 'goal-pattern',
|
|
248
|
-
project,
|
|
249
|
-
timestamp: new Date()
|
|
250
|
-
})
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
return goals
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
/**
|
|
258
|
-
* Extract definitions
|
|
259
|
-
*/
|
|
260
|
-
private extractDefinitions(text: string, project?: string): ExtractedKnowledge[] {
|
|
261
|
-
const definitions: ExtractedKnowledge[] = []
|
|
262
|
-
|
|
263
|
-
for (const pattern of this.DEFINITION_PATTERNS) {
|
|
264
|
-
pattern.lastIndex = 0
|
|
265
|
-
|
|
266
|
-
let match
|
|
267
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
268
|
-
const content = match[0].trim()
|
|
269
|
-
|
|
270
|
-
if (content.length < 10 || content.length > 200) continue
|
|
271
|
-
if (this.isGenericMatch(content)) continue
|
|
272
|
-
|
|
273
|
-
definitions.push({
|
|
274
|
-
id: this.generateId(),
|
|
275
|
-
type: 'definition',
|
|
276
|
-
content,
|
|
277
|
-
confidence: this.calculateConfidence(content, 'definition'),
|
|
278
|
-
source: 'definition-pattern',
|
|
279
|
-
project,
|
|
280
|
-
timestamp: new Date()
|
|
281
|
-
})
|
|
282
|
-
}
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
return definitions
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
/**
|
|
289
|
-
* Check if match is too generic
|
|
290
|
-
*/
|
|
291
|
-
private isGenericMatch(content: string): boolean {
|
|
292
|
-
const genericWords = new Set([
|
|
293
|
-
'it', 'this', 'that', 'thing', 'stuff', 'something',
|
|
294
|
-
'anything', 'everything', 'nothing'
|
|
295
|
-
])
|
|
296
|
-
|
|
297
|
-
const words = content.toLowerCase().split(/\W+/).filter(w => w.length > 0)
|
|
298
|
-
|
|
299
|
-
// If most words are generic, skip
|
|
300
|
-
const genericCount = words.filter(w => genericWords.has(w)).length
|
|
301
|
-
return genericCount > words.length / 2
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
/**
|
|
305
|
-
* Calculate confidence score for extracted knowledge
|
|
306
|
-
*/
|
|
307
|
-
private calculateConfidence(content: string, type: ExtractedKnowledge['type']): number {
|
|
308
|
-
let confidence = 0.5 // Base confidence
|
|
309
|
-
|
|
310
|
-
// Longer content = slightly higher confidence
|
|
311
|
-
if (content.length > 30) confidence += 0.1
|
|
312
|
-
if (content.length > 50) confidence += 0.05
|
|
313
|
-
|
|
314
|
-
// Type-specific adjustments
|
|
315
|
-
switch (type) {
|
|
316
|
-
case 'constraint':
|
|
317
|
-
// Constraints are usually clear
|
|
318
|
-
confidence += 0.2
|
|
319
|
-
break
|
|
320
|
-
case 'preference':
|
|
321
|
-
// Preferences have moderate confidence
|
|
322
|
-
confidence += 0.15
|
|
323
|
-
break
|
|
324
|
-
case 'definition':
|
|
325
|
-
// Definitions are usually precise
|
|
326
|
-
confidence += 0.2
|
|
327
|
-
break
|
|
328
|
-
case 'goal':
|
|
329
|
-
// Goals are intentional statements
|
|
330
|
-
confidence += 0.1
|
|
331
|
-
break
|
|
332
|
-
case 'fact':
|
|
333
|
-
// Facts need verification
|
|
334
|
-
confidence += 0.05
|
|
335
|
-
break
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
// Check for strong language
|
|
339
|
-
const strongIndicators = ['always', 'never', 'must', 'required', 'mandatory']
|
|
340
|
-
const hasStrong = strongIndicators.some(ind =>
|
|
341
|
-
content.toLowerCase().includes(ind)
|
|
342
|
-
)
|
|
343
|
-
if (hasStrong) confidence += 0.1
|
|
344
|
-
|
|
345
|
-
return Math.min(confidence, 1.0)
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
/**
|
|
349
|
-
* Deduplicate knowledge by content similarity
|
|
350
|
-
*/
|
|
351
|
-
private deduplicateKnowledge(knowledge: ExtractedKnowledge[]): ExtractedKnowledge[] {
|
|
352
|
-
const seen = new Set<string>()
|
|
353
|
-
const unique: ExtractedKnowledge[] = []
|
|
354
|
-
|
|
355
|
-
for (const item of knowledge) {
|
|
356
|
-
// Normalize content for comparison
|
|
357
|
-
const normalized = item.content.toLowerCase().replace(/\s+/g, ' ').trim()
|
|
358
|
-
|
|
359
|
-
if (!seen.has(normalized)) {
|
|
360
|
-
seen.add(normalized)
|
|
361
|
-
unique.push(item)
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
return unique
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
/**
|
|
369
|
-
* Store extracted knowledge
|
|
370
|
-
*/
|
|
371
|
-
private async storeKnowledge(knowledge: ExtractedKnowledge): Promise<boolean> {
|
|
372
|
-
try {
|
|
373
|
-
await this.memory.store.storeMemory({
|
|
374
|
-
project: knowledge.project || 'global',
|
|
375
|
-
content: `[${knowledge.type.toUpperCase()}] ${knowledge.content}`,
|
|
376
|
-
metadata: {
|
|
377
|
-
type: 'extracted-knowledge',
|
|
378
|
-
knowledgeType: knowledge.type,
|
|
379
|
-
confidence: knowledge.confidence,
|
|
380
|
-
source: knowledge.source
|
|
381
|
-
}
|
|
382
|
-
})
|
|
383
|
-
return true
|
|
384
|
-
} catch (error) {
|
|
385
|
-
this.logger.warn(
|
|
386
|
-
{ error, knowledgeId: knowledge.id },
|
|
387
|
-
'Failed to store extracted knowledge'
|
|
388
|
-
)
|
|
389
|
-
return false
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
/**
|
|
394
|
-
* Extract knowledge from multiple texts (batch processing)
|
|
395
|
-
*/
|
|
396
|
-
async extractFromMultiple(
|
|
397
|
-
texts: Array<{ text: string; project?: string }>
|
|
398
|
-
): Promise<ExtractionResult> {
|
|
399
|
-
const allKnowledge: ExtractedKnowledge[] = []
|
|
400
|
-
let totalStored = 0
|
|
401
|
-
let totalSkipped = 0
|
|
402
|
-
|
|
403
|
-
for (const { text, project } of texts) {
|
|
404
|
-
const result = await this.extractFromConversation(text, project)
|
|
405
|
-
allKnowledge.push(...result.knowledge)
|
|
406
|
-
totalStored += result.stored
|
|
407
|
-
totalSkipped += result.skipped
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
return {
|
|
411
|
-
knowledge: allKnowledge,
|
|
412
|
-
stored: totalStored,
|
|
413
|
-
skipped: totalSkipped
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
/**
|
|
418
|
-
* Get knowledge by type
|
|
419
|
-
*/
|
|
420
|
-
async getKnowledgeByType(
|
|
421
|
-
type: ExtractedKnowledge['type'],
|
|
422
|
-
project?: string,
|
|
423
|
-
limit: number = 10
|
|
424
|
-
): Promise<ExtractedKnowledge[]> {
|
|
425
|
-
try {
|
|
426
|
-
const results = await this.memory.searchRaw(
|
|
427
|
-
`[${type.toUpperCase()}]`,
|
|
428
|
-
{
|
|
429
|
-
project,
|
|
430
|
-
limit,
|
|
431
|
-
minSimilarity: 0.3
|
|
432
|
-
}
|
|
433
|
-
)
|
|
434
|
-
|
|
435
|
-
return results
|
|
436
|
-
.filter(r => r.memory.metadata?.knowledgeType === type)
|
|
437
|
-
.map(r => ({
|
|
438
|
-
id: r.memory.id,
|
|
439
|
-
type: r.memory.metadata?.knowledgeType as ExtractedKnowledge['type'],
|
|
440
|
-
content: r.memory.content.replace(/^\[\w+\]\s*/, ''),
|
|
441
|
-
confidence: r.memory.metadata?.confidence as number || 0,
|
|
442
|
-
source: r.memory.metadata?.source as string || 'unknown',
|
|
443
|
-
project: r.memory.project,
|
|
444
|
-
timestamp: r.memory.createdAt
|
|
445
|
-
}))
|
|
446
|
-
} catch (error) {
|
|
447
|
-
this.logger.error({ error, type, project }, 'Failed to get knowledge by type')
|
|
448
|
-
return []
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
private generateId(): string {
|
|
453
|
-
return `knowledge-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`
|
|
454
|
-
}
|
|
455
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Extractor
|
|
3
|
+
* Phase 12: Advanced Memory & Intelligent Automation
|
|
4
|
+
*
|
|
5
|
+
* Automatically extracts knowledge from conversations and stores it
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Logger } from 'pino'
|
|
9
|
+
import type { MemoryManager } from './index'
|
|
10
|
+
|
|
11
|
+
export interface ExtractedKnowledge {
|
|
12
|
+
id: string
|
|
13
|
+
type: 'fact' | 'preference' | 'constraint' | 'goal' | 'definition'
|
|
14
|
+
content: string
|
|
15
|
+
confidence: number
|
|
16
|
+
source: string
|
|
17
|
+
project?: string
|
|
18
|
+
timestamp: Date
|
|
19
|
+
metadata?: Record<string, unknown>
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export interface ExtractionResult {
|
|
23
|
+
knowledge: ExtractedKnowledge[]
|
|
24
|
+
stored: number
|
|
25
|
+
skipped: number
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export class KnowledgeExtractor {
|
|
29
|
+
private logger: Logger
|
|
30
|
+
private memory: MemoryManager
|
|
31
|
+
|
|
32
|
+
// Pattern definitions for different knowledge types
|
|
33
|
+
private readonly FACT_PATTERNS = [
|
|
34
|
+
/(\w+(?:\s+\w+)*)\s+is\s+(?:a\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
35
|
+
/(\w+(?:\s+\w+)*)\s+uses?\s+(\w+(?:\s+\w+)*)/gi,
|
|
36
|
+
/(\w+(?:\s+\w+)*)\s+requires?\s+(\w+(?:\s+\w+)*)/gi,
|
|
37
|
+
/(\w+(?:\s+\w+)*)\s+depends?\s+on\s+(\w+(?:\s+\w+)*)/gi,
|
|
38
|
+
/(\w+(?:\s+\w+)*)\s+contains?\s+(\w+(?:\s+\w+)*)/gi
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
private readonly PREFERENCE_PATTERNS = [
|
|
42
|
+
/prefer(?:s)?\s+(?:to\s+)?(?:use\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
43
|
+
/like(?:s)?\s+(?:to\s+)?(?:use\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
44
|
+
/always\s+use(?:s)?\s+(\w+(?:\s+\w+)*)/gi,
|
|
45
|
+
/should\s+use\s+(\w+(?:\s+\w+)*)/gi,
|
|
46
|
+
/better\s+to\s+use\s+(\w+(?:\s+\w+)*)/gi,
|
|
47
|
+
/recommend(?:s)?\s+(\w+(?:\s+\w+)*)/gi
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
private readonly CONSTRAINT_PATTERNS = [
|
|
51
|
+
/must\s+(?:not\s+)?(\w+(?:\s+\w+)*)/gi,
|
|
52
|
+
/cannot\s+(\w+(?:\s+\w+)*)/gi,
|
|
53
|
+
/required\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
54
|
+
/not\s+allowed\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
55
|
+
/never\s+(\w+(?:\s+\w+)*)/gi,
|
|
56
|
+
/forbidden\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
57
|
+
/(?:it'?s\s+)?mandatory\s+(?:to\s+)?(\w+(?:\s+\w+)*)/gi
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
private readonly GOAL_PATTERNS = [
|
|
61
|
+
/(?:want|need)\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
62
|
+
/goal\s+is\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
63
|
+
/aim(?:s)?\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
64
|
+
/objective\s+is\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
65
|
+
/plan(?:ning)?\s+to\s+(\w+(?:\s+\w+)*)/gi,
|
|
66
|
+
/intend(?:s)?\s+to\s+(\w+(?:\s+\w+)*)/gi
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
private readonly DEFINITION_PATTERNS = [
|
|
70
|
+
/(\w+(?:\s+\w+)*)\s+(?:is\s+)?defined\s+as\s+(\w+(?:\s+\w+)*)/gi,
|
|
71
|
+
/(\w+(?:\s+\w+)*)\s+means\s+(\w+(?:\s+\w+)*)/gi,
|
|
72
|
+
/(?:by\s+)?(\w+(?:\s+\w+)*)\s+(?:we\s+)?mean\s+(\w+(?:\s+\w+)*)/gi
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
constructor(logger: Logger, memory: MemoryManager) {
|
|
76
|
+
this.logger = logger.child({ component: 'knowledge-extractor' })
|
|
77
|
+
this.memory = memory
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
 * Run every extractor over a piece of conversation text and persist the
 * results.
 *
 * Text shorter than 20 characters is ignored. Otherwise the fact,
 * preference, constraint, goal and definition extractors run in that order,
 * their combined output is de-duplicated, and every item whose confidence is
 * at least 0.6 is handed to `storeKnowledge`.
 *
 * @param text - conversation text to analyse
 * @param project - optional project identifier attached to each extracted item
 * @returns the de-duplicated items, plus how many were stored and how many
 *   were skipped (confidence below 0.6, or the store attempt reported failure)
 */
async extractFromConversation(
  text: string,
  project?: string
): Promise<ExtractionResult> {
  // Too little text to be worth scanning
  if (text.length < 20) {
    return { knowledge: [], stored: 0, skipped: 0 }
  }

  // Run the extractors in a fixed order and pool their output
  const candidates: ExtractedKnowledge[] = [
    ...this.extractFacts(text, project),
    ...this.extractPreferences(text, project),
    ...this.extractConstraints(text, project),
    ...this.extractGoals(text, project),
    ...this.extractDefinitions(text, project)
  ]

  const uniqueKnowledge = this.deduplicateKnowledge(candidates)

  let stored = 0
  let skipped = 0

  for (const item of uniqueKnowledge) {
    // Only sufficiently confident items are sent to storage; a failed store
    // counts as skipped, exactly like a low-confidence item
    const persisted = item.confidence >= 0.6 && (await this.storeKnowledge(item))

    if (persisted) {
      stored += 1
    } else {
      skipped += 1
    }
  }

  this.logger.info(
    { extractedCount: uniqueKnowledge.length, stored, skipped },
    'Knowledge extracted from conversation'
  )

  return { knowledge: uniqueKnowledge, stored, skipped }
}
|
|
128
|
+
|
|
129
|
+
/**
 * Collect factual statements by running each regex in `FACT_PATTERNS`
 * over the text.
 *
 * @param text - text to scan
 * @param project - optional project identifier copied onto each result
 * @returns one `fact` entry per accepted match, in pattern order and then
 *   match order
 */
private extractFacts(text: string, project?: string): ExtractedKnowledge[] {
  const found: ExtractedKnowledge[] = []

  for (const pattern of this.FACT_PATTERNS) {
    // Rewind the regex so scanning starts at the beginning of the text
    pattern.lastIndex = 0

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const content = hit[0].trim()

      // Reject matches outside 10..200 characters and ones made up mostly
      // of filler words
      if (content.length < 10 || content.length > 200 || this.isGenericMatch(content)) {
        continue
      }

      found.push({
        id: this.generateId(),
        type: 'fact',
        content,
        confidence: this.calculateConfidence(content, 'fact'),
        source: 'pattern-match',
        project,
        timestamp: new Date()
      })
    }
  }

  return found
}
|
|
163
|
+
|
|
164
|
+
/**
 * Collect preference statements by running each regex in
 * `PREFERENCE_PATTERNS` over the text.
 *
 * @param text - text to scan
 * @param project - optional project identifier copied onto each result
 * @returns one `preference` entry per accepted match, in pattern order and
 *   then match order
 */
private extractPreferences(text: string, project?: string): ExtractedKnowledge[] {
  const found: ExtractedKnowledge[] = []

  for (const pattern of this.PREFERENCE_PATTERNS) {
    // Rewind the regex so scanning starts at the beginning of the text
    pattern.lastIndex = 0

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const content = hit[0].trim()

      // Reject matches outside 10..200 characters and ones made up mostly
      // of filler words
      if (content.length < 10 || content.length > 200 || this.isGenericMatch(content)) {
        continue
      }

      found.push({
        id: this.generateId(),
        type: 'preference',
        content,
        confidence: this.calculateConfidence(content, 'preference'),
        source: 'preference-pattern',
        project,
        timestamp: new Date()
      })
    }
  }

  return found
}
|
|
194
|
+
|
|
195
|
+
/**
 * Collect constraint statements by running each regex in
 * `CONSTRAINT_PATTERNS` over the text.
 *
 * @param text - text to scan
 * @param project - optional project identifier copied onto each result
 * @returns one `constraint` entry per accepted match, in pattern order and
 *   then match order
 */
private extractConstraints(text: string, project?: string): ExtractedKnowledge[] {
  const found: ExtractedKnowledge[] = []

  for (const pattern of this.CONSTRAINT_PATTERNS) {
    // exec() on a global regex resumes from lastIndex, so rewind first
    pattern.lastIndex = 0

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const content = hit[0].trim()

      // Constraints accept a shorter minimum (8 characters) than the other
      // extractors; the 200 character cap and filler-word check are the same
      if (content.length < 8 || content.length > 200 || this.isGenericMatch(content)) {
        continue
      }

      found.push({
        id: this.generateId(),
        type: 'constraint',
        content,
        confidence: this.calculateConfidence(content, 'constraint'),
        source: 'constraint-pattern',
        project,
        timestamp: new Date()
      })
    }
  }

  return found
}
|
|
225
|
+
|
|
226
|
+
/**
 * Collect goal statements by running each regex in `GOAL_PATTERNS` over
 * the text.
 *
 * @param text - text to scan
 * @param project - optional project identifier copied onto each result
 * @returns one `goal` entry per accepted match, in pattern order and then
 *   match order
 */
private extractGoals(text: string, project?: string): ExtractedKnowledge[] {
  const found: ExtractedKnowledge[] = []

  for (const pattern of this.GOAL_PATTERNS) {
    // exec() on a global regex resumes from lastIndex, so rewind first
    pattern.lastIndex = 0

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const content = hit[0].trim()

      // Reject matches outside 10..200 characters and ones made up mostly
      // of filler words
      if (content.length < 10 || content.length > 200 || this.isGenericMatch(content)) {
        continue
      }

      found.push({
        id: this.generateId(),
        type: 'goal',
        content,
        confidence: this.calculateConfidence(content, 'goal'),
        source: 'goal-pattern',
        project,
        timestamp: new Date()
      })
    }
  }

  return found
}
|
|
256
|
+
|
|
257
|
+
/**
 * Collect definitions by running each regex in `DEFINITION_PATTERNS` over
 * the text. The stored content is the whole matched phrase (term, connective
 * and meaning), not the individual capture groups.
 *
 * @param text - text to scan
 * @param project - optional project identifier copied onto each result
 * @returns one `definition` entry per accepted match, in pattern order and
 *   then match order
 */
private extractDefinitions(text: string, project?: string): ExtractedKnowledge[] {
  const found: ExtractedKnowledge[] = []

  for (const pattern of this.DEFINITION_PATTERNS) {
    // exec() on a global regex resumes from lastIndex, so rewind first
    pattern.lastIndex = 0

    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const content = hit[0].trim()

      // Reject matches outside 10..200 characters and ones made up mostly
      // of filler words
      if (content.length < 10 || content.length > 200 || this.isGenericMatch(content)) {
        continue
      }

      found.push({
        id: this.generateId(),
        type: 'definition',
        content,
        confidence: this.calculateConfidence(content, 'definition'),
        source: 'definition-pattern',
        project,
        timestamp: new Date()
      })
    }
  }

  return found
}
|
|
287
|
+
|
|
288
|
+
/**
 * Decide whether a match is too vague to keep.
 *
 * The content is lower-cased and split into runs of word characters; the
 * match is generic when strictly more than half of those words are filler
 * words such as "it", "this" or "something".
 *
 * @param content - matched text to inspect
 * @returns `true` when filler words form a strict majority; `false`
 *   otherwise, including when the content holds no words at all
 */
private isGenericMatch(content: string): boolean {
  const fillerWords = new Set([
    'it', 'this', 'that', 'thing', 'stuff', 'something',
    'anything', 'everything', 'nothing'
  ])

  // Every maximal run of word characters is one token
  const tokens = content.toLowerCase().match(/\w+/g) ?? []

  let fillerCount = 0
  for (const token of tokens) {
    if (fillerWords.has(token)) {
      fillerCount += 1
    }
  }

  // Same test as "fillerCount > tokens.length / 2", kept in integers
  return fillerCount * 2 > tokens.length
}
|
|
303
|
+
|
|
304
|
+
/**
 * Score how trustworthy an extracted statement is, on a 0..1 scale.
 *
 * Starts from 0.5 and adds:
 *  - 0.1 when the content is longer than 30 characters, plus another 0.05
 *    when it is longer than 50;
 *  - a per-type bonus (constraint 0.2, definition 0.2, preference 0.15,
 *    goal 0.1, fact 0.05);
 *  - 0.1 when the content contains a strong-language word.
 *
 * Strong-language words are matched as whole words. A plain substring test
 * would also fire on unrelated words that merely contain one of them (for
 * example "whenever" contains "never") and inflate the score.
 *
 * @param content - extracted text
 * @param type - knowledge category of the extracted text
 * @returns the confidence, capped at 1.0
 */
private calculateConfidence(content: string, type: ExtractedKnowledge['type']): number {
  let confidence = 0.5 // Base confidence

  // Longer content = slightly higher confidence
  if (content.length > 30) confidence += 0.1
  if (content.length > 50) confidence += 0.05

  // Type-specific adjustments
  switch (type) {
    case 'constraint':
      // Constraints are usually clear
      confidence += 0.2
      break
    case 'preference':
      // Preferences have moderate confidence
      confidence += 0.15
      break
    case 'definition':
      // Definitions are usually precise
      confidence += 0.2
      break
    case 'goal':
      // Goals are intentional statements
      confidence += 0.1
      break
    case 'fact':
      // Facts need verification
      confidence += 0.05
      break
  }

  // Strong language, matched on word boundaries and ignoring case
  const hasStrong = /\b(?:always|never|must|required|mandatory)\b/i.test(content)
  if (hasStrong) confidence += 0.1

  return Math.min(confidence, 1.0)
}
|
|
347
|
+
|
|
348
|
+
/**
 * Drop items whose content duplicates an earlier item.
 *
 * Two items are duplicates when their content is equal after lower-casing,
 * collapsing each whitespace run to a single space, and trimming. The first
 * occurrence is kept and input order is preserved.
 *
 * @param knowledge - extracted items, possibly containing duplicates
 * @returns a new array holding the first occurrence of each distinct content
 */
private deduplicateKnowledge(knowledge: ExtractedKnowledge[]): ExtractedKnowledge[] {
  const seenKeys = new Set<string>()

  return knowledge.filter(item => {
    const key = item.content.toLowerCase().replace(/\s+/g, ' ').trim()

    if (seenKeys.has(key)) {
      return false
    }

    seenKeys.add(key)
    return true
  })
}
|
|
367
|
+
|
|
368
|
+
/**
 * Persist one extracted item through the memory store.
 *
 * The stored content is prefixed with the upper-cased knowledge type in
 * square brackets (e.g. `[FACT] ...`), and items without a project are
 * filed under `'global'`. Failures are logged as warnings, not rethrown.
 *
 * @param knowledge - item to persist
 * @returns `true` when the store call completed, `false` when it threw
 */
private async storeKnowledge(knowledge: ExtractedKnowledge): Promise<boolean> {
  try {
    const { type, content, confidence, source } = knowledge
    const targetProject = knowledge.project || 'global'

    await this.memory.store.storeMemory({
      project: targetProject,
      content: `[${type.toUpperCase()}] ${content}`,
      metadata: {
        type: 'extracted-knowledge',
        knowledgeType: type,
        confidence,
        source
      }
    })

    return true
  } catch (error) {
    this.logger.warn(
      { error, knowledgeId: knowledge.id },
      'Failed to store extracted knowledge'
    )
    return false
  }
}
|
|
392
|
+
|
|
393
|
+
/**
 * Batch variant of `extractFromConversation`.
 *
 * Texts are processed one after another, in the given order, and the
 * per-text results are concatenated and summed. No de-duplication is done
 * across different texts.
 *
 * @param texts - entries to process, each with its text and optional project
 * @returns all extracted items together with the total stored and skipped counts
 */
async extractFromMultiple(
  texts: Array<{ text: string; project?: string }>
): Promise<ExtractionResult> {
  const merged: ExtractedKnowledge[] = []
  let storedCount = 0
  let skippedCount = 0

  for (const entry of texts) {
    const outcome = await this.extractFromConversation(entry.text, entry.project)

    merged.push(...outcome.knowledge)
    storedCount += outcome.stored
    skippedCount += outcome.skipped
  }

  return { knowledge: merged, stored: storedCount, skipped: skippedCount }
}
|
|
416
|
+
|
|
417
|
+
/**
 * Look up previously stored knowledge of one type.
 *
 * Searches memory for the type's bracketed tag (e.g. `[GOAL]`) with a
 * minimum similarity of 0.3, then keeps only hits whose
 * `metadata.knowledgeType` equals the requested type. Because that filter
 * runs after the search, fewer than `limit` items may come back.
 *
 * @param type - knowledge type to fetch
 * @param project - optional project passed to the search
 * @param limit - maximum number of search hits to request (default 10)
 * @returns matching items with the bracketed tag stripped from their
 *   content; an empty array when the search throws (the error is logged)
 */
async getKnowledgeByType(
  type: ExtractedKnowledge['type'],
  project?: string,
  limit: number = 10
): Promise<ExtractedKnowledge[]> {
  try {
    const tag = `[${type.toUpperCase()}]`
    const hits = await this.memory.searchRaw(tag, {
      project,
      limit,
      minSimilarity: 0.3
    })

    const matches: ExtractedKnowledge[] = []

    for (const hit of hits) {
      const record = hit.memory

      // Skip hits that were stored under a different knowledge type
      if (record.metadata?.knowledgeType !== type) {
        continue
      }

      matches.push({
        id: record.id,
        type: record.metadata?.knowledgeType as ExtractedKnowledge['type'],
        // Remove the leading "[TYPE] " tag that was added when storing
        content: record.content.replace(/^\[\w+\]\s*/, ''),
        confidence: record.metadata?.confidence as number || 0,
        source: record.metadata?.source as string || 'unknown',
        project: record.project,
        timestamp: record.createdAt
      })
    }

    return matches
  } catch (error) {
    this.logger.error({ error, type, project }, 'Failed to get knowledge by type')
    return []
  }
}
|
|
451
|
+
|
|
452
|
+
/**
 * Build an identifier for a freshly extracted item.
 *
 * @returns `knowledge-<ms timestamp>-<suffix>`, where the suffix is up to
 *   nine base-36 characters taken from `Math.random()`
 */
private generateId(): string {
  const createdAtMs = Date.now()
  // Drop the leading "0." of the base-36 fraction and keep at most 9 characters
  const randomSuffix = Math.random().toString(36).slice(2, 11)

  return `knowledge-${createdAtMs}-${randomSuffix}`
}
|
|
455
|
+
}
|