persyst-mcp 2.2.3 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/bin/export.js +116 -0
- package/bin/import.js +160 -0
- package/hooks/persyst-hook.js +9 -10
- package/index.js +11 -1
- package/package.json +13 -3
- package/src/database.js +37 -19
- package/src/events.js +19 -0
- package/src/extractor-heuristic.js +502 -324
- package/src/sdk.d.ts +175 -0
- package/src/sdk.js +217 -0
- package/src/search.js +103 -7
- package/src/server.js +723 -183
- package/src/tools.js +14 -6
- package/src/watcher.js +27 -12
|
@@ -1,324 +1,502 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
|
|
3
|
-
*
|
|
4
|
-
* Scans raw conversation text for
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
//
|
|
79
|
-
{
|
|
80
|
-
regex:
|
|
81
|
-
category: '
|
|
82
|
-
confidence: 0.
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
//
|
|
103
|
-
{
|
|
104
|
-
regex:
|
|
105
|
-
category: '
|
|
106
|
-
confidence: 0.
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
//
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
//
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
//
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
*
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
1
|
+
/**
|
|
2
|
+
* extractor-heuristic.js — Tier 2: Zero-Cost Regex-Based Fact Extractor
|
|
3
|
+
*
|
|
4
|
+
* Scans raw conversation text for extractable knowledge signals.
|
|
5
|
+
*
|
|
6
|
+
* Operates in TWO modes:
|
|
7
|
+
*
|
|
8
|
+
* 1. EXPLICIT SAVE MODE (highest priority, bypasses all filters)
|
|
9
|
+
* Triggered when user says: "remember", "save this", "note:", "important:",
|
|
10
|
+
* "don't forget", "fyi", "keep in mind", "remind me", "make a note"
|
|
11
|
+
* These always get stored — confidence 0.85–0.95 depending on the trigger. No tech filter applied.
|
|
12
|
+
* Examples:
|
|
13
|
+
* "Remember: the staging server is flaky on Mondays"
|
|
14
|
+
* "Note: John handles DB migrations, don't touch those files"
|
|
15
|
+
* "Don't forget the SSL cert expires March 15"
|
|
16
|
+
* "FYI the client doesn't want emojis in any responses"
|
|
17
|
+
*
|
|
18
|
+
* 2. IMPLICIT PATTERN MODE (normal extraction, requires tech context)
|
|
19
|
+
* Regex patterns for common developer signal phrases:
|
|
20
|
+
* "I prefer...", "we decided...", "always use...", "stack includes..."
|
|
21
|
+
* Conservative: high-precision, low-recall
|
|
22
|
+
* Filters non-technical content (noise filter)
|
|
23
|
+
*
|
|
24
|
+
* Design decisions:
|
|
25
|
+
* - Runs synchronously — zero latency overhead on the hot path
|
|
26
|
+
* - Returns structured facts with confidence scores (0.0 - 1.0)
|
|
27
|
+
* - Explicit saves always win — no filter can suppress them
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
// ============================================================
|
|
31
|
+
// EXPLICIT SAVE TRIGGERS
|
|
32
|
+
// These phrases indicate the user intentionally wants something saved.
|
|
33
|
+
// Order matters — more specific patterns come first.
|
|
34
|
+
// ============================================================
|
|
35
|
+
|
|
36
|
+
// Each entry: { regex, category, confidence }. Capture group 1 is the text to store.
//
// Regex conventions used by every entry:
//   - `m` flag: `$` matches at the end of each line, so a trigger on a
//     non-final line that has no trailing period is still captured
//     (`.` never crosses a line break).
//   - A capture ends at a sentence-ending period only, i.e. a "." followed by
//     whitespace or end of line. Dots inside tokens such as "index.js",
//     "v2.2.5" or "127.0.0.1" no longer cut the captured text short.
//   - `g` flag: the regexes are stateful; callers must reset `lastIndex`
//     before each scan.
const EXPLICIT_SAVE_PATTERNS = [
  // "remember: ..." / "remember that ..." / "remember to ..."
  {
    regex: /\bremember(?:\s*[:–—])?\s+(?:that\s+|to\s+)?(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.95
  },
  // "note: ..."
  {
    regex: /\bnote(?:\s*[:–—])\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.95
  },
  // "note that ..."
  {
    regex: /\bnote\s+that\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.95
  },
  // "important: ..."
  {
    regex: /\bimportant(?:\s*[:–—])\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.95
  },
  // "fyi: ..." / "fyi, ..." / "fyi ..."
  {
    regex: /\bfyi\b(?:\s*[:–—,])?\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.90
  },
  // "don't forget ..." (straight or typographic apostrophe)
  {
    regex: /\bdon['\u2019]t\s+forget\s+(?:that\s+|to\s+)?(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'reminder',
    confidence: 0.90
  },
  // "keep in mind ..." — captures the rest of the line, minus a final period
  {
    regex: /\bkeep\s+in\s+mind\s+(?:that\s+)?(.+?)(?:\.\s*$|$)/gim,
    category: 'note',
    confidence: 0.90
  },
  // "save this: ..." / "save that ..." / "save the following ..."
  {
    regex: /\bsave\s+(?:this|that|the following)(?:\s*[:–—])?\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.95
  },
  // "remind me ..." / "remind me to ..." / "remind me about ..."
  {
    regex: /\bremind\s+(?:me\s+)?(?:to\s+|that\s+|about\s+)?(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'reminder',
    confidence: 0.90
  },
  // "make a note ..." / "take a note ..."
  {
    regex: /\b(?:make|take)\s+a\s+note(?:\s*[:–—]|s?\s+that\s+|s?\s+about\s+|:?\s+)?(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.90
  },
  // "heads up: ..." / "heads up, ..."
  {
    regex: /\bheads?\s+up\b(?:\s*[:–—,])?\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.90
  },
  // "warning: ..." (project context, not log output)
  {
    regex: /\bwarning(?:\s*[:–—])\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.85
  },
  // "caution: ..."
  {
    regex: /\bcaution(?:\s*[:–—])\s*(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'note',
    confidence: 0.85
  },
  // "the rule is ..." / "our rule is ..."
  {
    regex: /\b(?:the|our)\s+rule\s+is\s+(?:that\s+)?(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'rule',
    confidence: 0.90
  }
];
|
|
121
|
+
|
|
122
|
+
// ============================================================
|
|
123
|
+
// IMPLICIT PATTERN DEFINITIONS
|
|
124
|
+
// Ordered by specificity — most specific patterns first
|
|
125
|
+
// Each pattern: regex, category, confidence, template
|
|
126
|
+
// ============================================================
|
|
127
|
+
|
|
128
|
+
// Each entry: { regex, category, confidence, template(match) -> string }.
//
// Regex conventions used by the entries below:
//   - `m` flag so `$` matches at every line end; without it a statement on a
//     non-final line with no trailing period can never match, because `.`
//     does not cross line breaks.
//   - Captures end at a sentence-ending period ("." followed by whitespace or
//     end of line), so values like "127.0.0.1" or "config.json" stay intact.
//   - A leading `\b` on patterns that start with a short word, so they cannot
//     start in the middle of another word ("causing" vs "using", "wiki" vs "i").
//   - `g` flag: the regexes are stateful; callers must reset `lastIndex`.
const PATTERNS = [
  // --- Decision patterns (highest confidence) ---
  {
    regex: /\b(?:we|i|the team)\s+(?:have\s+)?decided\s+(?:to\s+)?(?:use|go\s+with|adopt|switch\s+to|move\s+to)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.85,
    template: (match) => `Decision: ${cleanFact(match[1])}`
  },
  {
    regex: /\b(?:we(?:'re|\s+are)?\s+)?(?:going|moving)\s+(?:to\s+)?(?:use|adopt|switch\s+to|migrate\s+to)\s+(.+?)(?:\s+(?:for|because|since|as)\b|\.(?=\s|$)|$)/gim,
    category: 'decision',
    confidence: 0.80,
    template: (match) => `Decision: Moving to ${cleanFact(match[1])}`
  },

  // --- Explicit preference patterns ---
  {
    regex: /\bi\s+(?:always\s+)?prefer\s+(.+?)(?:\s+(?:over|instead\s+of|rather\s+than)\s+(.+?))?(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.80,
    template: (match) => {
      const pref = cleanFact(match[1]);
      // The "over X" alternative is optional in the regex.
      const alt = match[2] ? ` over ${cleanFact(match[2])}` : '';
      return `Preference: ${pref}${alt}`;
    }
  },
  {
    regex: /\b(?:we|i)\s+(?:should\s+)?(?:always|never)\s+(?:use|avoid|include|add|write|create)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'preference',
    confidence: 0.75,
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Stack / technology patterns ---
  {
    regex: /(?:our|the|my)\s+(?:tech\s+)?stack\s+(?:includes?|uses?|is|has)\s+(.+?)(?:\.\s|\.$|$)/gim,
    category: 'stack',
    confidence: 0.85,
    template: (match) => `Stack: ${cleanFact(match[1])}`
  },
  {
    regex: /\b(?:we(?:'re|\s+are)?\s+)?using\s+(.+?)\s+(?:for|as)\s+(?:our|the)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    template: (match) => `Stack: Using ${cleanFact(match[1])} for ${cleanFact(match[2])}`
  },
  {
    regex: /\b(?:our|the)\s+(?:backend|frontend|database|api|server|client|infra(?:structure)?)\s+(?:is|uses?|runs?\s+on)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'stack',
    confidence: 0.80,
    template: (match) => `Stack: ${cleanFact(match[0])}`
  },

  // --- Naming / convention patterns ---
  {
    regex: /\b(?:name|call|rename)\s+(?:it|this|the\s+\w+)\s+[\"'`]?(\w[\w\-\.]+)[\"'`]?/gi,
    category: 'naming',
    confidence: 0.70,
    template: (match) => `Naming: ${cleanFact(match[0])}`
  },

  // --- Architecture patterns ---
  {
    regex: /(?:the\s+)?(?:project|app|application|system|architecture)\s+(?:follows?|uses?|is\s+based\s+on|implements?)\s+(.+?)(?:\s+pattern|\s+architecture)?(?:\.(?=\s|$)|$)/gim,
    category: 'architecture',
    confidence: 0.80,
    template: (match) => `Architecture: ${cleanFact(match[1])}`
  },

  // --- Coding rule / style patterns ---
  {
    regex: /\b(?:always|never|must|should|don['\u2019]t|do\s+not)\s+(?:use|write|create|add|include|put|place|keep)\s+(.+?)(?:\.(?=\s|$)|$)/gim,
    category: 'rule',
    confidence: 0.70,
    template: (match) => `Rule: ${cleanFact(match[0])}`
  },

  // --- Config / env patterns ---
  {
    regex: /\b(?:set|change|update|configure)\s+(?:the\s+)?(?:port|host|env|environment|config|setting)\s+(?:to|=|:)\s*[\"'`]?(.+?)[\"'`]?(?:\.(?=\s|$)|$)/gim,
    category: 'config',
    confidence: 0.75,
    template: (match) => `Config: ${cleanFact(match[0])}`
  }
];
|
|
213
|
+
|
|
214
|
+
// ============================================================
|
|
215
|
+
// NOISE FILTERS
|
|
216
|
+
// Skip lines that look like code, errors, or system output
|
|
217
|
+
// ============================================================
|
|
218
|
+
|
|
219
|
+
// Line-level patterns; a line matching any of them is dropped before the
// implicit patterns run. None of these use the `g` flag, so `.test()` is stateless.
const NOISE_PATTERNS = [
  /^[\s]*(?:import|export|const|let|var|function|class|if|else|for|while|return|throw|try|catch)\s/, // line starts with a JS keyword
  /^[\s]*[{}\[\]();]/, // line starts with a bracket, paren or semicolon
  /^[\s]*\/\//, // line comment
  /^[\s]*\*/, // line starts with "*" (block-comment body)
  /^[\s]*```/, // markdown code fence
  /^\s*$/, // blank line
  /^(?:error|warning|info|debug|trace):/i, // log-level prefix
  /^\s*at\s+\w+/, // stack trace lines
  /^[A-Z_]{2,}=/, // ENV variable assignments
  /^\d{4}-\d{2}-\d{2}/, // timestamp lines
];

/**
 * Tell whether a single line matches any entry of NOISE_PATTERNS
 * (code, comments, log output, blank lines, ...).
 *
 * @param {string} line - One line of text, without its line terminator.
 * @returns {boolean} true when at least one noise pattern matches.
 */
function isNoiseLine(line) {
  for (const noisePattern of NOISE_PATTERNS) {
    if (noisePattern.test(line)) {
      return true;
    }
  }
  return false;
}
|
|
240
|
+
|
|
241
|
+
// ============================================================
|
|
242
|
+
// FACT NORMALIZATION & COGNITIVE FILTER
|
|
243
|
+
// ============================================================
|
|
244
|
+
|
|
245
|
+
/**
 * Clean and normalize an extracted fact string.
 *
 * Steps, in order: trim, collapse every whitespace run to one space, strip
 * trailing `,` `;` `:` characters, strip leading/trailing quote characters
 * (`"`, `'`, backtick), trim again, then cap the result at 200 characters.
 * Periods are not removed here.
 *
 * @param {string} raw - Raw captured text; any falsy value yields ''.
 * @returns {string} The normalized fact, at most 200 characters long.
 */
function cleanFact(raw) {
  if (!raw) return '';
  return String(raw) // tolerate a truthy non-string instead of throwing
    .trim()
    .replace(/\s+/g, ' ') // collapse whitespace
    .replace(/[,;:]+$/, '') // strip trailing punctuation
    .replace(/^["'`]+|["'`]+$/g, '') // strip quotes
    .trim() // stripping punctuation/quotes can expose whitespace ("tabs ;" -> "tabs ")
    .slice(0, 200) // hard max fact length
    .trimEnd(); // the cut may land right after a space
}
|
|
260
|
+
|
|
261
|
+
// List of programming/tech concepts to distinguish tech context from conversational filler.
// Entries of up to two characters must equal a whole token; longer entries match as a
// substring of a token (see cognitiveNoiseFilter).
const TECH_CONCEPTS = [
  'mode', 'theme', 'config', 'stack', 'style', 'code', 'file', 'folder', 'path',
  'api', 'endpoint', 'json', 'data', 'db', 'database', 'table', 'migration',
  'schema', 'sql', 'query', 'url', 'port', 'host', 'env', 'environment',
  'node', 'npm', 'git', 'react', 'vue', 'angular', 'svelte', 'next', 'express',
  'postgres', 'sqlite', 'mongo', 'mysql', 'docker', 'ubuntu', 'linux', 'server',
  'pipeline', 'ci', 'cd', 'github', 'actions', 'oauth', 'auth', 'security',
  'token', 'key', 'credential', 'package', 'dependency', 'library', 'script',
  'test', 'jest', 'vitest', 'eslint', 'prettier', 'tailwind', 'css', 'html',
  'js', 'ts', 'typescript', 'javascript', 'eval', 'function', 'class', 'component',
  'import', 'export', 'require', 'const', 'let', 'var', 'compiler', 'build',
  'cli', 'command', 'terminal', 'mcp', 'client', 'persyst', 'memory'
];

/**
 * Filter out conversational filler and keep only valid technical statements/preferences.
 *
 * Returns false when the content:
 *   1. ends with "?", or mentions a question word and either contains " ?" or
 *      puts can/could/would/is/are/how/why/what/where right after a
 *      "Preference:" / "Rule:" / "Decision:" prefix;
 *   2. is a "Preference:" / "Decision:" whose first word is a bare pronoun;
 *   3. contains a short-term time reference ("today", "for now", ...);
 *   4. looks like a stack trace, error or build output;
 *   5. contains no tech term, known file extension or path separator.
 *
 * @param {string} content - The extracted fact text (template output)
 * @returns {boolean} - true if it is a valid, high-value fact
 */
function cognitiveNoiseFilter(content) {
  const normalized = content.toLowerCase().trim();

  // 1. Filter out interrogatives (questions)
  if (normalized.endsWith('?')) return false;

  const questionWords = ['how', 'why', 'what', 'where', 'when', 'who', 'can', 'could', 'would', 'is', 'are', 'should'];
  const mentionsQuestionWord = questionWords.some(
    (q) => normalized.startsWith(`${q} `) || normalized.includes(` ${q} `) || normalized.includes(`:${q} `)
  );
  if (mentionsQuestionWord) {
    if (normalized.includes(' ?')) return false;
    if (/(?:preference|rule|decision):\s+(?:can|could|would|is|are|how|why|what|where)\s/i.test(content)) return false;
  }

  // 2. Filter out transient pronouns/vague statements without enough context
  if (/(?:preference|decision):\s+(?:this|that|it|these|those|us|me|them|him|her)\b/i.test(content)) return false;

  // 3. Filter out transient time references indicating very short-term state
  const transientTerms = ['today', 'tomorrow', 'yesterday', 'now', 'just', 'temporary', 'currently', 'for now', 'briefly', 'at the moment'];
  const isTransient = transientTerms.some(
    (term) => normalized.includes(` ${term} `) || normalized.endsWith(` ${term}`)
  );
  if (isTransient) return false;

  // 4. Filter out trace logs, build outputs, compile errors
  if (normalized.includes('at ') && normalized.includes('.js:')) return false;
  if (normalized.includes('error:') || normalized.includes('exception:')) return false;
  if (normalized.includes('exit code') || normalized.includes('npm error')) return false;

  // 5. Require at least one programming/project-related concept.
  // Backslash is kept inside tokens so Windows-style paths reach the
  // path-separator check below; splitting on it made that check unreachable.
  const words = normalized.split(/[^a-zA-Z0-9\-\.\/\\]+/);
  const hasTechTerm = words.some((w) => {
    const matchesConcept = TECH_CONCEPTS.some((concept) => {
      // Very short concepts ('js', 'db', 'ci', ...) must match the whole token.
      return concept.length <= 2 ? w === concept : w.includes(concept);
    });
    return (
      matchesConcept ||
      w.endsWith('.js') || w.endsWith('.json') || w.endsWith('.css') || w.endsWith('.md') ||
      w.includes('/') || w.includes('\\')
    );
  });

  return hasTechTerm;
}
|
|
333
|
+
|
|
334
|
+
// ============================================================
|
|
335
|
+
// EXPLICIT SAVE EXTRACTION
|
|
336
|
+
// Runs first. Bypasses all noise filters.
|
|
337
|
+
// The user said "remember this" — we save it, period.
|
|
338
|
+
// ============================================================
|
|
339
|
+
|
|
340
|
+
/**
 * Collect user-commanded saves ("remember:", "note:", "don't forget", ...).
 *
 * Every EXPLICIT_SAVE_PATTERNS entry is run over the raw text. A hit is kept
 * unless its cleaned text is shorter than 8 characters, ends with "?",
 * names one of the memory tools (a meta-instruction), or repeats an earlier
 * hit (case- and whitespace-insensitive). Neither the line noise filter nor
 * the tech-concept filter is applied here.
 *
 * @param {string} text - Raw conversation text.
 * @returns {Array<{content: string, category: string, confidence: number, explicit: true}>}
 */
function extractExplicitSaves(text) {
  // Tool names that mark an instruction to the system itself rather than a fact.
  const metaWords = ['search_memories', 'add_memory', 'get_optimized_context', 'persyst tool'];
  const alreadyPrefixed = /^(?:Note|Reminder|Rule|Important|Warning|Caution|FYI):/i;

  const collected = [];
  const seenKeys = new Set();

  for (const { regex, category, confidence } of EXPLICIT_SAVE_PATTERNS) {
    // The regexes are global and shared, so always scan from the start.
    regex.lastIndex = 0;

    for (let hit = regex.exec(text); hit !== null; hit = regex.exec(text)) {
      const cleaned = cleanFact(hit[1] || hit[0]);

      // Too short to be useful.
      if (!cleaned || cleaned.length < 8) continue;

      // A pure question is not a fact.
      if (cleaned.endsWith('?')) continue;

      // Meta-instruction to the system itself ("remember to search memories").
      const lowered = cleaned.toLowerCase();
      if (metaWords.some((word) => lowered.includes(word))) continue;

      // De-duplicate across patterns.
      const dedupeKey = lowered.replace(/\s+/g, ' ').trim();
      if (seenKeys.has(dedupeKey)) continue;
      seenKeys.add(dedupeKey);

      // Add a Note:/Reminder: prefix unless the text already carries a label.
      let content;
      if (alreadyPrefixed.test(cleaned)) {
        content = cleaned;
      } else if (category === 'reminder') {
        content = `Reminder: ${cleaned}`;
      } else {
        content = `Note: ${cleaned}`;
      }

      collected.push({
        content,
        category,
        confidence,
        explicit: true // Mark as user-commanded — bypasses any downstream filters
      });
    }
  }

  return collected;
}
|
|
391
|
+
|
|
392
|
+
// ============================================================
|
|
393
|
+
// MAIN EXTRACTION FUNCTION
|
|
394
|
+
// ============================================================
|
|
395
|
+
|
|
396
|
+
/**
 * Extract facts from raw conversation text using regex heuristics.
 *
 * Runs in priority order:
 *   1. Explicit saves ("remember:", "note:", "don't forget") — taken from the
 *      raw text and returned unfiltered; they are never dropped, even when
 *      there are more of them than `maxFacts`.
 *   2. Implicit patterns (tech decisions, preferences, rules) — run on the text
 *      with noise lines removed, passed through cognitiveNoiseFilter, and only
 *      added while the combined count is below `maxFacts`.
 *
 * @param {string} text - Raw conversation text (user prompt or full turn)
 * @param {Object} [options={}]
 * @param {number} [options.minConfidence=0.65] - Minimum confidence for an implicit fact
 * @param {number} [options.maxFacts=15] - Budget shared by explicit and implicit facts
 * @returns {Array<{content: string, category: string, confidence: number, explicit?: boolean}>}
 *   Explicit facts first, then implicit facts sorted by descending confidence.
 *   Empty when `text` is not a string or is shorter than 10 characters.
 *
 * @example
 * // Explicit save — bypasses all filters
 * extractHeuristic("Remember: the staging server is flaky on Mondays")
 * // => [{ content: "Note: the staging server is flaky on Mondays", category: "note", confidence: 0.95, explicit: true }]
 *
 * // Implicit pattern — goes through noise filter
 * extractHeuristic("I prefer Postgres over SQLite for our backend database.")
 * // => [{ content: "Preference: Postgres over SQLite for our backend database", category: "preference", confidence: 0.80 }]
 */
export function extractHeuristic(text, options = {}) {
  const {
    minConfidence = 0.65,
    maxFacts = 15
  } = options;

  if (!text || typeof text !== 'string' || text.length < 10) {
    return [];
  }

  // --- Step 1: Explicit saves (highest priority, no filter) ---
  const explicitFacts = extractExplicitSaves(text);

  // Explicit saves consume the budget first. Checking before the scan avoids
  // appending one implicit fact when the budget is already exhausted.
  const implicitBudget = maxFacts - explicitFacts.length;
  if (!(implicitBudget > 0)) {
    return explicitFacts;
  }

  // --- Step 2: Implicit pattern matching (filtered, tech-required) ---
  const toKey = (value) => value.toLowerCase().replace(/\s+/g, ' ').trim();
  const seen = new Set(explicitFacts.map((fact) => toKey(fact.content)));
  const implicitFacts = [];

  // Drop code/log lines first; accept both LF and CRLF line endings.
  const cleanText = text
    .split(/\r?\n/)
    .filter((line) => !isNoiseLine(line))
    .join('\n');

  for (const pattern of PATTERNS) {
    if (implicitFacts.length >= implicitBudget) break;

    // Confidence is fixed per pattern, so skip the whole scan when it is too low.
    if (!(pattern.confidence >= minConfidence)) continue;

    // The regexes are global and shared, so always scan from the start.
    pattern.regex.lastIndex = 0;

    let match;
    while ((match = pattern.regex.exec(cleanText)) !== null) {
      if (match[0].length < 8) continue;

      let content;
      try {
        content = pattern.template(match);
      } catch (_) {
        // Best effort: a template that throws must not abort the extraction.
        continue;
      }
      if (!content || content.length < 5) continue;

      if (!cognitiveNoiseFilter(content)) continue;

      const key = toKey(content);
      if (seen.has(key)) continue;
      seen.add(key);

      implicitFacts.push({
        content,
        category: pattern.category,
        confidence: pattern.confidence
      });

      if (implicitFacts.length >= implicitBudget) {
        // Leaving the loop early; do not keep a stale scan position.
        pattern.regex.lastIndex = 0;
        break;
      }
    }
  }

  // Explicit facts first (user-commanded), then implicit sorted by confidence
  implicitFacts.sort((a, b) => b.confidence - a.confidence);
  return [...explicitFacts, ...implicitFacts];
}
|
|
478
|
+
|
|
479
|
+
/**
 * Quick check: does this text contain any extractable signals?
 *
 * Runs every explicit-save regex and then every implicit regex against the
 * raw text and stops at the first hit. No template, noise-line filter or
 * cognitive filter is applied, so a `true` result does not guarantee that
 * extractHeuristic() returns a fact for the same text.
 *
 * @param {string} text
 * @returns {boolean} false for non-strings and for text shorter than 10 characters.
 */
export function hasExtractableSignals(text) {
  // Same input contract as extractHeuristic: only strings of useful length.
  if (typeof text !== 'string' || text.length < 10) return false;

  // Explicit save triggers first, then implicit patterns.
  for (const { regex } of [...EXPLICIT_SAVE_PATTERNS, ...PATTERNS]) {
    // The regexes are global and shared: scan from the start, and do not
    // leave a scan position behind after a successful test.
    regex.lastIndex = 0;
    const found = regex.test(text);
    regex.lastIndex = 0;
    if (found) return true;
  }

  return false;
}
|