@yamo/memory-mesh 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +80 -0
- package/bin/memory_mesh.js +69 -0
- package/bin/scrubber.js +81 -0
- package/index.d.ts +111 -0
- package/lib/adapters/index.js +3 -0
- package/lib/embeddings/factory.js +150 -0
- package/lib/embeddings/index.js +2 -0
- package/lib/embeddings/service.js +586 -0
- package/lib/index.js +18 -0
- package/lib/lancedb/client.js +631 -0
- package/lib/lancedb/config.js +215 -0
- package/lib/lancedb/errors.js +144 -0
- package/lib/lancedb/index.js +4 -0
- package/lib/lancedb/schema.js +197 -0
- package/lib/memory/index.js +3 -0
- package/lib/memory/memory-context-manager.js +388 -0
- package/lib/memory/memory-mesh.js +910 -0
- package/lib/memory/memory-translator.js +130 -0
- package/lib/memory/migrate-memory.js +227 -0
- package/lib/memory/migrate-to-v2.js +120 -0
- package/lib/memory/scorer.js +85 -0
- package/lib/memory/vector-memory.js +364 -0
- package/lib/privacy/audit-logger.js +176 -0
- package/lib/privacy/dlp-redactor.js +72 -0
- package/lib/privacy/index.js +10 -0
- package/lib/reporting/skill-report-generator.js +283 -0
- package/lib/scrubber/.gitkeep +1 -0
- package/lib/scrubber/config/defaults.js +62 -0
- package/lib/scrubber/errors/scrubber-error.js +43 -0
- package/lib/scrubber/index.js +25 -0
- package/lib/scrubber/scrubber.js +130 -0
- package/lib/scrubber/stages/chunker.js +103 -0
- package/lib/scrubber/stages/metadata-annotator.js +74 -0
- package/lib/scrubber/stages/normalizer.js +59 -0
- package/lib/scrubber/stages/semantic-filter.js +61 -0
- package/lib/scrubber/stages/structural-cleaner.js +82 -0
- package/lib/scrubber/stages/validator.js +66 -0
- package/lib/scrubber/telemetry.js +66 -0
- package/lib/scrubber/utils/hash.js +39 -0
- package/lib/scrubber/utils/html-parser.js +45 -0
- package/lib/scrubber/utils/pattern-matcher.js +63 -0
- package/lib/scrubber/utils/token-counter.js +31 -0
- package/lib/search/filter.js +275 -0
- package/lib/search/hybrid.js +137 -0
- package/lib/search/index.js +3 -0
- package/lib/search/pattern-miner.js +160 -0
- package/lib/utils/error-sanitizer.js +84 -0
- package/lib/utils/handoff-validator.js +85 -0
- package/lib/utils/index.js +4 -0
- package/lib/utils/spinner.js +190 -0
- package/lib/utils/streaming-client.js +128 -0
- package/package.json +39 -0
- package/skills/SKILL.md +462 -0
- package/skills/skill-scrubber.yamo +41 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
 * FilterBuilder - Fluent API for building LanceDB filter expressions.
 *
 * Each method appends one SQL predicate string; `build()` joins all
 * predicates with the currently selected operator (`AND` by default).
 * Field names are interpolated verbatim and must come from trusted code;
 * values are rendered as escaped SQL literals.
 */
class FilterBuilder {
  /**
   * Create a new, empty FilterBuilder that combines predicates with AND.
   */
  constructor() {
    this.filters = [];
    this.operator = 'AND';
  }

  /**
   * Add equality filter. A `null` value produces `field IS NULL`, because
   * `field = NULL` never evaluates to true in SQL.
   * @param {string} field - Field name
   * @param {*} value - Value to compare
   * @returns {FilterBuilder} this for chaining
   */
  equals(field, value) {
    this.filters.push(
      value === null ? `${field} IS NULL` : `${field} = ${this._quote(value)}`
    );
    return this;
  }

  /**
   * Add inequality filter. A `null` value produces `field IS NOT NULL`.
   * @param {string} field - Field name
   * @param {*} value - Value to compare
   * @returns {FilterBuilder} this for chaining
   */
  notEquals(field, value) {
    this.filters.push(
      value === null ? `${field} IS NOT NULL` : `${field} != ${this._quote(value)}`
    );
    return this;
  }

  /**
   * Add greater than filter.
   * @param {string} field - Field name
   * @param {number} value - Value to compare (strings/Dates are quoted and escaped)
   * @returns {FilterBuilder} this for chaining
   */
  gt(field, value) {
    this.filters.push(`${field} > ${this._quote(value)}`);
    return this;
  }

  /**
   * Add greater than or equal filter.
   * @param {string} field - Field name
   * @param {number} value - Value to compare (strings/Dates are quoted and escaped)
   * @returns {FilterBuilder} this for chaining
   */
  gte(field, value) {
    this.filters.push(`${field} >= ${this._quote(value)}`);
    return this;
  }

  /**
   * Add less than filter.
   * @param {string} field - Field name
   * @param {number} value - Value to compare (strings/Dates are quoted and escaped)
   * @returns {FilterBuilder} this for chaining
   */
  lt(field, value) {
    this.filters.push(`${field} < ${this._quote(value)}`);
    return this;
  }

  /**
   * Add less than or equal filter.
   * @param {string} field - Field name
   * @param {number} value - Value to compare (strings/Dates are quoted and escaped)
   * @returns {FilterBuilder} this for chaining
   */
  lte(field, value) {
    this.filters.push(`${field} <= ${this._quote(value)}`);
    return this;
  }

  /**
   * Add contains filter (LIKE '%value%').
   * @param {string} field - Field name
   * @param {string} value - Value to search for
   * @returns {FilterBuilder} this for chaining
   */
  contains(field, value) {
    this.filters.push(`${field} LIKE '%${this._escapeLike(value)}%'`);
    return this;
  }

  /**
   * Add starts with filter (LIKE 'value%').
   * @param {string} field - Field name
   * @param {string} value - Value to match
   * @returns {FilterBuilder} this for chaining
   */
  startsWith(field, value) {
    this.filters.push(`${field} LIKE '${this._escapeLike(value)}%'`);
    return this;
  }

  /**
   * Add ends with filter (LIKE '%value').
   * @param {string} field - Field name
   * @param {string} value - Value to match
   * @returns {FilterBuilder} this for chaining
   */
  endsWith(field, value) {
    this.filters.push(`${field} LIKE '%${this._escapeLike(value)}'`);
    return this;
  }

  /**
   * Add IN list filter.
   * @param {string} field - Field name
   * @param {Array} values - Array of values
   * @returns {FilterBuilder} this for chaining
   * @throws {Error} If `values` is not a non-empty array
   */
  in(field, values) {
    if (!Array.isArray(values) || values.length === 0) {
      throw new Error('IN filter requires non-empty array');
    }
    const quoted = values.map((v) => this._quote(v)).join(', ');
    // SQL list syntax uses parentheses; square brackets denote an array literal.
    this.filters.push(`${field} IN (${quoted})`);
    return this;
  }

  /**
   * Add NOT IN list filter.
   * @param {string} field - Field name
   * @param {Array} values - Array of values
   * @returns {FilterBuilder} this for chaining
   * @throws {Error} If `values` is not a non-empty array
   */
  notIn(field, values) {
    if (!Array.isArray(values) || values.length === 0) {
      throw new Error('NOT IN filter requires non-empty array');
    }
    const quoted = values.map((v) => this._quote(v)).join(', ');
    this.filters.push(`${field} NOT IN (${quoted})`);
    return this;
  }

  /**
   * Add range filter (inclusive on both ends).
   * @param {string} field - Field name
   * @param {number} min - Minimum value
   * @param {number} max - Maximum value
   * @returns {FilterBuilder} this for chaining
   */
  range(field, min, max) {
    this.filters.push(
      `${field} >= ${this._quote(min)} AND ${field} <= ${this._quote(max)}`
    );
    return this;
  }

  /**
   * Add date range filter (inclusive). Date objects are rendered as ISO-8601
   * strings; any other value is stringified. Both bounds are emitted as
   * escaped string literals.
   * @param {string} field - Field name
   * @param {string|Date} startDate - Start date
   * @param {string|Date} endDate - End date
   * @returns {FilterBuilder} this for chaining
   */
  dateRange(field, startDate, endDate) {
    const toLiteral = (d) =>
      this._quote(d instanceof Date ? d.toISOString() : String(d));
    this.filters.push(
      `${field} >= ${toLiteral(startDate)} AND ${field} <= ${toLiteral(endDate)}`
    );
    return this;
  }

  /**
   * Add nested metadata equality filter (`metadata.<field>`). A `null` value
   * produces `metadata.<field> IS NULL`.
   * @param {string} field - Metadata field name
   * @param {*} value - Value to compare
   * @returns {FilterBuilder} this for chaining
   */
  metadata(field, value) {
    this.filters.push(
      value === null
        ? `metadata.${field} IS NULL`
        : `metadata.${field} = ${this._quote(value)}`
    );
    return this;
  }

  /**
   * Combine filters with AND. The operator applies to every predicate when
   * `build()` is called, not just to the ones added afterwards.
   * @returns {FilterBuilder} this for chaining
   */
  and() {
    this.operator = 'AND';
    return this;
  }

  /**
   * Combine filters with OR. The operator applies to every predicate when
   * `build()` is called, not just to the ones added afterwards.
   * @returns {FilterBuilder} this for chaining
   */
  or() {
    this.operator = 'OR';
    return this;
  }

  /**
   * Build and return the filter string.
   * @returns {string} Filter expression, or '' when no filters were added
   */
  build() {
    if (this.filters.length === 0) {
      return '';
    }

    return this.filters.join(` ${this.operator} `);
  }

  /**
   * Check if builder has any filters.
   * @returns {boolean} True if filters exist
   */
  hasFilters() {
    return this.filters.length > 0;
  }

  /**
   * Get filter count.
   * @returns {number} Number of filters
   */
  count() {
    return this.filters.length;
  }

  /**
   * Reset all filters and restore the AND operator.
   * @returns {FilterBuilder} this for chaining
   */
  reset() {
    this.filters = [];
    this.operator = 'AND';
    return this;
  }

  /**
   * Render a value as a SQL literal. Strings are single-quoted with embedded
   * quotes doubled, Dates become quoted ISO-8601 strings, `null` becomes
   * NULL, booleans become true/false, everything else is stringified.
   * @private
   * @param {*} value - Value to quote
   * @returns {string} Quoted value
   */
  _quote(value) {
    if (typeof value === 'string') {
      return `'${value.replace(/'/g, "''")}'`;
    }
    if (value === null) {
      return 'NULL';
    }
    if (typeof value === 'boolean') {
      return value ? 'true' : 'false';
    }
    if (value instanceof Date) {
      // String(date) is locale text and not a valid SQL literal.
      return `'${value.toISOString()}'`;
    }
    return String(value);
  }

  /**
   * Escape a value for embedding inside a single-quoted LIKE pattern.
   * The backslash is escaped first so that a backslash in the input cannot
   * neutralise the escaping of a following wildcard.
   * @private
   * @param {string} value - Value to escape (non-strings are stringified)
   * @returns {string} Escaped value
   * @throws {TypeError} If `value` is null or undefined
   */
  _escapeLike(value) {
    if (value === null || value === undefined) {
      throw new TypeError('LIKE filter requires a value');
    }
    return String(value)
      .replace(/\\/g, '\\\\')
      .replace(/'/g, "''")
      .replace(/%/g, '\\%')
      .replace(/_/g, '\\_');
  }

  /**
   * Create a new builder instance.
   * @returns {FilterBuilder} New builder
   */
  static create() {
    return new FilterBuilder();
  }
}
|
|
274
|
+
|
|
275
|
+
export default FilterBuilder;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HybridSearch - Combines vector and keyword search
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { QueryError } from "../lancedb/errors.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Combines a vector similarity search with a keyword-filtered pass over the
 * same index and merges both rankings with weighted Reciprocal Rank Fusion.
 */
class HybridSearch {
  /**
   * @param {Object} client - Search client exposing `search(embedding, options)`
   * @param {Object} embeddingFactory - Object exposing `embed(text)`
   * @param {Object} [options]
   * @param {number} [options.alpha] - Weight of the keyword ranking; the
   *   vector ranking is weighted `1 - alpha`. Falls back to the
   *   HYBRID_SEARCH_ALPHA environment variable, then to 0.5.
   * @param {number} [options.rrfK=60] - RRF smoothing constant
   */
  constructor(client, embeddingFactory, options = {}) {
    this.client = client;
    this.embeddingFactory = embeddingFactory;

    // A malformed environment value would otherwise yield NaN and turn every
    // fused score into NaN, which makes the final sort meaningless.
    const envAlpha = Number.parseFloat(process.env.HYBRID_SEARCH_ALPHA || '0.5');
    const defaultAlpha = Number.isNaN(envAlpha) ? 0.5 : envAlpha;
    this.alpha = options.alpha !== undefined ? options.alpha : defaultAlpha;
    this.rrfK = options.rrfK || 60;
  }

  /**
   * Run both searches in parallel and return the fused top results.
   * @param {string} query - Free-text query
   * @param {Object} [options]
   * @param {number} [options.limit=10] - Maximum number of results
   * @param {number} [options.alpha] - Per-call override of the keyword weight
   * @param {*} [options.filter] - Filter forwarded to the client
   * @returns {Promise<Array<Object>>} Fused results, best first
   * @throws {QueryError} If the search fails
   */
  async search(query, options = {}) {
    // @ts-ignore
    const limit = options.limit || 10;
    // @ts-ignore
    const alpha = options.alpha !== undefined ? options.alpha : this.alpha;

    try {
      // Over-fetch from each source so fusion has candidates to reorder.
      const [vectorResults, keywordResults] = await Promise.all([
        // @ts-ignore
        this._vectorSearch(query, limit * 2, options.filter),
        // @ts-ignore
        this._keywordSearch(query, limit * 2, options.filter)
      ]);

      const mergedResults = this._reciprocalRankFusion(
        vectorResults,
        keywordResults,
        alpha,
        this.rrfK
      );

      return mergedResults.slice(0, limit);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      throw new QueryError('Hybrid search failed', {
        query,
        alpha,
        originalError: message
      });
    }
  }

  /**
   * Embed the query and run a cosine search through the client.
   * @param {string} query - Free-text query
   * @param {number} limit - Maximum number of rows to request
   * @param {*} [filter=null] - Optional filter forwarded to the client
   * @returns {Promise<Array<Object>>} Rows with `score` replaced by
   *   `1 - score` and `searchType: 'vector'`
   */
  async _vectorSearch(query, limit, filter = null) {
    const embedding = await this.embeddingFactory.embed(query);
    const searchOptions = { limit, metric: 'cosine' };
    if (filter) searchOptions.filter = filter;

    const result = await this.client.search(embedding, searchOptions);
    return result.map((r) => ({
      ...r,
      score: 1 - (r.score || 0),
      searchType: 'vector'
    }));
  }

  /**
   * Approximate keyword search: over-fetch vector candidates, then keep only
   * rows whose `content` contains at least one query term (case-insensitive).
   * Best-effort by design: any failure yields an empty list so that the
   * vector half of the hybrid search can still answer.
   * @param {string} query - Free-text query
   * @param {number} limit - Maximum number of rows to return
   * @param {*} [filter=null] - Optional filter forwarded to the client
   * @returns {Promise<Array<Object>>} Matching rows with `searchType: 'keyword'`
   */
  async _keywordSearch(query, limit, filter = null) {
    try {
      // Drop empty terms: ''.includes-style matches are always true, so a
      // query with leading/trailing whitespace would match every row.
      const queryTerms = query.toLowerCase().split(/\s+/).filter(Boolean);
      if (queryTerms.length === 0) {
        return [];
      }

      const embedding = await this.embeddingFactory.embed(query);
      const searchOptions = { limit: limit * 3, metric: 'cosine' };
      if (filter) searchOptions.filter = filter;

      const result = await this.client.search(embedding, searchOptions);

      return result
        .filter((r) => {
          // Rows without textual content cannot match a keyword; skip them
          // instead of throwing and discarding every keyword hit.
          if (typeof r.content !== 'string') return false;
          const content = r.content.toLowerCase();
          return queryTerms.some((term) => content.includes(term));
        })
        .slice(0, limit)
        .map((r) => ({
          ...r,
          score: r.score || 0,
          searchType: 'keyword'
        }));
    } catch (error) {
      return [];
    }
  }

  /**
   * Merge two ranked lists with weighted Reciprocal Rank Fusion. Each list
   * contributes `weight / (k + rank)` per result, keyed by `result.id`.
   * @param {Array<Object>} vectorResults - Vector-ranked rows
   * @param {Array<Object>} keywordResults - Keyword-ranked rows
   * @param {number} alpha - Keyword weight; vector weight is `1 - alpha`
   * @param {number} [k=60] - RRF smoothing constant
   * @returns {Array<Object>} Rows annotated with `combinedScore`,
   *   `vectorScore`, `keywordScore`, `vectorRank`, `keywordRank` and
   *   `searchType`, sorted by `combinedScore` descending
   */
  _reciprocalRankFusion(vectorResults, keywordResults, alpha, k = 60) {
    const scores = new Map();
    vectorResults.forEach((result, index) => {
      const rr = 1 / (k + index + 1);
      scores.set(result.id, {
        result,
        vectorScore: rr * (1 - alpha),
        keywordScore: 0,
        vectorRank: index + 1,
        keywordRank: null
      });
    });

    keywordResults.forEach((result, index) => {
      const rr = 1 / (k + index + 1);
      if (scores.has(result.id)) {
        const entry = scores.get(result.id);
        entry.keywordScore = rr * alpha;
        entry.keywordRank = index + 1;
      } else {
        scores.set(result.id, {
          result,
          vectorScore: 0,
          keywordScore: rr * alpha,
          vectorRank: null,
          keywordRank: index + 1
        });
      }
    });

    return Array.from(scores.values())
      .map(({ result, vectorScore, keywordScore, vectorRank, keywordRank }) => ({
        ...result,
        combinedScore: vectorScore + keywordScore,
        vectorScore,
        keywordScore,
        vectorRank,
        keywordRank,
        searchType: vectorRank !== null && keywordRank !== null
          ? 'hybrid'
          : vectorRank !== null ? 'vector' : 'keyword'
      }))
      .sort((a, b) => b.combinedScore - a.combinedScore);
  }

  /**
   * @returns {{alpha: number, rrfK: number, type: string}} Current settings
   */
  getStats() {
    return { alpha: this.alpha, rrfK: this.rrfK, type: 'hybrid' };
  }
}
|
|
136
|
+
|
|
137
|
+
export default HybridSearch;
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PatternMiner - Analyzes search results for patterns and insights
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import crypto from "crypto";
|
|
6
|
+
|
|
7
|
+
/**
 * Analyzes a list of search results: removes exact duplicates, groups
 * similar results into clusters, extracts frequent words as themes and
 * optionally extracts entities (email addresses).
 */
class PatternMiner {
  /**
   * @param {Object} [options]
   * @param {number} [options.similarityThreshold=0.8] - Minimum similarity for two results to share a cluster
   * @param {number} [options.minClusterSize=2] - Minimum members for a multi-member cluster
   * @param {number} [options.maxThemes=10] - Maximum number of themes returned
   * @param {number} [options.minWordLength=4] - Minimum word length (inclusive) considered for themes and overlap
   */
  constructor(options = {}) {
    // `??` rather than `||` so that an explicit 0 is honoured.
    // @ts-ignore
    this.similarityThreshold = options.similarityThreshold ?? 0.8;
    // @ts-ignore
    this.minClusterSize = options.minClusterSize ?? 2;
    // @ts-ignore
    this.maxThemes = options.maxThemes ?? 10;
    // @ts-ignore
    this.minWordLength = options.minWordLength ?? 4;
  }

  /**
   * Run the full analysis over a result list.
   * @param {Array<Object>} results - Search results (each expected to carry `content`)
   * @param {Object} [options]
   * @param {boolean} [options.extractEntities=false] - Also extract entities
   * @param {boolean} [options.deduplicate=true] - Remove exact-content duplicates first
   * @returns {Promise<Object>} Counts, clusters, themes, entities and a summary string
   */
  async minePatterns(results, options = {}) {
    // @ts-ignore
    const { extractEntities = false, deduplicate = true } = options;
    let workingResults = [...results];
    if (deduplicate) workingResults = this.deduplicate(workingResults);

    const patterns = {
      originalCount: results.length,
      uniqueCount: workingResults.length,
      duplicatesRemoved: results.length - workingResults.length,
      clusters: [],
      themes: [],
      entities: [],
      summary: ''
    };

    // @ts-ignore
    patterns.clusters = this._clusterResults(workingResults);
    // @ts-ignore
    patterns.themes = this._extractThemes(workingResults);

    if (extractEntities) {
      // @ts-ignore
      patterns.entities = await this._extractEntities(workingResults);
    }

    patterns.summary = this._generateSummary(patterns);
    return patterns;
  }

  /**
   * Greedy single-pass clustering: each unassigned result becomes a
   * representative and absorbs every later unassigned result whose
   * similarity reaches the threshold. Every input result ends up in exactly
   * one cluster; a group smaller than `minClusterSize` is not formed, the
   * representative is emitted as a singleton and the would-be members stay
   * available for later representatives.
   * @param {Array<Object>} results - Results to cluster
   * @returns {Array<{representative: Object, members: Array<Object>, similarityScore: number}>}
   */
  _clusterResults(results) {
    const clusters = [];
    const used = new Set();
    for (let i = 0; i < results.length; i++) {
      if (used.has(i)) continue;

      // Collect candidates first; only commit them once the size is known,
      // otherwise an undersized group would silently drop its members.
      const candidateIndexes = [];
      let lowestSimilarity = 1.0;
      for (let j = i + 1; j < results.length; j++) {
        if (used.has(j)) continue;
        const similarity = this._calculateSimilarity(results[i], results[j]);
        if (similarity >= this.similarityThreshold) {
          candidateIndexes.push(j);
          lowestSimilarity = Math.min(lowestSimilarity, similarity);
        }
      }

      used.add(i);
      if (candidateIndexes.length > 0 && candidateIndexes.length + 1 >= this.minClusterSize) {
        candidateIndexes.forEach((j) => used.add(j));
        clusters.push({
          representative: results[i],
          members: [results[i], ...candidateIndexes.map((j) => results[j])],
          similarityScore: lowestSimilarity
        });
      } else {
        clusters.push({ representative: results[i], members: [results[i]], similarityScore: 1.0 });
      }
    }
    return clusters;
  }

  /**
   * Weighted similarity of two results in [0, 1]: score closeness (0.5),
   * metadata type match (0.25) and content word overlap (0.1), normalised by
   * the weights of the factors that were actually available.
   * @param {Object} result1
   * @param {Object} result2
   * @returns {number} Similarity between 0 and 1
   */
  _calculateSimilarity(result1, result2) {
    let score = 0;
    let factors = 0;
    if (result1.score !== undefined && result2.score !== undefined) {
      const scoreDiff = Math.abs(result1.score - result2.score);
      const scoreSim = Math.max(0, 1 - scoreDiff);
      score += scoreSim * 0.5;
      factors += 0.5;
    }
    if (result1.metadata?.type && result2.metadata?.type) {
      const typeMatch = result1.metadata.type === result2.metadata.type ? 1 : 0;
      score += typeMatch * 0.25;
      factors += 0.25;
    }
    const contentSim = this._contentOverlap(result1.content, result2.content);
    score += contentSim * 0.1;
    factors += 0.1;
    return factors > 0 ? score / factors : 0;
  }

  /**
   * Jaccard overlap of the word sets of two texts, considering only words of
   * at least `minWordLength` characters (same inclusive bound as theme
   * extraction).
   * @param {string} content1
   * @param {string} content2
   * @returns {number} Overlap between 0 and 1; 0 when either side has no
   *   qualifying words or is not a string
   */
  _contentOverlap(content1, content2) {
    if (typeof content1 !== 'string' || typeof content2 !== 'string') return 0;
    const toWordSet = (text) =>
      new Set(text.toLowerCase().split(/\s+/).filter((w) => w.length >= this.minWordLength));
    const words1 = toWordSet(content1);
    const words2 = toWordSet(content2);
    if (words1.size === 0 || words2.size === 0) return 0;
    const intersection = new Set([...words1].filter((x) => words2.has(x)));
    const union = new Set([...words1, ...words2]);
    return intersection.size / union.size;
  }

  /**
   * Count word occurrences across all results (letters a-z only, stop words
   * and short words removed) and return the most frequent ones.
   * @param {Array<Object>} results
   * @returns {Array<{word: string, count: number, frequency: number}>}
   *   Up to `maxThemes` entries; `frequency` is occurrences per result
   */
  _extractThemes(results) {
    const themeMap = new Map();
    const stopWords = new Set(['the', 'this', 'that', 'with', 'from', 'have', 'been', 'were', 'they', 'their', 'what', 'when', 'where', 'which', 'will', 'your', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'could', 'would', 'should', 'each', 'only', 'being', 'other', 'some', 'such', 'them', 'these', 'those', 'over', 'also']);
    results.forEach((result) => {
      // Results without textual content contribute no words.
      if (typeof result.content !== 'string') return;
      const words = result.content
        .toLowerCase()
        .split(/\s+/)
        .map((w) => w.replace(/[^a-z]/g, ''))
        .filter((w) => w.length >= this.minWordLength && !stopWords.has(w));
      words.forEach((word) => {
        themeMap.set(word, (themeMap.get(word) || 0) + 1);
      });
    });
    return Array.from(themeMap.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, this.maxThemes)
      .map(([word, count]) => ({
        word, count, frequency: count / results.length
      }));
  }

  /**
   * Extract unique entities from result contents. Currently only email
   * addresses are recognised.
   * @param {Array<Object>} results
   * @returns {Promise<Array<{type: string, value: string}>>} Unique entities in first-seen order
   */
  async _extractEntities(results) {
    const seen = new Set();
    const uniqueEntities = [];
    results.forEach((result) => {
      if (typeof result.content !== 'string') return;
      const emailMatches = result.content.match(/\b[\w.-]+@[\w.-]+\.\w+\b/g) || [];
      emailMatches.forEach((value) => {
        const key = `email:${value}`;
        if (!seen.has(key)) {
          seen.add(key);
          uniqueEntities.push({ type: 'email', value });
        }
      });
    });
    return uniqueEntities;
  }

  /**
   * @param {Object} patterns - Object carrying `originalCount` and `uniqueCount`
   * @returns {string} One-line summary
   */
  _generateSummary(patterns) {
    return `Original results: ${patterns.originalCount}, Unique results: ${patterns.uniqueCount}`;
  }

  /**
   * Remove results whose `content` is byte-identical to an earlier result.
   * MD5 is used only as a compact fingerprint, not for security. Results
   * without string content cannot be compared and are always kept.
   * @param {Array<Object>} results
   * @returns {Array<Object>} Results in original order with duplicates removed
   */
  deduplicate(results) {
    const seen = new Set();
    const unique = [];
    results.forEach((result) => {
      if (typeof result.content !== 'string') {
        unique.push(result);
        return;
      }
      const hash = crypto.createHash('md5').update(result.content).digest('hex');
      if (!seen.has(hash)) {
        seen.add(hash);
        unique.push(result);
      }
    });
    return unique;
  }

  /**
   * @returns {{similarityThreshold: number, minClusterSize: number, maxThemes: number, minWordLength: number}}
   */
  getStats() {
    return {
      similarityThreshold: this.similarityThreshold,
      minClusterSize: this.minClusterSize,
      maxThemes: this.maxThemes,
      minWordLength: this.minWordLength
    };
  }
}
|
|
159
|
+
|
|
160
|
+
export default PatternMiner;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Secure Error Handling Utilities
|
|
3
|
+
* Sanitizes error messages to prevent API key leakage
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Sanitize error messages by redacting sensitive information.
 *
 * Redacts, in order: Bearer tokens, `sk-` style provider keys (including
 * keys that contain `-` or `_`, such as project-scoped keys), well-known
 * API-key environment assignments, generic `api_key` / `apiKey` / `api-key`
 * assignments, Authorization headers and JWT-shaped tokens.
 *
 * @param {string} message - Error message to sanitize
 * @returns {string} Sanitized error message, or a fixed placeholder when
 *   `message` is not a string
 */
export function sanitizeErrorMessage(message) {
  if (typeof message !== 'string') {
    return '[Non-string error message]';
  }

  // Redact common sensitive patterns
  return message
    // Redact Bearer tokens
    .replace(/Bearer\s+[A-Za-z0-9\-._~+/]+=*/gi, 'Bearer [REDACTED]')
    // Redact provider secret keys: "sk-" followed by 20+ key characters.
    // Hyphens and underscores are allowed so prefixed keys are caught too.
    .replace(/sk-[A-Za-z0-9_-]{20,}/g, 'sk-[REDACTED]')
    // Redact environment variable patterns that might contain secrets.
    // Runs before the generic rule so the variable name is preserved.
    .replace(/(OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY)[="'\s]+[A-Za-z0-9\-_]+/gi, '$1=[REDACTED]')
    // Redact generic API keys (20+ key chars after api_key / apiKey / api-key,
    // separated by any mix of quotes, whitespace, ':' or '=')
    .replace(/api[_-]?key["'\s:=]+[A-Za-z0-9_-]{20,}/gi, 'api_key: [REDACTED]')
    // Redact Authorization headers
    .replace(/Authorization:\s*[^"\r\n]+/gi, 'Authorization: [REDACTED]')
    // Redact potential JWT tokens (header.payload.signature pattern)
    .replace(/eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]*/g, '[JWT_REDACTED]');
}
|
|
31
|
+
|
|
32
|
+
/**
 * Sanitize error object for logging.
 *
 * Produces a plain object with the error's name, a redacted message, an
 * optional code and a timestamp. The stack is only included when
 * NODE_ENV is 'development', and is redacted as well because a stack
 * trace begins with the original (unsanitized) error message.
 *
 * @param {Error|Object} error - Error object to sanitize
 * @returns {Object} Sanitized error object safe for logging
 */
export function sanitizeErrorForLogging(error) {
  if (!error || typeof error !== 'object') {
    return { message: '[Invalid error object]' };
  }

  const sanitized = {
    name: error.name || 'Error',
    message: sanitizeErrorMessage(error.message || 'Unknown error')
  };

  // Only include stack in development. The first line of a stack repeats the
  // raw message, so it must go through the same redaction as the message.
  if (process.env.NODE_ENV === 'development' && error.stack) {
    sanitized.stack = typeof error.stack === 'string'
      ? sanitizeErrorMessage(error.stack)
      : error.stack;
  }

  // Include code if present (non-sensitive)
  if (error.code) {
    sanitized.code = error.code;
  }

  // Include timestamp
  sanitized.timestamp = new Date().toISOString();

  return sanitized;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Wrap a function to catch and sanitize errors.
 *
 * The returned wrapper is always async. When `fn` throws or rejects, the
 * wrapper rejects with an Error whose message is the sanitized message and
 * which carries the same fields the failure payload has always had:
 * `success: false` and `error: { ...sanitizedError, context }`. Throwing a
 * real Error (rather than a bare object) keeps `instanceof Error` checks,
 * stack traces and unhandled-rejection output meaningful.
 *
 * @param {Function} fn - Function to wrap
 * @param {string} context - Context description for error logging
 * @returns {Function} Wrapped function with error sanitization
 */
export function withSanitizedErrors(fn, context = 'operation') {
  return async (...args) => {
    try {
      return await fn(...args);
    } catch (error) {
      const details = {
        ...sanitizeErrorForLogging(error),
        context
      };
      // `success` and `error` are own enumerable properties, so callers that
      // read or serialize them see the same shape as before.
      throw Object.assign(new Error(details.message), {
        success: false,
        error: details
      });
    }
  };
}
|