@memlab/core 1.1.4 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/__tests__/parser/HeapParser.test.js +2 -2
  2. package/dist/__tests__/parser/NodeHeap.test.js +5 -5
  3. package/dist/__tests__/parser/StringNode.test.js +1 -1
  4. package/dist/__tests__/parser/traverse/HeapNodeTraverse.test.js +2 -2
  5. package/dist/index.d.ts +5 -1
  6. package/dist/index.js +22 -2
  7. package/dist/lib/Config.d.ts +16 -9
  8. package/dist/lib/Config.js +15 -0
  9. package/dist/lib/FileManager.js +4 -2
  10. package/dist/lib/HeapAnalyzer.js +25 -9
  11. package/dist/lib/NodeHeap.d.ts +52 -9
  12. package/dist/lib/NodeHeap.js +72 -21
  13. package/dist/lib/PackageInfoLoader.d.ts +7 -0
  14. package/dist/lib/PackageInfoLoader.js +66 -0
  15. package/dist/lib/Serializer.js +48 -25
  16. package/dist/lib/Types.d.ts +119 -35
  17. package/dist/lib/Utils.js +24 -9
  18. package/dist/lib/heap-data/HeapSnapshot.d.ts +1 -0
  19. package/dist/lib/heap-data/HeapSnapshot.js +3 -30
  20. package/dist/lib/heap-data/HeapStringNode.js +2 -0
  21. package/dist/lib/heap-data/MemLabTagStore.d.ts +23 -0
  22. package/dist/lib/heap-data/MemLabTagStore.js +110 -0
  23. package/dist/trace-cluster/TraceBucket.js +6 -1
  24. package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.d.ts +15 -0
  25. package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.js +61 -0
  26. package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.d.ts +11 -0
  27. package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.js +54 -0
  28. package/dist/trace-cluster/strategies/machine-learning/HAC.d.ts +17 -0
  29. package/dist/trace-cluster/strategies/machine-learning/HAC.js +122 -0
  30. package/dist/trace-cluster/strategies/machine-learning/Ngram.d.ts +11 -0
  31. package/dist/trace-cluster/strategies/machine-learning/Ngram.js +22 -0
  32. package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.d.ts +38 -0
  33. package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.js +144 -0
  34. package/package.json +1 -1
@@ -18,11 +18,13 @@ const Console_1 = __importDefault(require("../Console"));
18
18
  const HeapNode_1 = __importDefault(require("./HeapNode"));
19
19
  const HeapEdge_1 = __importDefault(require("./HeapEdge"));
20
20
  const HeapUtils_1 = require("./HeapUtils");
21
+ const MemLabTagStore_1 = __importDefault(require("./MemLabTagStore"));
21
22
  const EMPTY_UINT8_ARRAY = new Uint8Array(0);
22
23
  const EMPTY_UINT32_ARRAY = new Uint32Array(0);
23
24
  class HeapSnapshot {
24
25
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
25
26
  constructor(snapshot, _options = {}) {
27
+ this.isProcessed = false;
26
28
  this._nodeCount = -1;
27
29
  this._edgeCount = -1;
28
30
  this._nodeId2NodeIdx = {};
@@ -158,36 +160,7 @@ class HeapSnapshot {
158
160
  return detected;
159
161
  }
160
162
  hasObjectWithTag(tag) {
161
- // get tagStore
162
- let tagStore = null;
163
- this.nodes.forEach((node) => {
164
- if (node.name === 'MemLabTaggedStore' && node.type === 'object') {
165
- tagStore = node;
166
- return false;
167
- }
168
- });
169
- if (tagStore == null) {
170
- return false;
171
- }
172
- const store = tagStore;
173
- // get tagStore.taggedObjects
174
- const taggedObjects = store.getReferenceNode('taggedObjects', 'property');
175
- if (taggedObjects == null) {
176
- return false;
177
- }
178
- // get taggedObjects[tag]
179
- const weakSet = taggedObjects.getReferenceNode(tag, 'property');
180
- if (weakSet == null) {
181
- return false;
182
- }
183
- // get weakSet.table
184
- const table = weakSet.getReferenceNode('table');
185
- if (table == null) {
186
- return false;
187
- }
188
- // check if the table has any weak reference to any object
189
- const ref = table.findAnyReference((edge) => edge.type === 'weak' && edge.toNode.name !== 'system / Oddball');
190
- return ref != null;
163
+ return MemLabTagStore_1.default.hasObjectWithTag(this, tag);
191
164
  }
192
165
  getNodeById(id) {
193
166
  if (!(id in this._nodeId2NodeIdx)) {
@@ -35,6 +35,8 @@ class HeapStringNode extends HeapNode_1.default {
35
35
  if (parentNode == null) {
36
36
  throw (0, HeapUtils_1.throwError)(new Error('broken sliced string'));
37
37
  }
38
+ // sliced string in heap snapshot doesn't include
39
+ // the start index and the end index, so this may be inaccurate
38
40
  return parentNode.stringValue;
39
41
  }
40
42
  return this.name;
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ import type { AnyValue, IHeapSnapshot } from '../Types';
11
+ declare type AnyObject = Record<AnyValue, AnyValue>;
12
+ /** @internal */
13
+ export default class MemLabTaggedStore {
14
+ taggedObjects: Record<string, WeakSet<AnyObject>>;
15
+ private constructor();
16
+ private static instance;
17
+ readonly id: string;
18
+ static getInstance(): MemLabTaggedStore;
19
+ static tagObject<T>(o: T, tag: string): void;
20
+ static hasObjectWithTag(heap: IHeapSnapshot, tag: string): boolean;
21
+ }
22
+ export {};
23
+ //# sourceMappingURL=MemLabTagStore.d.ts.map
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * @emails oncall+ws_labs
9
+ * @format
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ const __1 = require("../..");
13
+ let uindex = 1;
14
+ function getUniqueID() {
15
+ const randId = `${Math.random()}`;
16
+ return `${process.pid}-${Date.now()}-${randId}-${uindex++}`;
17
+ }
18
+ /** @internal */
19
+ class MemLabTaggedStore {
20
+ constructor() {
21
+ this.id = getUniqueID();
22
+ this.taggedObjects = Object.create(null);
23
+ }
24
+ // make sure it's a singleton
25
+ static getInstance() {
26
+ if (!MemLabTaggedStore.instance) {
27
+ MemLabTaggedStore.instance = new MemLabTaggedStore();
28
+ }
29
+ return MemLabTaggedStore.instance;
30
+ }
31
+ // tag an object with a mark
32
+ static tagObject(o, tag) {
33
+ const store = MemLabTaggedStore.getInstance();
34
+ if (!store.taggedObjects[tag]) {
35
+ store.taggedObjects[tag] = new WeakSet();
36
+ }
37
+ store.taggedObjects[tag].add(o);
38
+ }
39
+ // check if any object in the heap snapshot has the mark
40
+ // tagged by this MemLabTaggedStore in this execution context
41
+ static hasObjectWithTag(heap, tag) {
42
+ const curContextTagStoreID = MemLabTaggedStore.getInstance().id;
43
+ let tagStore = null;
44
+ // get all MemLabTaggedStore instances in the heap snapshot
45
+ const stores = [];
46
+ heap.nodes.forEach((node) => {
47
+ if (node.name === 'MemLabTaggedStore' && node.type === 'object') {
48
+ stores.push(node);
49
+ }
50
+ });
51
+ // if no tag store found
52
+ if (stores.length === 0) {
53
+ return false;
54
+ // if there is only one store found
55
+ }
56
+ else if (stores.length === 1) {
57
+ tagStore = stores[0];
58
+ // if there are multiple MemLabTagStore instances
59
+ // found in the heap snapshot
60
+ }
61
+ else if (stores.length > 1) {
62
+ stores.forEach((node) => {
63
+ // in case multiple instances of MemLabTaggedStore exists
64
+ // in the heap snapshot, we need to make sure that the
65
+ // tag store is the one matching the current execution context
66
+ let storeID = '';
67
+ // match tag store id
68
+ node.forEachReference(edge => {
69
+ var _a, _b;
70
+ if (edge.name_or_index === 'id' && edge.toNode.isString) {
71
+ storeID = (_b = (_a = edge.toNode.toStringNode()) === null || _a === void 0 ? void 0 : _a.stringValue) !== null && _b !== void 0 ? _b : '';
72
+ return { stop: true };
73
+ }
74
+ });
75
+ if (curContextTagStoreID === storeID) {
76
+ tagStore = node;
77
+ }
78
+ });
79
+ if (tagStore == null) {
80
+ throw __1.utils.haltOrThrow('Multiple MemLabTagStore instances found in heap snapshot ' +
81
+ 'when checking object tags, please make sure only one memlab ' +
82
+ 'instance is running at a time and double check that memlab is ' +
83
+ 'not running in Jest concurrent mode.');
84
+ }
85
+ }
86
+ if (tagStore == null) {
87
+ return false;
88
+ }
89
+ const store = tagStore;
90
+ // get tagStore.taggedObjects
91
+ const taggedObjects = store.getReferenceNode('taggedObjects', 'property');
92
+ if (taggedObjects == null) {
93
+ return false;
94
+ }
95
+ // get taggedObjects[tag]
96
+ const weakSet = taggedObjects.getReferenceNode(tag, 'property');
97
+ if (weakSet == null) {
98
+ return false;
99
+ }
100
+ // get weakSet.table
101
+ const table = weakSet.getReferenceNode('table');
102
+ if (table == null) {
103
+ return false;
104
+ }
105
+ // check if the table has any weak reference to any object
106
+ const ref = table.findAnyReference((edge) => edge.type === 'weak' && edge.toNode.name !== 'system / Oddball');
107
+ return ref != null;
108
+ }
109
+ }
110
+ exports.default = MemLabTaggedStore;
@@ -20,6 +20,7 @@ const Utils_1 = __importDefault(require("../lib/Utils"));
20
20
  const TraceElement_1 = require("./TraceElement");
21
21
  const TraceSimilarityStrategy_1 = __importDefault(require("./strategies/TraceSimilarityStrategy"));
22
22
  const TraceAsClusterStrategy_1 = __importDefault(require("./strategies/TraceAsClusterStrategy"));
23
+ const MLTraceSimilarityStrategy_1 = __importDefault(require("./strategies/MLTraceSimilarityStrategy"));
23
24
  // sync up with html/intern/js/webspeed/memlab/lib/LeakCluster.js
24
25
  class NormalizedTrace {
25
26
  constructor(p = null, snapshot = null) {
@@ -157,7 +158,11 @@ class NormalizedTrace {
157
158
  };
158
159
  }
159
160
  static clusterLeakTraces(leakTraces) {
160
- const { allClusters } = NormalizedTrace.diffTraces(leakTraces, []);
161
+ const { allClusters } = NormalizedTrace.diffTraces(leakTraces, [], {
162
+ strategy: Config_1.default.isMLClustering
163
+ ? new MLTraceSimilarityStrategy_1.default()
164
+ : undefined,
165
+ });
161
166
  const lastNodeFromTrace = (trace) => trace[trace.length - 1];
162
167
  const labaledLeakTraces = allClusters.reduce((acc, bucket) => {
163
168
  const lastNodeFromFirstTrace = lastNodeFromTrace(bucket[0]);
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ import type { IClusterStrategy, LeakTrace, TraceDiff } from '../../lib/Types';
11
+ export default class MLTraceSimilarityStrategy implements IClusterStrategy {
12
+ diffTraces(newLeakTraces: LeakTrace[]): TraceDiff;
13
+ traceToDoc(trace: LeakTrace): string;
14
+ }
15
+ //# sourceMappingURL=MLTraceSimilarityStrategy.d.ts.map
@@ -0,0 +1,61 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * @emails oncall+ws_labs
9
+ * @format
10
+ */
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ const Config_1 = __importDefault(require("../../lib/Config"));
16
+ const DistanceMatrix_1 = require("./machine-learning/DistanceMatrix");
17
+ const HAC_1 = require("./machine-learning/HAC");
18
+ const TfidfVectorizer_1 = require("./machine-learning/TfidfVectorizer");
19
+ class MLTraceSimilarityStrategy {
20
+ diffTraces(newLeakTraces) {
21
+ var _a;
22
+ const rawDocuments = newLeakTraces.map(this.traceToDoc);
23
+ const vectorizer = new TfidfVectorizer_1.TfidfVectorizer({ rawDocuments });
24
+ const tfidfs = vectorizer.computeTfidfs();
25
+ const dmatrix = (0, DistanceMatrix_1.distance)(tfidfs);
26
+ const result = (0, HAC_1.cluster)(rawDocuments.length, dmatrix, Config_1.default.mlClusteringLinkageMaxDistance);
27
+ const map = new Map();
28
+ for (let i = 0; i < result.length; i++) {
29
+ const traceIdx = result[i];
30
+ const repTrace = newLeakTraces[traceIdx];
31
+ const trace = newLeakTraces[i];
32
+ if (!map.has(repTrace)) {
33
+ map.set(repTrace, [repTrace]);
34
+ }
35
+ // to please linter
36
+ (_a = map.get(repTrace)) === null || _a === void 0 ? void 0 : _a.push(trace);
37
+ }
38
+ return {
39
+ allClusters: Array.from(map.values()),
40
+ staleClusters: [],
41
+ clustersToAdd: [],
42
+ };
43
+ }
44
+ traceToDoc(trace) {
45
+ const res = [];
46
+ for (const t of trace) {
47
+ let name = t.kind === 'node' ? String(t.name) : String(t.name_or_index);
48
+ if (name === '') {
49
+ name = '_null_';
50
+ }
51
+ name = name.replace(/ /g, '_');
52
+ name = name.replace(/\d/g, '');
53
+ if (name === '') {
54
+ name = '_number_';
55
+ }
56
+ res.push(name);
57
+ }
58
+ return res.join(' ');
59
+ }
60
+ }
61
+ exports.default = MLTraceSimilarityStrategy;
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ export declare const distance: (tfidfs: Record<string, number>[]) => Float32Array;
11
+ //# sourceMappingURL=DistanceMatrix.d.ts.map
@@ -0,0 +1,54 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * @emails oncall+ws_labs
9
+ * @format
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.distance = void 0;
13
+ const cache = new Map();
14
+ const buildIntersection = (tfidfs, i, j) => {
15
+ const intersection = [];
16
+ if (!cache.has(i)) {
17
+ cache.set(i, Object.keys(tfidfs[i]));
18
+ }
19
+ if (!cache.has(j)) {
20
+ cache.set(j, Object.keys(tfidfs[j]));
21
+ }
22
+ const [keys, tfidf] = cache.get(i).length > cache.get(j).length
23
+ ? [cache.get(j), tfidfs[i]]
24
+ : [cache.get(i), tfidfs[j]];
25
+ for (const k of keys) {
26
+ if (tfidf[k]) {
27
+ intersection.push(k);
28
+ }
29
+ }
30
+ return intersection;
31
+ };
32
+ const distance = (tfidfs) => {
33
+ const n = tfidfs.length;
34
+ const distances = new Float32Array((n * (n - 1)) / 2);
35
+ let distIdx = 0;
36
+ const dotProducs = tfidfs.map(atfidf => Object.values(atfidf).reduce((sum, v) => sum + v * v, 0));
37
+ for (let i = 0; i < tfidfs.length; i++) {
38
+ const a = tfidfs[i];
39
+ for (let j = i + 1; j < tfidfs.length; j++) {
40
+ const b = tfidfs[j];
41
+ const intersection = buildIntersection(tfidfs, i, j);
42
+ const dotProdOfCommons = intersection.reduce((sum, vidx) => sum + a[vidx] * b[vidx], 0);
43
+ // TODO make it pluggable to use other distance measures like euclidean, manhattan
44
+ const cosineSimilarity = 1 -
45
+ dotProdOfCommons /
46
+ (Math.sqrt(dotProducs[i]) / Math.sqrt(dotProducs[j]));
47
+ distances[distIdx] = cosineSimilarity;
48
+ distIdx++;
49
+ }
50
+ }
51
+ cache.clear();
52
+ return distances;
53
+ };
54
+ exports.distance = distance;
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ /**
11
+ *
12
+ * @param {*} nDocs number of docs
13
+ * @param {*} D condensed distance matrix
14
+ * @returns labels - list of doc ids as clusters
15
+ */
16
+ export declare const cluster: (nDocs: number, condensedDistanceMatrix: Float32Array, maxDistanceThreshold: number) => number[] | Uint32Array;
17
+ //# sourceMappingURL=HAC.d.ts.map
@@ -0,0 +1,122 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * @emails oncall+ws_labs
9
+ * @format
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.cluster = void 0;
13
/**
 * Maps a pair of document indices to its slot in a condensed
 * (upper-triangle, row-major) distance matrix of `n` documents.
 * The order of `i` and `j` does not matter.
 */
const condensedIndex = (n, i, j) => {
    // normalize the pair so that row < col
    const [row, col] = i > j ? [j, i] : [i, j];
    // rows before `row` contribute (n - 1) + (n - 2) + ... + (n - row) slots,
    // then (col - row - 1) is the offset inside row `row`
    const slotsBeforeRow = n * row - (row * (row + 1)) / 2;
    const offsetInRow = col - row - 1;
    return slotsBeforeRow + offsetInRow;
};
21
/**
 * Follows the parent links stored in `array`, starting at `idx`, until it
 * reaches an entry that points to itself, and returns that entry's index.
 */
function getRootLabel(array, idx) {
    let current = idx;
    for (let parent = array[current]; parent !== current; parent = array[current]) {
        current = parent;
    }
    return current;
}
28
+ /**
29
+ *
30
+ * @param {*} nDocs number of docs
31
+ * @param {*} D condenced distance matrix
32
+ * @returns labels - list of doc ids as clusters
33
+ */
34
+ const cluster = (nDocs, condensedDistanceMatrix, maxDistanceThreshold) => {
35
+ if (nDocs <= 1)
36
+ return [0];
37
+ const condencedDistanceMatrixCopy = new Float32Array(condensedDistanceMatrix);
38
+ const sizeOfClusters = new Uint32Array(nDocs).fill(1);
39
+ let chainLength = 0;
40
+ let clusterChain = [];
41
+ let traceAIdx = -1;
42
+ let traceBIdx = -1;
43
+ let currentMin = Number.MAX_SAFE_INTEGER;
44
+ let distanceBetweenTraces;
45
+ const labels = new Uint32Array(nDocs).map((_, idx) => idx);
46
+ for (let k = 0; k < nDocs - 1; k++) {
47
+ traceBIdx = -1;
48
+ if (chainLength === 0) {
49
+ for (let i = 0; i < nDocs; i++) {
50
+ if (sizeOfClusters[i] > 0) {
51
+ clusterChain[0] = i;
52
+ chainLength = 1;
53
+ break;
54
+ }
55
+ }
56
+ }
57
+ while (chainLength > 0) {
58
+ traceAIdx = clusterChain[chainLength - 1];
59
+ if (chainLength > 1) {
60
+ traceBIdx = clusterChain[chainLength - 2];
61
+ currentMin =
62
+ condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, traceBIdx)];
63
+ }
64
+ else {
65
+ currentMin = Number.MAX_SAFE_INTEGER;
66
+ }
67
+ for (let i = 0; i < nDocs; i++) {
68
+ if (sizeOfClusters[i] == 0 || traceAIdx == i) {
69
+ continue;
70
+ }
71
+ distanceBetweenTraces =
72
+ condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, i)];
73
+ if (distanceBetweenTraces < currentMin) {
74
+ currentMin = distanceBetweenTraces;
75
+ traceBIdx = i;
76
+ }
77
+ }
78
+ // make sure that traceA and traceB are closest to each other
79
+ if (chainLength > 1 &&
80
+ traceBIdx !== -1 &&
81
+ traceBIdx === clusterChain[chainLength - 2]) {
82
+ break;
83
+ }
84
+ clusterChain[chainLength] = traceBIdx;
85
+ chainLength = chainLength + 1;
86
+ }
87
+ clusterChain = [];
88
+ chainLength = 0;
89
+ if (currentMin > maxDistanceThreshold) {
90
+ sizeOfClusters[traceAIdx] = 0;
91
+ sizeOfClusters[traceBIdx] = 0;
92
+ continue;
93
+ }
94
+ if (traceAIdx === -1 || traceBIdx === -1) {
95
+ continue;
96
+ }
97
+ if (traceAIdx > traceBIdx) {
98
+ [traceAIdx, traceBIdx] = [traceBIdx, traceAIdx];
99
+ }
100
+ const nx = sizeOfClusters[traceAIdx];
101
+ const ny = sizeOfClusters[traceBIdx];
102
+ labels[traceAIdx] = traceBIdx;
103
+ sizeOfClusters[traceAIdx] = 0;
104
+ sizeOfClusters[traceBIdx] = nx + ny;
105
+ for (let i = 0; i < nDocs; i++) {
106
+ const ni = sizeOfClusters[i];
107
+ if (ni === 0 || i === traceBIdx) {
108
+ continue;
109
+ }
110
+ const d_xi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceAIdx)];
111
+ const d_yi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)];
112
+ const size_x = nx;
113
+ const size_y = ny;
114
+ // TODO make it generic to support other linkage methods like complete, weighted etc...
115
+ const updatedDist = (size_x * d_xi + size_y * d_yi) / (size_x + size_y);
116
+ condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)] =
117
+ updatedDist;
118
+ }
119
+ }
120
+ return labels.map((_, idx) => getRootLabel(labels, idx));
121
+ };
122
+ exports.cluster = cluster;
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ export declare function nGram(n: number, terms: string[]): string[];
11
+ //# sourceMappingURL=Ngram.d.ts.map
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * @emails oncall+ws_labs
9
+ * @format
10
+ */
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.nGram = void 0;
13
/**
 * Produces every run of `n` consecutive terms, each joined by a single space.
 *
 * @param n number of terms per n-gram
 * @param terms ordered list of terms
 * @returns the n-grams in order of their starting position; empty when
 *   `terms` has fewer than `n` entries
 */
function nGram(n, terms) {
    const grams = [];
    const lastStart = terms.length - n;
    for (let start = 0; start <= lastStart; start++) {
        grams.push(terms.slice(start, start + n).join(' '));
    }
    return grams;
}
22
+ exports.nGram = nGram;
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ *
7
+ * @emails oncall+ws_labs
8
+ * @format
9
+ */
10
+ interface TfidfVectorizerProps {
11
+ rawDocuments: string[];
12
+ maxDF?: number;
13
+ }
14
+ export declare class TfidfVectorizer {
15
+ rawDocuments: string[];
16
+ vocabulary: Record<string, string>;
17
+ documentFrequency: Record<string, number>;
18
+ maxDF: number;
19
+ documents: Record<string, number>[];
20
+ tfidfs: Record<string, number>[];
21
+ constructor({ rawDocuments, maxDF }: TfidfVectorizerProps);
22
+ computeTfidfs(): Record<string, number>[];
23
+ tokenize(text: string): string[];
24
+ buildVocabulary(tokenizedDocuments: string[][]): Record<string, string>;
25
+ processDocuments(tokenizedDocuments: string[][]): void;
26
+ limit(): void;
27
+ /**
28
+ * Smooth idf weights by adding 1 to document frequencies (DF), as if an extra
29
+ * document was seen containing every term in the collection exactly once.
30
+ * This prevents zero divisions.
31
+ * */
32
+ smooth(): void;
33
+ buildTfidfs(): Record<string, number>[];
34
+ tf(vocabIdx: string, document: Record<string, number>): number;
35
+ idf(vocabIdx: string): number;
36
+ }
37
+ export {};
38
+ //# sourceMappingURL=TfidfVectorizer.d.ts.map