@memlab/core 1.1.3 → 1.1.6
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- package/dist/index.d.ts +4 -0
- package/dist/index.js +21 -1
- package/dist/lib/Config.d.ts +13 -9
- package/dist/lib/Config.js +8 -0
- package/dist/lib/HeapAnalyzer.js +6 -1
- package/dist/lib/NodeHeap.d.ts +20 -0
- package/dist/lib/NodeHeap.js +20 -0
- package/dist/lib/PackageInfoLoader.d.ts +7 -0
- package/dist/lib/PackageInfoLoader.js +66 -0
- package/dist/lib/Serializer.js +48 -25
- package/dist/lib/Types.d.ts +89 -5
- package/dist/trace-cluster/TraceBucket.js +6 -1
- package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.d.ts +15 -0
- package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.js +61 -0
- package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.d.ts +11 -0
- package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.js +54 -0
- package/dist/trace-cluster/strategies/machine-learning/HAC.d.ts +17 -0
- package/dist/trace-cluster/strategies/machine-learning/HAC.js +124 -0
- package/dist/trace-cluster/strategies/machine-learning/Ngram.d.ts +11 -0
- package/dist/trace-cluster/strategies/machine-learning/Ngram.js +22 -0
- package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.d.ts +38 -0
- package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.js +140 -0
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -7,6 +7,8 @@
  * @emails oncall+ws_labs
  * @format
  */
+/** @internal */
+export declare function registerPackage(): Promise<void>;
 export * from './lib/Types';
 /** @internal */
 export { default as config } from './lib/Config';
@@ -42,5 +44,7 @@ export { default as leakClusterLogger } from './logger/LeakClusterLogger';
 export { default as NormalizedTrace } from './trace-cluster/TraceBucket';
 /** @internal */
 export { default as EvaluationMetric } from './trace-cluster/EvalutationMetric';
+/** @internal */
+export * from './lib/PackageInfoLoader';
 export * from './lib/NodeHeap';
 //# sourceMappingURL=index.d.ts.map
package/dist/index.js
CHANGED
@@ -22,11 +22,29 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
 var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.EvaluationMetric = exports.NormalizedTrace = exports.leakClusterLogger = exports.ProcessManager = exports.modes = exports.constant = exports.analysis = exports.browserInfo = exports.serializer = exports.fileManager = exports.utils = exports.BaseOption = exports.info = exports.config = void 0;
+exports.EvaluationMetric = exports.NormalizedTrace = exports.leakClusterLogger = exports.ProcessManager = exports.modes = exports.constant = exports.analysis = exports.browserInfo = exports.serializer = exports.fileManager = exports.utils = exports.BaseOption = exports.info = exports.config = exports.registerPackage = void 0;
+const path_1 = __importDefault(require("path"));
+const PackageInfoLoader_1 = require("./lib/PackageInfoLoader");
+/** @internal */
+function registerPackage() {
+    return __awaiter(this, void 0, void 0, function* () {
+        return PackageInfoLoader_1.PackageInfoLoader.registerPackage(path_1.default.join(__dirname, '..'));
+    });
+}
+exports.registerPackage = registerPackage;
 __exportStar(require("./lib/Types"), exports);
 /** @internal */
 var Config_1 = require("./lib/Config");
@@ -76,4 +94,6 @@ Object.defineProperty(exports, "NormalizedTrace", { enumerable: true, get: funct
 /** @internal */
 var EvalutationMetric_1 = require("./trace-cluster/EvalutationMetric");
 Object.defineProperty(exports, "EvaluationMetric", { enumerable: true, get: function () { return __importDefault(EvalutationMetric_1).default; } });
+/** @internal */
+__exportStar(require("./lib/PackageInfoLoader"), exports);
 __exportStar(require("./lib/NodeHeap"), exports);
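For orientation, the new `registerPackage` export wraps `PackageInfoLoader.registerPackage` (shown later in this diff) with the package's own root directory, so each memlab package can record its name and version in the shared config. A minimal usage sketch, assuming only the exports visible in this diff:

```typescript
import {registerPackage, config} from '@memlab/core';

(async function () {
  // records @memlab/core's own package.json metadata (name, version, location)
  await registerPackage();
  // config.packageInfo now holds the registered IPackageInfo entries
  console.log(config.packageInfo);
})();
```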
package/dist/lib/Config.d.ts
CHANGED
@@ -8,7 +8,7 @@
  * @format
  */
 import type { LaunchOptions, Permission } from 'puppeteer';
-import type { AnyFunction, AnyValue, IClusterStrategy, IRunningMode, IScenario, Nullable, Optional, QuickExperiment, ILeakFilter } from './Types';
+import type { AnyFunction, AnyValue, IClusterStrategy, IRunningMode, IScenario, Nullable, Optional, QuickExperiment, ILeakFilter, IPackageInfo } from './Types';
 interface BrowserLaunchArgumentOptions {
     headless?: boolean;
     userDataDir?: string;
@@ -44,14 +44,18 @@ export declare enum ErrorHandling {
 }
 /** @internal */
 export declare class MemLabConfig {
-    snapshotHasDetachedness: boolean;
-    specifiedEngine: boolean;
-    verbose: boolean;
-    jsEngine: string;
     _reportLeaksInTimers: boolean;
     _deviceManualOverridden: boolean;
     _timerNodes: string[];
     _timerEdges: string[];
+    _isFullRun: boolean;
+    _scenario: Optional<IScenario>;
+    _isHeadfulBrowser: boolean;
+    _browser: string;
+    snapshotHasDetachedness: boolean;
+    specifiedEngine: boolean;
+    verbose: boolean;
+    jsEngine: string;
     targetApp: string;
     targetTab: string;
     analysisMode: string;
@@ -94,7 +98,6 @@ export declare class MemLabConfig {
     puppeteerConfig: LaunchOptions & BrowserLaunchArgumentOptions & BrowserConnectOptions;
     openDevtoolsConsole: boolean;
     emulateDevice: Nullable<Device>;
-    _browser: string;
     addEnableGK: Set<string>;
     addDisableGK: Set<string>;
     qes: QuickExperiment[];
@@ -169,15 +172,16 @@ export declare class MemLabConfig {
     oversizeObjectAsLeak: boolean;
     oversizeThreshold: number;
     clusterRetainedSizeThreshold: number;
-    _isFullRun: boolean;
-    _scenario: Optional<IScenario>;
-    _isHeadfulBrowser: boolean;
     externalLeakFilter?: Optional<ILeakFilter>;
     monoRepoDir: string;
     muteConsole: boolean;
+    includeObjectInfoInTraceReturnChain: boolean;
     logUnclassifiedClusters: boolean;
     errorHandling: ErrorHandling;
     clusterStrategy: Optional<IClusterStrategy>;
+    packageInfo: IPackageInfo[];
+    isMLClustering: boolean;
+    mlClusteringLinkageMaxDistance: number;
     constructor(options?: ConfigOption);
     private initInternalConfigs;
     private init;
package/dist/lib/Config.js
CHANGED
@@ -97,6 +97,8 @@ class MemLabConfig {
         this.jsEngine = Constant_1.default.defaultEngine;
         // the default browser (Chromium)
         this._browser = 'chrome';
+        // a list of package information
+        this.packageInfo = [];
         // a set of additional GKs to be enabled
         this.addEnableGK = new Set();
         // a set of additional GKs to be disabled
@@ -117,8 +119,14 @@ class MemLabConfig {
         this.muteConsole = false;
         // log all leak traces, each as an unclassified cluster
         this.logUnclassifiedClusters = false;
+        // If true, the detailed JSON file of each representative
+        // trace (for visualization) will include detailed object
+        // info for each Fiber node on the return chain.
+        // This may bloat the trace size from 100KB to 50MB.
+        this.includeObjectInfoInTraceReturnChain = false;
         // by default halt the program when utils.haltOrThrow is calleds
         this.errorHandling = ErrorHandling.Halt;
+        this.mlClusteringLinkageMaxDistance = 0.7;
     }
     // initialize configurable parameters
     init(options = {}) {
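The three new options control features introduced elsewhere in this diff: `includeObjectInfoInTraceReturnChain` is read by Serializer.js, while `isMLClustering` and `mlClusteringLinkageMaxDistance` drive the ML clustering strategy used in HeapAnalyzer.js and TraceBucket.js. A sketch of toggling them from code (field names come from the diff; whether you set them directly or through CLI flags depends on your setup):

```typescript
import {config} from '@memlab/core';

// opt into ML-based leak-trace clustering (TF-IDF + hierarchical clustering)
config.isMLClustering = true;
// traces are merged while their average linkage distance stays at or below this value
config.mlClusteringLinkageMaxDistance = 0.7;
// keep the default: do not embed per-Fiber-node object info in the trace JSON output
config.includeObjectInfoInTraceReturnChain = false;
```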
package/dist/lib/HeapAnalyzer.js
CHANGED
@@ -33,6 +33,7 @@ const Console_1 = __importDefault(require("./Console"));
 const Serializer_1 = __importDefault(require("./Serializer"));
 const Utils_1 = __importDefault(require("./Utils"));
 const LeakObjectFilter_1 = require("./leak-filters/LeakObjectFilter");
+const MLTraceSimilarityStrategy_1 = __importDefault(require("../trace-cluster/strategies/MLTraceSimilarityStrategy"));
 class MemoryAnalyst {
     checkLeak() {
         return __awaiter(this, void 0, void 0, function* () {
@@ -630,7 +631,11 @@ class MemoryAnalyst {
             Console_1.default.midLevel(`${numOfLeakedObjects} leaked objects`);
         }
         // cluster traces from the current run
-        const clusters = TraceBucket_1.default.clusterPaths(paths, snapshot, this.aggregateDominatorMetrics
+        const clusters = TraceBucket_1.default.clusterPaths(paths, snapshot, this.aggregateDominatorMetrics, {
+            strategy: Config_1.default.isMLClustering
+                ? new MLTraceSimilarityStrategy_1.default()
+                : undefined,
+        });
         yield this.serializeClusterUpdate(clusters);
         if (Config_1.default.logUnclassifiedClusters) {
             // cluster traces from the current run
package/dist/lib/NodeHeap.d.ts
CHANGED
@@ -48,6 +48,26 @@ import type { IHeapSnapshot } from './Types';
 * ```
 */
 export declare function tagObject<T extends object>(o: T, tag: string): T;
+/**
+ * Take a heap snapshot of the current program state and save it as a
+ * `.heapsnapshot` file under a randomly generated folder inside the system's
+ * temp folder.
+ *
+ * **Note**: All `.heapsnapshot` files could also be loaded by Chrome DevTools.
+ * @returns the absolute file path to the saved `.heapsnapshot` file.
+ *
+ * * **Examples**:
+ * ```typescript
+ * import type {IHeapSnapshot} from '@memlab/core';
+ * import {dumpNodeHeapSnapshot} from '@memlab/core';
+ * import {getHeapFromFile} from '@memlab/heap-analysis';
+ *
+ * (async function () {
+ *   const heapFile = dumpNodeHeapSnapshot();
+ *   const heap: IHeapSnapshot = await getHeapFromFile(heapFile);
+ * })();
+ * ```
+ */
 export declare function dumpNodeHeapSnapshot(): string;
 /**
 * Take a heap snapshot of the current program state
package/dist/lib/NodeHeap.js
CHANGED
@@ -80,6 +80,26 @@ function tagObject(o, tag) {
     return o;
 }
 exports.tagObject = tagObject;
+/**
+ * Take a heap snapshot of the current program state and save it as a
+ * `.heapsnapshot` file under a randomly generated folder inside the system's
+ * temp folder.
+ *
+ * **Note**: All `.heapsnapshot` files could also be loaded by Chrome DevTools.
+ * @returns the absolute file path to the saved `.heapsnapshot` file.
+ *
+ * * **Examples**:
+ * ```typescript
+ * import type {IHeapSnapshot} from '@memlab/core';
+ * import {dumpNodeHeapSnapshot} from '@memlab/core';
+ * import {getHeapFromFile} from '@memlab/heap-analysis';
+ *
+ * (async function () {
+ *   const heapFile = dumpNodeHeapSnapshot();
+ *   const heap: IHeapSnapshot = await getHeapFromFile(heapFile);
+ * })();
+ * ```
+ */
 function dumpNodeHeapSnapshot() {
     const file = path_1.default.join(FileManager_1.default.generateTmpHeapDir(), `nodejs.heapsnapshot`);
     v8_1.default.writeHeapSnapshot(file);
package/dist/lib/PackageInfoLoader.js
ADDED
@@ -0,0 +1,66 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.PackageInfoLoader = void 0;
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+const fs_extra_1 = __importDefault(require("fs-extra"));
+const path_1 = __importDefault(require("path"));
+const Config_1 = __importDefault(require("./Config"));
+const Utils_1 = __importDefault(require("./Utils"));
+/** @internal */
+class PackageInfoLoader {
+    static loadFrom(packageDirectory) {
+        return __awaiter(this, void 0, void 0, function* () {
+            let exists = yield fs_extra_1.default.pathExists(packageDirectory);
+            if (!exists) {
+                throw Utils_1.default.haltOrThrow(`package directory doesn't exist: ${packageDirectory}`);
+            }
+            let packageJSONFile = path_1.default.join(packageDirectory, 'package-oss.json');
+            exists = yield fs_extra_1.default.pathExists(packageJSONFile);
+            if (!exists) {
+                packageJSONFile = path_1.default.join(packageDirectory, 'package.json');
+            }
+            exists = yield fs_extra_1.default.pathExists(packageJSONFile);
+            if (!exists) {
+                throw Utils_1.default.haltOrThrow(`package.json doesn't exist: ${packageJSONFile}`);
+            }
+            try {
+                const metaData = yield fs_extra_1.default.readJSON(packageJSONFile, 'UTF-8');
+                return Object.assign(Object.assign({}, metaData), { packageLocation: packageDirectory });
+            }
+            catch (ex) {
+                throw Utils_1.default.haltOrThrow(Utils_1.default.getError(ex));
+            }
+        });
+    }
+    static registerPackage(packageDirectory) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!PackageInfoLoader.registeredPackages.has(packageDirectory)) {
+                PackageInfoLoader.registeredPackages.add(packageDirectory);
+                const packageInfo = yield PackageInfoLoader.loadFrom(packageDirectory);
+                Config_1.default.packageInfo.push(packageInfo);
+            }
+        });
+    }
+}
+exports.PackageInfoLoader = PackageInfoLoader;
+PackageInfoLoader.registeredPackages = new Set();
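`loadFrom` reads `package-oss.json` when present, otherwise `package.json`, spreads the parsed metadata, and adds a `packageLocation` field, so a registered entry in `config.packageInfo` looks roughly like the following (values are illustrative, not taken from this diff):

```typescript
import type {IPackageInfo} from '@memlab/core';

const entry: IPackageInfo = {
  name: '@memlab/core',
  version: '1.1.6',
  // added by loadFrom; other package.json fields are carried along by the object spread
  packageLocation: '/path/to/node_modules/@memlab/core',
};
```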
package/dist/lib/Serializer.js
CHANGED
@@ -99,7 +99,7 @@ function JSONifyDetachedHTMLElement(node, args, options) {
     // options for elem.__reactProps$xxx
     const propsOptions = Object.assign({}, options);
     propsOptions.forceJSONifyDepth = 1;
-
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         if (Utils_1.default.isReactFiberEdge(edge)) {
             info[key] = JSONifyNode(edge.toNode, args, fiberOptions);
@@ -110,7 +110,8 @@ function JSONifyDetachedHTMLElement(node, args, options) {
         else {
             info[key] = JSONifyNodeInShort(edge.toNode);
         }
-
+        return null;
+    });
     return info;
 }
 function calculateReturnTrace(node, cache) {
@@ -128,15 +129,16 @@ function calculateReturnTrace(node, cache) {
 const objectNodeUsefulProps = new Set(['_context']);
 function JSONifyNodeOneLevel(node) {
     const info = Object.create(null);
-
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         info[key] = JSONifyNodeShallow(edge.toNode);
-
+        return null;
+    });
     return info;
 }
 function JSONifyNodeShallow(node) {
     const info = Object.create(null);
-
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         if (objectNodeUsefulProps.has(edge.name_or_index)) {
             info[key] = JSONifyNodeShallow(edge.toNode);
@@ -144,7 +146,8 @@ function JSONifyNodeShallow(node) {
         else {
             info[key] = JSONifyNodeInShort(edge.toNode);
         }
-
+        return null;
+    });
     return info;
 }
 const fiberNodeUsefulProps = new Set([
@@ -154,15 +157,17 @@ const fiberNodeUsefulProps = new Set([
 ]);
 function JSONifyFiberNodeShallow(node) {
     const info = Object.create(null);
-
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         if (fiberNodeUsefulProps.has(edge.name_or_index) &&
             Utils_1.default.isObjectNode(edge.toNode)) {
             info[key] = JSONifyNodeShallow(edge.toNode);
-            continue;
         }
-
-
+        else {
+            info[key] = JSONifyNodeInShort(edge.toNode);
+        }
+        return null;
+    });
     return info;
 }
 // calculate the summary of return chain of the FiberNode
@@ -187,7 +192,9 @@ function JSONifyFiberNodeReturnTrace(node, args, options) {
         }
         const parentInfo = getNodeNameInJSON(parent, args);
         key = `${key}: --return (property)---> ${parentInfo}`;
-        const info =
+        const info = Config_1.default.includeObjectInfoInTraceReturnChain
+            ? JSONifyFiberNodeShallow(parent)
+            : Object.create(null);
         trace[key] = info;
     }
     return trace;
@@ -206,25 +213,27 @@ function JSONifyFiberNode(node, args, options) {
         propsOptions.forceJSONifyDepth = 1;
     }
     propsOptions.forceJSONifyDepth--;
-
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         info[key] =
-            propsOptions.forceJSONifyDepth >= 1
+            propsOptions.forceJSONifyDepth && propsOptions.forceJSONifyDepth >= 1
                 ? JSONifyNode(edge.toNode, args, propsOptions)
                 : JSONifyNodeInShort(edge.toNode);
-
+        return null;
+    });
     return info;
 }
 function JSONifyClosure(node, args, options) {
     const info = Object.create(null);
-
+    iterateSelectedEdges(node, (edge) => {
         if (edge.name_or_index === 'shared' ||
             edge.name_or_index === 'context' ||
             edge.name_or_index === 'displayName') {
             const key = filterJSONPropName(edge.name_or_index);
             info[key] = JSONifyNode(edge.toNode, args, options);
         }
-
+        return null;
+    });
     return info;
 }
 function JSONifyNumberNode(node,
@@ -238,7 +247,7 @@ _options) {
 }
 function JSONifyCode(node, args, options) {
     const info = Object.create(null);
-
+    iterateSelectedEdges(node, (edge) => {
         if (edge.name_or_index === 'name_or_scope_info' &&
             edge.toNode.name === '(function scope info)') {
             const key = 'variables with non-number values in closure scope chain';
@@ -251,14 +260,15 @@ function JSONifyCode(node, args, options) {
             const key = filterJSONPropName(edge.name_or_index);
             info[key] = JSONifyNode(edge.toNode, args, options);
         }
-
+        return null;
+    });
     return info;
 }
 function JSONifyContext(node, args, options) {
     const info = Object.create(null);
     const key = 'variables in scope (used by nested closures)';
     const closure_vars = (info[key] = Object.create(null));
-
+    iterateSelectedEdges(node, (edge) => {
         const key = filterJSONPropName(edge.name_or_index);
         if (edge.type === 'context') {
             closure_vars[key] = JSONifyNodeInShort(edge.toNode);
@@ -266,15 +276,27 @@ function JSONifyContext(node, args, options) {
         else if (edge.type === '') {
             info[key] = JSONifyNode(edge.toNode, args, options);
         }
-
+        return null;
+    });
     return info;
 }
+function iterateSelectedEdges(node, callback) {
+    let edgesProcessed = 0;
+    node.forEachReference((edge) => {
+        if (edge.type === 'internal') {
+            if (edge.name_or_index === 'map' || edge.is_index) {
+                return;
+            }
+        }
+        if (edgesProcessed++ > 100) {
+            return { stop: true };
+        }
+        return callback(edge);
+    });
+}
 function JSONifyOrdinaryValue(node, args, options) {
     const info = Object.create(null);
-
-    if (edge.name_or_index === 'map' && edge.type === 'internal') {
-        continue;
-    }
+    iterateSelectedEdges(node, (edge) => {
         const key = JSONifyEdgeNameAndType(edge);
         const toNode = edge.toNode;
         const toNodeName = toNode.name;
@@ -293,7 +315,8 @@ function JSONifyOrdinaryValue(node, args, options) {
         else {
             info[key] = JSONifyNodeInShort(toNode);
         }
-
+        return null;
+    });
    return info;
 }
 function JSONifyNode(node, args, options) {
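The new `iterateSelectedEdges` helper builds on the `EdgeIterationCallback` contract documented in Types.d.ts below: returning an object with a truthy `stop` ends the iteration, any other return value keeps it going. A hedged sketch of the same pattern against the public heap API (it assumes you already hold an `IHeapNode`, for example from a loaded snapshot):

```typescript
import type {IHeapNode, IHeapEdge} from '@memlab/core';

// print at most 10 non-internal outgoing references of a node, then stop early
function printSomeReferences(node: IHeapNode): void {
  let printed = 0;
  node.forEachReference((edge: IHeapEdge) => {
    if (edge.type === 'internal') {
      return; // skip internal edges but keep iterating
    }
    console.log(`${String(edge.name_or_index)} -> ${edge.toNode.name}`);
    if (++printed >= 10) {
      return {stop: true}; // ends node.forEachReference early
    }
  });
}
```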
package/dist/lib/Types.d.ts
CHANGED
@@ -32,7 +32,6 @@ export declare type AnyOptions = Record<string, unknown>;
 export declare type UnusedOptions = Record<string, never>;
 /** @internal */
 export declare type Command = [string, string[], AnyOptions];
-export declare type Predicator<T> = (node: T) => boolean;
 /** @internal */
 export declare type HeapNodeIdSet = Set<number>;
 /** @internal */
@@ -86,9 +85,26 @@ export declare type CLIArgs = {
     'local-puppeteer': boolean;
     'snapshot-dir': string;
 };
+/**
+ * the predicate callback is used to decide if a
+ * entity of type `T`.
+ * For more concrete examples on where it is used,
+ * check out {@link findAnyReference}, {@link findAnyReferrer},
+ * and {@link findReferrers}.
+ *
+ * @typeParam T - the type of the entity to be checked
+ * @param entity - the entity to be checked
+ * @returns whether the entity passes the predicate check
+ */
+export declare type Predicator<T> = (entity: T) => boolean;
+/**
+ * Data structure for holding cookies.
+ * For concrete example, check out {@link cookies}.
+ */
 export declare type Cookies = Array<{
     name: string;
     value: string;
+    domain?: string;
 }>;
 /** @internal */
 export interface IE2EScenarioSynthesizer {
@@ -119,6 +135,12 @@ export interface E2EScenarioSynthesizerConstructor {
     new (config: Config): IE2EScenarioSynthesizer;
 }
 /** @internal */
+export interface IPackageInfo {
+    name: string;
+    version: string;
+    packageLocation?: string;
+}
+/** @internal */
 export interface IRunningMode {
     setConfig(config: Config): void;
     beforeRunning(visitPlan: IE2EScenarioVisitPlan): void;
@@ -281,9 +303,10 @@ export interface ILeakFilter {
 /**
 * Lifecycle function callback that is invoked initially once before calling any
 * leak filter function.
+ * For concrete example, check out {@link beforeLeakFilter}.
 *
-* @param
-* @param leakedNodeIds
+ * @param snapshot heap snapshot see {@link IHeapSnapshot}
+ * @param leakedNodeIds the set of leaked object (node) ids.
 */
 export declare type InitLeakFilterCallback = (snapshot: IHeapSnapshot, leakedNodeIds: HeapNodeIdSet) => void;
 /**
@@ -292,6 +315,8 @@ export declare type InitLeakFilterCallback = (snapshot: IHeapSnapshot, leakedNod
 * allocated but not released from the target interaction
 * in the heap snapshot.
 *
+ * For concrete examples, check out {@link leakFilter}.
+ *
 * @param node - the node that is kept alive in the memory in the heap snapshot
 * @param snapshot - the snapshot of target interaction
 * @param leakedNodeIds - the set of leaked node ids
@@ -310,6 +335,11 @@ export declare type LeakFilterCallback = (node: IHeapNode, snapshot: IHeapSnapsh
 /**
 * The callback defines browser interactions which are
 * used by memlab to interact with the web app under test.
+ * For concrete examples, check out {@link action} or {@link back}.
+ *
+ * @param page the puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
+ * object, which provides APIs to interact with the web browser
+ * @returns no return value
 */
 export declare type InteractionsCallback = (page: Page, args?: OperationArgs) => Promise<void>;
 /**
@@ -359,7 +389,11 @@ export interface IScenario {
     * a list of `<name, value, domain>` tuples.
     *
     * **Note**: please make sure that you provide the correct `domain` field for
-    * the cookies tuples.
+    * the cookies tuples. If no `domain` field is specified, memlab will try
+    * to fill in a domain based on the `url` callback.
+    * For example, when the `domain` field is absent,
+    * memlab will auto fill in `.facebook.com` as domain base
+    * on the initial page load's url: `https://www.facebook.com/`.
     *
     * @returns cookie list
     * * **Examples**:
@@ -374,6 +408,8 @@ export interface IScenario {
     *   // ...
     * ],
     * };
+    *
+    * module.exports = scenario;
     * ```
     */
    cookies?: () => Cookies;
@@ -386,6 +422,8 @@ export interface IScenario {
     * const scenario = {
     *   url: () => 'https://www.npmjs.com/',
     * };
+    *
+    * module.exports = scenario;
     * ```
     * If a test scenario only specifies the `url` callback (without the `action`
     * callback), memlab will try to detect memory leaks from the initial page
@@ -414,6 +452,8 @@ export interface IScenario {
     *   await page.click('a[href="/back"]');
     * },
     * }
+    *
+    * module.exports = scenario;
     * ```
     * Note: always clean up external puppeteer references to JS objects
     * in the browser context.
@@ -431,6 +471,8 @@ export interface IScenario {
     * },
     * back: async (page) => ... ,
     * }
+    *
+    * module.exports = scenario;
    ```
     */
    action?: InteractionsCallback;
@@ -438,6 +480,10 @@ export interface IScenario {
     * `back` is the callback function that specifies how memlab should
     * back/revert the `action` callback. Think of it as an undo action.
     *
+    * * **Parameters**:
+    *   * page: `Page` | the puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
+    * object, which provides APIs to interact with the web browser
+    *
     * * **Examples**:
     * ```typescript
     * const scenario = {
@@ -661,7 +707,12 @@ export interface IDataBuilder {
 }
 /**
 * Callback function to provide if the page is loaded.
+ * For concrete example, check out {@link isPageLoaded}.
 * @param page - puppeteer's [Page](https://pptr.dev/api/puppeteer.page/) object.
+ * @returns a boolean value, if it returns `true`, memlab will consider
+ * the navigation completes, if it returns `false`, memlab will keep calling
+ * this callback until it returns `true`. This is an async callback, you can
+ * also `await` and returns `true` until some async logic is resolved.
 */
 export declare type CheckPageLoadCallback = (page: Page) => Promise<boolean>;
 /** @internal */
@@ -711,16 +762,42 @@ export declare type E2EStepInfo = IE2EStepBasic & {
     delay?: number;
     metrics: Record<string, number>;
 };
-/**
+/**
+ * This data structure contains the input configuration for the browser and
+ * output data from the browser. You can retrieve the instance of this type
+ * through {@link RunMetaInfo}.
+ */
 export interface IBrowserInfo {
+    /**
+     * browser version
+     */
     _browserVersion: string;
+    /**
+     * configuration for puppeteer
+     */
    _puppeteerConfig: LaunchOptions;
+    /**
+     * all web console output
+     */
    _consoleMessages: string[];
 }
+/**
+ * This data structure holds the information about memlab run.
+ * You can retrieve the instance of this type through {@link getRunMetaInfo}.
+ */
 export declare type RunMetaInfo = {
+    /** @internal */
     app: string;
+    /** @internal */
     interaction: string;
+    /**
+     * type of the memlab run
+     */
    type: string;
+    /**
+     * input configuration for the browser and
+     * output data from the browser
+     */
    browserInfo: IBrowserInfo;
 };
 /**
@@ -1135,6 +1212,13 @@ export interface IHeapNodeBasic {
     */
    id: number;
 }
+/**
+ * Executes a provided callback once for JavaScript references.
+ * For concrete examples, check out {@link forEachReference}
+ * or {@link forEachReferrer}.
+ * @param callback the callback for each JavaScript reference from a collection
+ * @returns this API returns void
+ */
 export declare type EdgeIterationCallback = (edge: IHeapEdge) => Optional<{
     stop: boolean;
 }>;
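The expanded `cookies` documentation and the new optional `domain` field translate into scenario files like the following sketch (site and cookie values are placeholders; the `module.exports = scenario;` line mirrors the examples added above):

```typescript
// test-scenario.js: placeholder values for illustration only
const scenario = {
  url: () => 'https://www.example.com/',
  cookies: () => [
    {name: 'session_id', value: '<your-session-token>', domain: '.example.com'},
    // domain omitted: memlab fills one in based on the url() callback
    {name: 'locale', value: 'en-US'},
  ],
};

module.exports = scenario;
```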
package/dist/trace-cluster/TraceBucket.js
CHANGED
@@ -20,6 +20,7 @@ const Utils_1 = __importDefault(require("../lib/Utils"));
 const TraceElement_1 = require("./TraceElement");
 const TraceSimilarityStrategy_1 = __importDefault(require("./strategies/TraceSimilarityStrategy"));
 const TraceAsClusterStrategy_1 = __importDefault(require("./strategies/TraceAsClusterStrategy"));
+const MLTraceSimilarityStrategy_1 = __importDefault(require("./strategies/MLTraceSimilarityStrategy"));
 // sync up with html/intern/js/webspeed/memlab/lib/LeakCluster.js
 class NormalizedTrace {
     constructor(p = null, snapshot = null) {
@@ -157,7 +158,11 @@ class NormalizedTrace {
         };
     }
     static clusterLeakTraces(leakTraces) {
-        const { allClusters } = NormalizedTrace.diffTraces(leakTraces, []
+        const { allClusters } = NormalizedTrace.diffTraces(leakTraces, [], {
+            strategy: Config_1.default.isMLClustering
+                ? new MLTraceSimilarityStrategy_1.default()
+                : undefined,
+        });
         const lastNodeFromTrace = (trace) => trace[trace.length - 1];
         const labaledLeakTraces = allClusters.reduce((acc, bucket) => {
             const lastNodeFromFirstTrace = lastNodeFromTrace(bucket[0]);
package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.d.ts
ADDED
@@ -0,0 +1,15 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+import type { IClusterStrategy, LeakTrace, TraceDiff } from '../../lib/Types';
+export default class MLTraceSimilarityStrategy implements IClusterStrategy {
+    diffTraces(newLeakTraces: LeakTrace[]): TraceDiff;
+    traceToDoc(trace: LeakTrace): string;
+}
+//# sourceMappingURL=MLTraceSimilarityStrategy.d.ts.map
package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.js
ADDED
@@ -0,0 +1,61 @@
+"use strict";
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Config_1 = __importDefault(require("../../lib/Config"));
+const DistanceMatrix_1 = require("./machine-learning/DistanceMatrix");
+const HAC_1 = require("./machine-learning/HAC");
+const TfidfVectorizer_1 = require("./machine-learning/TfidfVectorizer");
+class MLTraceSimilarityStrategy {
+    diffTraces(newLeakTraces) {
+        var _a;
+        const rawDocuments = newLeakTraces.map(this.traceToDoc);
+        const vectorizer = new TfidfVectorizer_1.TfidfVectorizer({ rawDocuments });
+        const tfidfs = vectorizer.computeTfidfs();
+        const dmatrix = (0, DistanceMatrix_1.distance)(tfidfs);
+        const result = (0, HAC_1.cluster)(rawDocuments.length, dmatrix, Config_1.default.mlClusteringLinkageMaxDistance);
+        const map = new Map();
+        for (let i = 0; i < result.length; i++) {
+            const traceIdx = result[i];
+            const repTrace = newLeakTraces[traceIdx];
+            const trace = newLeakTraces[i];
+            if (!map.has(repTrace)) {
+                map.set(repTrace, [repTrace]);
+            }
+            // to please lint
+            (_a = map.get(repTrace)) === null || _a === void 0 ? void 0 : _a.push(trace);
+        }
+        return {
+            allClusters: Array.from(map.values()),
+            staleClusters: [],
+            clustersToAdd: [],
+        };
+    }
+    traceToDoc(trace) {
+        const res = [];
+        for (const t of trace) {
+            let name = t.kind === 'node' ? String(t.name) : String(t.name_or_index);
+            if (name === '') {
+                name = '_null_';
+            }
+            name = name.replace(/ /g, '_');
+            name = name.replace(/\d/g, '');
+            if (name === '') {
+                name = '_number_';
+            }
+            res.push(name);
+        }
+        return res.join(' ');
+    }
+}
+exports.default = MLTraceSimilarityStrategy;
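For intuition, `traceToDoc` turns a leak trace into a whitespace-separated "document" of node and edge names with digits stripped, so TF-IDF can treat similar traces as similar documents. A standalone re-statement of the normalization, for illustration only (real `LeakTrace` elements carry more fields; these are faked with plain objects):

```typescript
type FakeTraceElement =
  | {kind: 'node'; name: string}
  | {kind: 'edge'; name_or_index: string | number};

function traceToDoc(trace: FakeTraceElement[]): string {
  return trace
    .map(t => {
      let name = t.kind === 'node' ? String(t.name) : String(t.name_or_index);
      if (name === '') name = '_null_';
      name = name.replace(/ /g, '_').replace(/\d/g, '');
      if (name === '') name = '_number_';
      return name;
    })
    .join(' ');
}

// numeric array indices collapse to '_number_', spaces become '_':
traceToDoc([
  {kind: 'node', name: 'Window'},
  {kind: 'edge', name_or_index: 'listeners'},
  {kind: 'node', name: 'Array'},
  {kind: 'edge', name_or_index: 3},
  {kind: 'node', name: 'Detached HTMLDivElement'},
]);
// -> 'Window listeners Array _number_ Detached_HTMLDivElement'
```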
package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.d.ts
ADDED
@@ -0,0 +1,11 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+export declare const distance: (tfidfs: Record<string, number>[]) => Float32Array;
+//# sourceMappingURL=DistanceMatrix.d.ts.map
package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.js
ADDED
@@ -0,0 +1,54 @@
+"use strict";
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.distance = void 0;
+const cache = new Map();
+const buildIntersection = (tfidfs, i, j) => {
+    const intersection = [];
+    if (!cache.has(i)) {
+        cache.set(i, Object.keys(tfidfs[i]));
+    }
+    if (!cache.has(j)) {
+        cache.set(j, Object.keys(tfidfs[j]));
+    }
+    const [keys, tfidf] = cache.get(i).length > cache.get(j).length
+        ? [cache.get(j), tfidfs[i]]
+        : [cache.get(i), tfidfs[j]];
+    for (const k of keys) {
+        if (tfidf[k]) {
+            intersection.push(k);
+        }
+    }
+    return intersection;
+};
+const distance = (tfidfs) => {
+    const n = tfidfs.length;
+    const distances = new Float32Array((n * (n - 1)) / 2);
+    let distIdx = 0;
+    const dotProducs = tfidfs.map(atfidf => Object.values(atfidf).reduce((sum, v) => sum + v * v, 0));
+    for (let i = 0; i < tfidfs.length; i++) {
+        const a = tfidfs[i];
+        for (let j = i + 1; j < tfidfs.length; j++) {
+            const b = tfidfs[j];
+            const intersection = buildIntersection(tfidfs, i, j);
+            const dotProdOfCommons = intersection.reduce((sum, vidx) => sum + a[vidx] * b[vidx], 0);
+            // TODO make it pluggable to use other distance measures like euclidean, manhattan
+            const cosineSimilarity = 1 -
+                dotProdOfCommons /
+                    (Math.sqrt(dotProducs[i]) / Math.sqrt(dotProducs[j]));
+            distances[distIdx] = cosineSimilarity;
+            distIdx++;
+        }
+    }
+    cache.clear();
+    return distances;
+};
+exports.distance = distance;
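The returned `Float32Array` is a condensed distance matrix: only the upper triangle (i < j) of the pairwise matrix is stored, row by row, which is why its length is n(n-1)/2 and why HAC.js below maps a pair (i, j) to a flat index with n*i - i(i+1)/2 + (j - i - 1). A small sketch of that indexing, just the arithmetic rather than memlab API:

```typescript
// Condensed (upper-triangular) indexing used by the HAC implementation below.
function condensedIndex(n: number, i: number, j: number): number {
  if (i > j) {
    [i, j] = [j, i];
  }
  // rows 0..i-1 contribute (n-1) + (n-2) + ... + (n-i) entries,
  // then (j - i - 1) entries precede the pair (i, j) within row i
  return n * i - (i * (i + 1)) / 2 + (j - i - 1);
}

// For n = 4 the condensed array lays out pairs in this order:
// [(0,1), (0,2), (0,3), (1,2), (1,3), (2,3)]  -> length 4*3/2 = 6
condensedIndex(4, 1, 3); // -> 4
```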
package/dist/trace-cluster/strategies/machine-learning/HAC.d.ts
ADDED
@@ -0,0 +1,17 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+/**
+ *
+ * @param {*} nDocs number of docs
+ * @param {*} D condenced distance matrix
+ * @returns labels - list of doc ids as clusters
+ */
+export declare const cluster: (nDocs: number, condensedDistanceMatrix: Float32Array, maxDistanceThreshold: number) => number[];
+//# sourceMappingURL=HAC.d.ts.map
package/dist/trace-cluster/strategies/machine-learning/HAC.js
ADDED
@@ -0,0 +1,124 @@
+"use strict";
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.cluster = void 0;
+const condensedIndex = (n, i, j) => {
+    if (i > j) {
+        return condensedIndex(n, j, i);
+    }
+    // to get distance between (i, j) think of this sequence.
+    // (n - 1) + (n - 2) + ... + (n - i) + (j - i) - 1
+    return n * i - (i * (i + 1)) / 2 + (j - i - 1);
+};
+const getRootLabel = (array, idx) => {
+    let rootIdx = idx;
+    while (array[rootIdx] !== rootIdx) {
+        rootIdx = array[rootIdx];
+    }
+    return rootIdx;
+};
+/**
+ *
+ * @param {*} nDocs number of docs
+ * @param {*} D condenced distance matrix
+ * @returns labels - list of doc ids as clusters
+ */
+const cluster = (nDocs, condensedDistanceMatrix, maxDistanceThreshold) => {
+    if (nDocs <= 1)
+        return [0];
+    const condencedDistanceMatrixCopy = new Float32Array(condensedDistanceMatrix);
+    const sizeOfClusters = new Uint32Array(nDocs).fill(1);
+    let chainLength = 0;
+    let clusterChain = [];
+    let traceAIdx = -1;
+    let traceBIdx = -1;
+    let currentMin = Number.MAX_SAFE_INTEGER;
+    let distanceBetweenTraces;
+    const labels = Array(nDocs)
+        .fill(0)
+        .map((_, idx) => idx);
+    for (let k = 0; k < nDocs - 1; k++) {
+        traceBIdx = -1;
+        if (chainLength === 0) {
+            for (let i = 0; i < nDocs; i++) {
+                if (sizeOfClusters[i] > 0) {
+                    clusterChain[0] = i;
+                    chainLength = 1;
+                    break;
+                }
+            }
+        }
+        while (chainLength > 0) {
+            traceAIdx = clusterChain[chainLength - 1];
+            if (chainLength > 1) {
+                traceBIdx = clusterChain[chainLength - 2];
+                currentMin =
+                    condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, traceBIdx)];
+            }
+            else {
+                currentMin = Number.MAX_SAFE_INTEGER;
+            }
+            for (let i = 0; i < nDocs; i++) {
+                if (sizeOfClusters[i] == 0 || traceAIdx == i) {
+                    continue;
+                }
+                distanceBetweenTraces =
+                    condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, i)];
+                if (distanceBetweenTraces < currentMin) {
+                    currentMin = distanceBetweenTraces;
+                    traceBIdx = i;
+                }
+            }
+            // to make sure we found a two mutual traces whose distance is smallest.
+            if (chainLength > 1 &&
+                traceBIdx !== -1 &&
+                traceBIdx === clusterChain[chainLength - 2]) {
+                break;
+            }
+            clusterChain[chainLength] = traceBIdx;
+            chainLength = chainLength + 1;
+        }
+        clusterChain = [];
+        chainLength = 0;
+        if (currentMin > maxDistanceThreshold) {
+            sizeOfClusters[traceAIdx] = 0;
+            sizeOfClusters[traceBIdx] = 0;
+            continue;
+        }
+        if (traceAIdx === -1 || traceBIdx === -1) {
+            continue;
+        }
+        if (traceAIdx > traceBIdx) {
+            [traceAIdx, traceBIdx] = [traceBIdx, traceAIdx];
+        }
+        const nx = sizeOfClusters[traceAIdx];
+        const ny = sizeOfClusters[traceBIdx];
+        labels[traceAIdx] = traceBIdx;
+        sizeOfClusters[traceAIdx] = 0;
+        sizeOfClusters[traceBIdx] = nx + ny;
+        for (let i = 0; i < nDocs; i++) {
+            const ni = sizeOfClusters[i];
+            if (ni === 0 || i === traceBIdx) {
+                continue;
+            }
+            const d_xi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceAIdx)];
+            const d_yi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)];
+            const size_x = nx;
+            const size_y = ny;
+            // TODO make it generic to support other linkage methods like complete, weighted etc...
+            const updatedDist = (size_x * d_xi + size_y * d_yi) / (size_x + size_y);
+            condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)] =
+                updatedDist;
+        }
+    }
+    return labels.map((_, idx) => getRootLabel(labels, idx));
+};
+exports.cluster = cluster;
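Together with `distance` above, `cluster` performs average-linkage agglomerative clustering with a distance cutoff, returning for each document the index of its cluster's representative. A toy invocation, with the two functions from the modules above in scope and the vectors invented purely to show the calling convention:

```typescript
// three toy TF-IDF vectors keyed by vocabulary index
const tfidfs: Record<string, number>[] = [
  {'0': 1},              // doc 0
  {'0': 0.9, '1': 0.44}, // doc 1: mostly the same term as doc 0
  {'2': 1},              // doc 2: no overlap with the others
];
const labels = cluster(tfidfs.length, distance(tfidfs), 0.7);
// labels[i] is the root/representative doc index of doc i's cluster;
// docs 0 and 1 should end up sharing a representative while doc 2 keeps its own.
```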
package/dist/trace-cluster/strategies/machine-learning/Ngram.d.ts
ADDED
@@ -0,0 +1,11 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+export declare function nGram(n: number, terms: string[]): string[];
+//# sourceMappingURL=Ngram.d.ts.map
package/dist/trace-cluster/strategies/machine-learning/Ngram.js
ADDED
@@ -0,0 +1,22 @@
+"use strict";
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.nGram = void 0;
+function nGram(n, terms) {
+    const nGrams = [];
+    let index = 0;
+    while (index <= terms.length - n) {
+        nGrams[index] = terms.slice(index, index + n).join(' ');
+        ++index;
+    }
+    return nGrams;
+}
+exports.nGram = nGram;
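`nGram` slides a window of size `n` over the token list and joins each window with spaces; `TfidfVectorizer.tokenize` below combines unigrams with 2-grams and 3-grams so that short subsequences of a trace also become features. For example:

```typescript
nGram(2, ['Window', 'listeners', 'Array']);
// -> ['Window listeners', 'listeners Array']
nGram(3, ['Window', 'listeners', 'Array']);
// -> ['Window listeners Array']
```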
package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.d.ts
ADDED
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+interface TfidfVectorizerProps {
+    rawDocuments: string[];
+    maxDF?: number;
+}
+export declare class TfidfVectorizer {
+    rawDocuments: string[];
+    vocabulary: Record<string, string>;
+    documentFrequency: Record<string, number>;
+    maxDF: number;
+    documents: Record<string, number>[];
+    tfidfs: Record<string, number>[];
+    constructor({ rawDocuments, maxDF }: TfidfVectorizerProps);
+    computeTfidfs(): Record<string, number>[];
+    tokenize(text: string): string[];
+    buildVocabulary(tokenizedDocuments: string[][]): Record<string, string>;
+    processDocuments(tokenizedDocuments: string[][]): void;
+    limit(): void;
+    /**
+     * Smooth idf weights by adding 1 to document frequencies (DF), as if an extra
+     * document was seen containing every term in the collection exactly once.
+     * This prevents zero divisions.
+     * */
+    smooth(): void;
+    buildTfidfs(): Record<string, number>[];
+    tf(vocabIdx: string, document: Record<string, number>): number;
+    idf(vocabIdx: string): number;
+}
+export {};
+//# sourceMappingURL=TfidfVectorizer.d.ts.map
package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.js
ADDED
@@ -0,0 +1,140 @@
+"use strict";
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ * @emails oncall+ws_labs
+ * @format
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.TfidfVectorizer = void 0;
+const Ngram_1 = require("./Ngram");
+const SMOOTHING_KEY = '__smoothObjectKey';
+const VOCAB_IDX_FOR_DOC_WITH_HIGH_DF = '-1';
+class TfidfVectorizer {
+    constructor({ rawDocuments, maxDF = 0.8 }) {
+        this.rawDocuments = [];
+        this.vocabulary = Object.create(null);
+        this.documentFrequency = Object.create(null);
+        this.documents = [];
+        this.rawDocuments = rawDocuments;
+        this.maxDF = maxDF;
+    }
+    computeTfidfs() {
+        const tokenizedDocuments = this.rawDocuments.map(this.tokenize);
+        this.vocabulary = this.buildVocabulary(tokenizedDocuments);
+        this.processDocuments(tokenizedDocuments);
+        this.limit();
+        this.smooth();
+        this.tfidfs = this.buildTfidfs();
+        return this.tfidfs;
+    }
+    tokenize(text) {
+        const terms = text.split(' ');
+        return [...terms, ...(0, Ngram_1.nGram)(2, terms), ...(0, Ngram_1.nGram)(3, terms)];
+    }
+    buildVocabulary(tokenizedDocuments) {
+        let vocabIdx = 0;
+        const vocabulary = Object.create(null);
+        tokenizedDocuments.forEach(doc => {
+            doc.forEach(term => {
+                if (!vocabulary[String(term)]) {
+                    vocabulary[String(term)] = String(vocabIdx);
+                    vocabIdx++;
+                }
+            });
+        });
+        return vocabulary;
+    }
+    processDocuments(tokenizedDocuments) {
+        tokenizedDocuments.forEach(terms => {
+            const document = {};
+            terms.forEach(t => {
+                const vocabIdx = this.vocabulary[t];
+                if (document[vocabIdx]) {
+                    document[vocabIdx] += 1;
+                }
+                else {
+                    if (this.documentFrequency[vocabIdx]) {
+                        this.documentFrequency[vocabIdx] += 1;
+                    }
+                    else {
+                        this.documentFrequency[vocabIdx] = 1;
+                    }
+                    document[vocabIdx] = 1;
+                }
+            });
+            this.documents.push(document);
+        });
+    }
+    limit() {
+        const nMaxDF = Math.floor(this.documents.length * this.maxDF);
+        const vocabIdxsToDelete = [];
+        this.documents.forEach(doc => {
+            Object.keys(doc).forEach(vocabIdx => {
+                if (this.documentFrequency[vocabIdx] > nMaxDF) {
+                    delete doc[vocabIdx];
+                    vocabIdxsToDelete.push(vocabIdx);
+                }
+            });
+        });
+        vocabIdxsToDelete.forEach(vocabIdx => {
+            delete this.documentFrequency[vocabIdx];
+            delete this.vocabulary[vocabIdx];
+        });
+    }
+    /**
+     * Smooth idf weights by adding 1 to document frequencies (DF), as if an extra
+     * document was seen containing every term in the collection exactly once.
+     * This prevents zero divisions.
+     * */
+    smooth() {
+        // for each vocabulary
+        Object.values(this.vocabulary).forEach(vocabIdx => (this.documentFrequency[vocabIdx] =
+            this.documentFrequency[vocabIdx] + 1));
+        this.documents.push({ [SMOOTHING_KEY]: 1 });
+    }
+    buildTfidfs() {
+        const tfidfs = [];
+        this.documents.forEach(document => {
+            // this means all the terms in the document are the terms
+            // that have high document frequency.
+            // This will make all the docs with high DF to be clustered together.
+            if (Object.keys(document).length === 0) {
+                tfidfs.push({ [VOCAB_IDX_FOR_DOC_WITH_HIGH_DF]: 1 });
+                return;
+            }
+            if (!document[SMOOTHING_KEY]) {
+                const atfidf = Object.keys(document).map(vocabIdx => {
+                    return [vocabIdx, this.tf(vocabIdx, document) * this.idf(vocabIdx)];
+                });
+                // normalizing the values
+                const dotSum = atfidf
+                    .map(([_, tfidfValue]) => tfidfValue * tfidfValue)
+                    .reduce((sum, tfidfValueSquered) => sum + tfidfValueSquered, 0);
+                const dotSumSqrRoot = Math.sqrt(dotSum);
+                // Normalizing tfidfs
+                const atfidfVocabIdxValueObject = atfidf
+                    .map(([vocabIdx, tfidfValue]) => [
+                    vocabIdx,
+                    tfidfValue / dotSumSqrRoot,
+                ])
+                    .reduce((obj, [vocabIdx, value]) => {
+                    obj[vocabIdx] = value;
+                    return obj;
+                }, {});
+                tfidfs.push(atfidfVocabIdxValueObject);
+            }
+        });
+        return tfidfs;
+    }
+    tf(vocabIdx, document) {
+        return 1 + Math.log(document[vocabIdx]);
+    }
+    idf(vocabIdx) {
+        return (1 + Math.log(this.documents.length / this.documentFrequency[vocabIdx]));
+    }
+}
+exports.TfidfVectorizer = TfidfVectorizer;
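In summary, the vectorizer uses sublinear term frequency, tf(t, d) = 1 + ln(count(t, d)), and smoothed inverse document frequency, idf(t) = 1 + ln(N / df(t)) where N includes one synthetic smoothing document, then L2-normalizes each document vector and drops features whose document frequency exceeds `maxDF`. A usage sketch (the class is internal to @memlab/core, and the input strings are invented):

```typescript
const vectorizer = new TfidfVectorizer({
  rawDocuments: [
    'Window listeners Array Detached_HTMLDivElement',
    'Window listeners Array Detached_HTMLSpanElement',
    'Window store Map FiberNode',
  ],
  maxDF: 0.8, // drop features that appear in more than 80% of the documents
});
const tfidfs = vectorizer.computeTfidfs();
// tfidfs[i] maps vocabulary indices to L2-normalized tf-idf weights for document i;
// passed to distance() and cluster(), the two Detached_* traces should group together.
```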