@memlab/core 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/Config.d.ts +4 -1
- package/dist/lib/Config.js +7 -1
- package/dist/lib/HeapAnalyzer.js +6 -1
- package/dist/lib/NodeHeap.d.ts +20 -0
- package/dist/lib/NodeHeap.js +20 -0
- package/dist/lib/PackageInfoLoader.js +1 -1
- package/dist/lib/Serializer.js +48 -25
- package/dist/lib/Types.d.ts +61 -4
- package/dist/trace-cluster/TraceBucket.js +6 -1
- package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.d.ts +15 -0
- package/dist/trace-cluster/strategies/MLTraceSimilarityStrategy.js +61 -0
- package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.d.ts +11 -0
- package/dist/trace-cluster/strategies/machine-learning/DistanceMatrix.js +54 -0
- package/dist/trace-cluster/strategies/machine-learning/HAC.d.ts +17 -0
- package/dist/trace-cluster/strategies/machine-learning/HAC.js +124 -0
- package/dist/trace-cluster/strategies/machine-learning/Ngram.d.ts +11 -0
- package/dist/trace-cluster/strategies/machine-learning/Ngram.js +22 -0
- package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.d.ts +38 -0
- package/dist/trace-cluster/strategies/machine-learning/TfidfVectorizer.js +140 -0
- package/package.json +1 -1
package/dist/lib/Config.d.ts
CHANGED
|
@@ -52,7 +52,6 @@ export declare class MemLabConfig {
|
|
|
52
52
|
_scenario: Optional<IScenario>;
|
|
53
53
|
_isHeadfulBrowser: boolean;
|
|
54
54
|
_browser: string;
|
|
55
|
-
_packageInfo: IPackageInfo[];
|
|
56
55
|
snapshotHasDetachedness: boolean;
|
|
57
56
|
specifiedEngine: boolean;
|
|
58
57
|
verbose: boolean;
|
|
@@ -176,9 +175,13 @@ export declare class MemLabConfig {
|
|
|
176
175
|
externalLeakFilter?: Optional<ILeakFilter>;
|
|
177
176
|
monoRepoDir: string;
|
|
178
177
|
muteConsole: boolean;
|
|
178
|
+
includeObjectInfoInTraceReturnChain: boolean;
|
|
179
179
|
logUnclassifiedClusters: boolean;
|
|
180
180
|
errorHandling: ErrorHandling;
|
|
181
181
|
clusterStrategy: Optional<IClusterStrategy>;
|
|
182
|
+
packageInfo: IPackageInfo[];
|
|
183
|
+
isMLClustering: boolean;
|
|
184
|
+
mlClusteringLinkageMaxDistance: number;
|
|
182
185
|
constructor(options?: ConfigOption);
|
|
183
186
|
private initInternalConfigs;
|
|
184
187
|
private init;
|
package/dist/lib/Config.js
CHANGED
|
@@ -98,7 +98,7 @@ class MemLabConfig {
|
|
|
98
98
|
// the default browser (Chromium)
|
|
99
99
|
this._browser = 'chrome';
|
|
100
100
|
// a list of package information
|
|
101
|
-
this.
|
|
101
|
+
this.packageInfo = [];
|
|
102
102
|
// a set of additional GKs to be enabled
|
|
103
103
|
this.addEnableGK = new Set();
|
|
104
104
|
// a set of additional GKs to be disabled
|
|
@@ -119,8 +119,14 @@ class MemLabConfig {
|
|
|
119
119
|
this.muteConsole = false;
|
|
120
120
|
// log all leak traces, each as an unclassified cluster
|
|
121
121
|
this.logUnclassifiedClusters = false;
|
|
122
|
+
// If true, the detailed JSON file of each representative
|
|
123
|
+
// trace (for visualization) will include detailed object
|
|
124
|
+
// info for each Fiber node on the return chain.
|
|
125
|
+
// This may bloat the trace size from 100KB to 50MB.
|
|
126
|
+
this.includeObjectInfoInTraceReturnChain = false;
|
|
122
127
|
// by default halt the program when utils.haltOrThrow is calleds
|
|
123
128
|
this.errorHandling = ErrorHandling.Halt;
|
|
129
|
+
this.mlClusteringLinkageMaxDistance = 0.7;
|
|
124
130
|
}
|
|
125
131
|
// initialize configurable parameters
|
|
126
132
|
init(options = {}) {
|
package/dist/lib/HeapAnalyzer.js
CHANGED
|
@@ -33,6 +33,7 @@ const Console_1 = __importDefault(require("./Console"));
|
|
|
33
33
|
const Serializer_1 = __importDefault(require("./Serializer"));
|
|
34
34
|
const Utils_1 = __importDefault(require("./Utils"));
|
|
35
35
|
const LeakObjectFilter_1 = require("./leak-filters/LeakObjectFilter");
|
|
36
|
+
const MLTraceSimilarityStrategy_1 = __importDefault(require("../trace-cluster/strategies/MLTraceSimilarityStrategy"));
|
|
36
37
|
class MemoryAnalyst {
|
|
37
38
|
checkLeak() {
|
|
38
39
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -630,7 +631,11 @@ class MemoryAnalyst {
|
|
|
630
631
|
Console_1.default.midLevel(`${numOfLeakedObjects} leaked objects`);
|
|
631
632
|
}
|
|
632
633
|
// cluster traces from the current run
|
|
633
|
-
const clusters = TraceBucket_1.default.clusterPaths(paths, snapshot, this.aggregateDominatorMetrics
|
|
634
|
+
const clusters = TraceBucket_1.default.clusterPaths(paths, snapshot, this.aggregateDominatorMetrics, {
|
|
635
|
+
strategy: Config_1.default.isMLClustering
|
|
636
|
+
? new MLTraceSimilarityStrategy_1.default()
|
|
637
|
+
: undefined,
|
|
638
|
+
});
|
|
634
639
|
yield this.serializeClusterUpdate(clusters);
|
|
635
640
|
if (Config_1.default.logUnclassifiedClusters) {
|
|
636
641
|
// cluster traces from the current run
|
package/dist/lib/NodeHeap.d.ts
CHANGED
|
@@ -48,6 +48,26 @@ import type { IHeapSnapshot } from './Types';
|
|
|
48
48
|
* ```
|
|
49
49
|
*/
|
|
50
50
|
export declare function tagObject<T extends object>(o: T, tag: string): T;
|
|
51
|
+
/**
|
|
52
|
+
* Take a heap snapshot of the current program state and save it as a
|
|
53
|
+
* `.heapsnapshot` file under a randomly generated folder inside the system's
|
|
54
|
+
* temp folder.
|
|
55
|
+
*
|
|
56
|
+
* **Note**: All `.heapsnapshot` files could also be loaded by Chrome DevTools.
|
|
57
|
+
* @returns the absolute file path to the saved `.heapsnapshot` file.
|
|
58
|
+
*
|
|
59
|
+
* * **Examples**:
|
|
60
|
+
* ```typescript
|
|
61
|
+
* import type {IHeapSnapshot} from '@memlab/core';
|
|
62
|
+
* import {dumpNodeHeapSnapshot} from '@memlab/core';
|
|
63
|
+
* import {getHeapFromFile} from '@memlab/heap-analysis';
|
|
64
|
+
*
|
|
65
|
+
* (async function () {
|
|
66
|
+
* const heapFile = dumpNodeHeapSnapshot();
|
|
67
|
+
* const heap: IHeapSnapshot = await getHeapFromFile(heapFile);
|
|
68
|
+
* })();
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
51
71
|
export declare function dumpNodeHeapSnapshot(): string;
|
|
52
72
|
/**
|
|
53
73
|
* Take a heap snapshot of the current program state
|
package/dist/lib/NodeHeap.js
CHANGED
|
@@ -80,6 +80,26 @@ function tagObject(o, tag) {
|
|
|
80
80
|
return o;
|
|
81
81
|
}
|
|
82
82
|
exports.tagObject = tagObject;
|
|
83
|
+
/**
|
|
84
|
+
* Take a heap snapshot of the current program state and save it as a
|
|
85
|
+
* `.heapsnapshot` file under a randomly generated folder inside the system's
|
|
86
|
+
* temp folder.
|
|
87
|
+
*
|
|
88
|
+
* **Note**: All `.heapsnapshot` files could also be loaded by Chrome DevTools.
|
|
89
|
+
* @returns the absolute file path to the saved `.heapsnapshot` file.
|
|
90
|
+
*
|
|
91
|
+
* * **Examples**:
|
|
92
|
+
* ```typescript
|
|
93
|
+
* import type {IHeapSnapshot} from '@memlab/core';
|
|
94
|
+
* import {dumpNodeHeapSnapshot} from '@memlab/core';
|
|
95
|
+
* import {getHeapFromFile} from '@memlab/heap-analysis';
|
|
96
|
+
*
|
|
97
|
+
* (async function () {
|
|
98
|
+
* const heapFile = dumpNodeHeapSnapshot();
|
|
99
|
+
* const heap: IHeapSnapshot = await getHeapFromFile(heapFile);
|
|
100
|
+
* })();
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
83
103
|
function dumpNodeHeapSnapshot() {
|
|
84
104
|
const file = path_1.default.join(FileManager_1.default.generateTmpHeapDir(), `nodejs.heapsnapshot`);
|
|
85
105
|
v8_1.default.writeHeapSnapshot(file);
|
|
@@ -57,7 +57,7 @@ class PackageInfoLoader {
|
|
|
57
57
|
if (!PackageInfoLoader.registeredPackages.has(packageDirectory)) {
|
|
58
58
|
PackageInfoLoader.registeredPackages.add(packageDirectory);
|
|
59
59
|
const packageInfo = yield PackageInfoLoader.loadFrom(packageDirectory);
|
|
60
|
-
Config_1.default.
|
|
60
|
+
Config_1.default.packageInfo.push(packageInfo);
|
|
61
61
|
}
|
|
62
62
|
});
|
|
63
63
|
}
|
package/dist/lib/Serializer.js
CHANGED
|
@@ -99,7 +99,7 @@ function JSONifyDetachedHTMLElement(node, args, options) {
|
|
|
99
99
|
// options for elem.__reactProps$xxx
|
|
100
100
|
const propsOptions = Object.assign({}, options);
|
|
101
101
|
propsOptions.forceJSONifyDepth = 1;
|
|
102
|
-
|
|
102
|
+
iterateSelectedEdges(node, (edge) => {
|
|
103
103
|
const key = JSONifyEdgeNameAndType(edge);
|
|
104
104
|
if (Utils_1.default.isReactFiberEdge(edge)) {
|
|
105
105
|
info[key] = JSONifyNode(edge.toNode, args, fiberOptions);
|
|
@@ -110,7 +110,8 @@ function JSONifyDetachedHTMLElement(node, args, options) {
|
|
|
110
110
|
else {
|
|
111
111
|
info[key] = JSONifyNodeInShort(edge.toNode);
|
|
112
112
|
}
|
|
113
|
-
|
|
113
|
+
return null;
|
|
114
|
+
});
|
|
114
115
|
return info;
|
|
115
116
|
}
|
|
116
117
|
function calculateReturnTrace(node, cache) {
|
|
@@ -128,15 +129,16 @@ function calculateReturnTrace(node, cache) {
|
|
|
128
129
|
const objectNodeUsefulProps = new Set(['_context']);
|
|
129
130
|
function JSONifyNodeOneLevel(node) {
|
|
130
131
|
const info = Object.create(null);
|
|
131
|
-
|
|
132
|
+
iterateSelectedEdges(node, (edge) => {
|
|
132
133
|
const key = JSONifyEdgeNameAndType(edge);
|
|
133
134
|
info[key] = JSONifyNodeShallow(edge.toNode);
|
|
134
|
-
|
|
135
|
+
return null;
|
|
136
|
+
});
|
|
135
137
|
return info;
|
|
136
138
|
}
|
|
137
139
|
function JSONifyNodeShallow(node) {
|
|
138
140
|
const info = Object.create(null);
|
|
139
|
-
|
|
141
|
+
iterateSelectedEdges(node, (edge) => {
|
|
140
142
|
const key = JSONifyEdgeNameAndType(edge);
|
|
141
143
|
if (objectNodeUsefulProps.has(edge.name_or_index)) {
|
|
142
144
|
info[key] = JSONifyNodeShallow(edge.toNode);
|
|
@@ -144,7 +146,8 @@ function JSONifyNodeShallow(node) {
|
|
|
144
146
|
else {
|
|
145
147
|
info[key] = JSONifyNodeInShort(edge.toNode);
|
|
146
148
|
}
|
|
147
|
-
|
|
149
|
+
return null;
|
|
150
|
+
});
|
|
148
151
|
return info;
|
|
149
152
|
}
|
|
150
153
|
const fiberNodeUsefulProps = new Set([
|
|
@@ -154,15 +157,17 @@ const fiberNodeUsefulProps = new Set([
|
|
|
154
157
|
]);
|
|
155
158
|
function JSONifyFiberNodeShallow(node) {
|
|
156
159
|
const info = Object.create(null);
|
|
157
|
-
|
|
160
|
+
iterateSelectedEdges(node, (edge) => {
|
|
158
161
|
const key = JSONifyEdgeNameAndType(edge);
|
|
159
162
|
if (fiberNodeUsefulProps.has(edge.name_or_index) &&
|
|
160
163
|
Utils_1.default.isObjectNode(edge.toNode)) {
|
|
161
164
|
info[key] = JSONifyNodeShallow(edge.toNode);
|
|
162
|
-
continue;
|
|
163
165
|
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
+
else {
|
|
167
|
+
info[key] = JSONifyNodeInShort(edge.toNode);
|
|
168
|
+
}
|
|
169
|
+
return null;
|
|
170
|
+
});
|
|
166
171
|
return info;
|
|
167
172
|
}
|
|
168
173
|
// calculate the summary of return chain of the FiberNode
|
|
@@ -187,7 +192,9 @@ function JSONifyFiberNodeReturnTrace(node, args, options) {
|
|
|
187
192
|
}
|
|
188
193
|
const parentInfo = getNodeNameInJSON(parent, args);
|
|
189
194
|
key = `${key}: --return (property)---> ${parentInfo}`;
|
|
190
|
-
const info =
|
|
195
|
+
const info = Config_1.default.includeObjectInfoInTraceReturnChain
|
|
196
|
+
? JSONifyFiberNodeShallow(parent)
|
|
197
|
+
: Object.create(null);
|
|
191
198
|
trace[key] = info;
|
|
192
199
|
}
|
|
193
200
|
return trace;
|
|
@@ -206,25 +213,27 @@ function JSONifyFiberNode(node, args, options) {
|
|
|
206
213
|
propsOptions.forceJSONifyDepth = 1;
|
|
207
214
|
}
|
|
208
215
|
propsOptions.forceJSONifyDepth--;
|
|
209
|
-
|
|
216
|
+
iterateSelectedEdges(node, (edge) => {
|
|
210
217
|
const key = JSONifyEdgeNameAndType(edge);
|
|
211
218
|
info[key] =
|
|
212
|
-
propsOptions.forceJSONifyDepth >= 1
|
|
219
|
+
propsOptions.forceJSONifyDepth && propsOptions.forceJSONifyDepth >= 1
|
|
213
220
|
? JSONifyNode(edge.toNode, args, propsOptions)
|
|
214
221
|
: JSONifyNodeInShort(edge.toNode);
|
|
215
|
-
|
|
222
|
+
return null;
|
|
223
|
+
});
|
|
216
224
|
return info;
|
|
217
225
|
}
|
|
218
226
|
function JSONifyClosure(node, args, options) {
|
|
219
227
|
const info = Object.create(null);
|
|
220
|
-
|
|
228
|
+
iterateSelectedEdges(node, (edge) => {
|
|
221
229
|
if (edge.name_or_index === 'shared' ||
|
|
222
230
|
edge.name_or_index === 'context' ||
|
|
223
231
|
edge.name_or_index === 'displayName') {
|
|
224
232
|
const key = filterJSONPropName(edge.name_or_index);
|
|
225
233
|
info[key] = JSONifyNode(edge.toNode, args, options);
|
|
226
234
|
}
|
|
227
|
-
|
|
235
|
+
return null;
|
|
236
|
+
});
|
|
228
237
|
return info;
|
|
229
238
|
}
|
|
230
239
|
function JSONifyNumberNode(node,
|
|
@@ -238,7 +247,7 @@ _options) {
|
|
|
238
247
|
}
|
|
239
248
|
function JSONifyCode(node, args, options) {
|
|
240
249
|
const info = Object.create(null);
|
|
241
|
-
|
|
250
|
+
iterateSelectedEdges(node, (edge) => {
|
|
242
251
|
if (edge.name_or_index === 'name_or_scope_info' &&
|
|
243
252
|
edge.toNode.name === '(function scope info)') {
|
|
244
253
|
const key = 'variables with non-number values in closure scope chain';
|
|
@@ -251,14 +260,15 @@ function JSONifyCode(node, args, options) {
|
|
|
251
260
|
const key = filterJSONPropName(edge.name_or_index);
|
|
252
261
|
info[key] = JSONifyNode(edge.toNode, args, options);
|
|
253
262
|
}
|
|
254
|
-
|
|
263
|
+
return null;
|
|
264
|
+
});
|
|
255
265
|
return info;
|
|
256
266
|
}
|
|
257
267
|
function JSONifyContext(node, args, options) {
|
|
258
268
|
const info = Object.create(null);
|
|
259
269
|
const key = 'variables in scope (used by nested closures)';
|
|
260
270
|
const closure_vars = (info[key] = Object.create(null));
|
|
261
|
-
|
|
271
|
+
iterateSelectedEdges(node, (edge) => {
|
|
262
272
|
const key = filterJSONPropName(edge.name_or_index);
|
|
263
273
|
if (edge.type === 'context') {
|
|
264
274
|
closure_vars[key] = JSONifyNodeInShort(edge.toNode);
|
|
@@ -266,15 +276,27 @@ function JSONifyContext(node, args, options) {
|
|
|
266
276
|
else if (edge.type === '') {
|
|
267
277
|
info[key] = JSONifyNode(edge.toNode, args, options);
|
|
268
278
|
}
|
|
269
|
-
|
|
279
|
+
return null;
|
|
280
|
+
});
|
|
270
281
|
return info;
|
|
271
282
|
}
|
|
283
|
+
function iterateSelectedEdges(node, callback) {
|
|
284
|
+
let edgesProcessed = 0;
|
|
285
|
+
node.forEachReference((edge) => {
|
|
286
|
+
if (edge.type === 'internal') {
|
|
287
|
+
if (edge.name_or_index === 'map' || edge.is_index) {
|
|
288
|
+
return;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
if (edgesProcessed++ > 100) {
|
|
292
|
+
return { stop: true };
|
|
293
|
+
}
|
|
294
|
+
return callback(edge);
|
|
295
|
+
});
|
|
296
|
+
}
|
|
272
297
|
function JSONifyOrdinaryValue(node, args, options) {
|
|
273
298
|
const info = Object.create(null);
|
|
274
|
-
|
|
275
|
-
if (edge.name_or_index === 'map' && edge.type === 'internal') {
|
|
276
|
-
continue;
|
|
277
|
-
}
|
|
299
|
+
iterateSelectedEdges(node, (edge) => {
|
|
278
300
|
const key = JSONifyEdgeNameAndType(edge);
|
|
279
301
|
const toNode = edge.toNode;
|
|
280
302
|
const toNodeName = toNode.name;
|
|
@@ -293,7 +315,8 @@ function JSONifyOrdinaryValue(node, args, options) {
|
|
|
293
315
|
else {
|
|
294
316
|
info[key] = JSONifyNodeInShort(toNode);
|
|
295
317
|
}
|
|
296
|
-
|
|
318
|
+
return null;
|
|
319
|
+
});
|
|
297
320
|
return info;
|
|
298
321
|
}
|
|
299
322
|
function JSONifyNode(node, args, options) {
|
package/dist/lib/Types.d.ts
CHANGED
|
@@ -32,7 +32,6 @@ export declare type AnyOptions = Record<string, unknown>;
|
|
|
32
32
|
export declare type UnusedOptions = Record<string, never>;
|
|
33
33
|
/** @internal */
|
|
34
34
|
export declare type Command = [string, string[], AnyOptions];
|
|
35
|
-
export declare type Predicator<T> = (node: T) => boolean;
|
|
36
35
|
/** @internal */
|
|
37
36
|
export declare type HeapNodeIdSet = Set<number>;
|
|
38
37
|
/** @internal */
|
|
@@ -86,6 +85,22 @@ export declare type CLIArgs = {
|
|
|
86
85
|
'local-puppeteer': boolean;
|
|
87
86
|
'snapshot-dir': string;
|
|
88
87
|
};
|
|
88
|
+
/**
|
|
89
|
+
* the predicate callback is used to decide if a
|
|
90
|
+
* entity of type `T`.
|
|
91
|
+
* For more concrete examples on where it is used,
|
|
92
|
+
* check out {@link findAnyReference}, {@link findAnyReferrer},
|
|
93
|
+
* and {@link findReferrers}.
|
|
94
|
+
*
|
|
95
|
+
* @typeParam T - the type of the entity to be checked
|
|
96
|
+
* @param entity - the entity to be checked
|
|
97
|
+
* @returns whether the entity passes the predicate check
|
|
98
|
+
*/
|
|
99
|
+
export declare type Predicator<T> = (entity: T) => boolean;
|
|
100
|
+
/**
|
|
101
|
+
* Data structure for holding cookies.
|
|
102
|
+
* For concrete example, check out {@link cookies}.
|
|
103
|
+
*/
|
|
89
104
|
export declare type Cookies = Array<{
|
|
90
105
|
name: string;
|
|
91
106
|
value: string;
|
|
@@ -288,9 +303,10 @@ export interface ILeakFilter {
|
|
|
288
303
|
/**
|
|
289
304
|
* Lifecycle function callback that is invoked initially once before calling any
|
|
290
305
|
* leak filter function.
|
|
306
|
+
* For concrete example, check out {@link beforeLeakFilter}.
|
|
291
307
|
*
|
|
292
|
-
* @param
|
|
293
|
-
* @param leakedNodeIds
|
|
308
|
+
* @param snapshot heap snapshot see {@link IHeapSnapshot}
|
|
309
|
+
* @param leakedNodeIds the set of leaked object (node) ids.
|
|
294
310
|
*/
|
|
295
311
|
export declare type InitLeakFilterCallback = (snapshot: IHeapSnapshot, leakedNodeIds: HeapNodeIdSet) => void;
|
|
296
312
|
/**
|
|
@@ -299,6 +315,8 @@ export declare type InitLeakFilterCallback = (snapshot: IHeapSnapshot, leakedNod
|
|
|
299
315
|
* allocated but not released from the target interaction
|
|
300
316
|
* in the heap snapshot.
|
|
301
317
|
*
|
|
318
|
+
* For concrete examples, check out {@link leakFilter}.
|
|
319
|
+
*
|
|
302
320
|
* @param node - the node that is kept alive in the memory in the heap snapshot
|
|
303
321
|
* @param snapshot - the snapshot of target interaction
|
|
304
322
|
* @param leakedNodeIds - the set of leaked node ids
|
|
@@ -317,6 +335,11 @@ export declare type LeakFilterCallback = (node: IHeapNode, snapshot: IHeapSnapsh
|
|
|
317
335
|
/**
|
|
318
336
|
* The callback defines browser interactions which are
|
|
319
337
|
* used by memlab to interact with the web app under test.
|
|
338
|
+
* For concrete examples, check out {@link action} or {@link back}.
|
|
339
|
+
*
|
|
340
|
+
* @param page the puppeteer [`Page`](https://pptr.dev/api/puppeteer.page)
|
|
341
|
+
* object, which provides APIs to interact with the web browser
|
|
342
|
+
* @returns no return value
|
|
320
343
|
*/
|
|
321
344
|
export declare type InteractionsCallback = (page: Page, args?: OperationArgs) => Promise<void>;
|
|
322
345
|
/**
|
|
@@ -684,6 +707,7 @@ export interface IDataBuilder {
|
|
|
684
707
|
}
|
|
685
708
|
/**
|
|
686
709
|
* Callback function to provide if the page is loaded.
|
|
710
|
+
* For concrete example, check out {@link isPageLoaded}.
|
|
687
711
|
* @param page - puppeteer's [Page](https://pptr.dev/api/puppeteer.page/) object.
|
|
688
712
|
* @returns a boolean value, if it returns `true`, memlab will consider
|
|
689
713
|
* the navigation completes, if it returns `false`, memlab will keep calling
|
|
@@ -738,16 +762,42 @@ export declare type E2EStepInfo = IE2EStepBasic & {
|
|
|
738
762
|
delay?: number;
|
|
739
763
|
metrics: Record<string, number>;
|
|
740
764
|
};
|
|
741
|
-
/**
|
|
765
|
+
/**
|
|
766
|
+
* This data structure contains the input configuration for the browser and
|
|
767
|
+
* output data from the browser. You can retrieve the instance of this type
|
|
768
|
+
* through {@link RunMetaInfo}.
|
|
769
|
+
*/
|
|
742
770
|
export interface IBrowserInfo {
|
|
771
|
+
/**
|
|
772
|
+
* browser version
|
|
773
|
+
*/
|
|
743
774
|
_browserVersion: string;
|
|
775
|
+
/**
|
|
776
|
+
* configuration for puppeteer
|
|
777
|
+
*/
|
|
744
778
|
_puppeteerConfig: LaunchOptions;
|
|
779
|
+
/**
|
|
780
|
+
* all web console output
|
|
781
|
+
*/
|
|
745
782
|
_consoleMessages: string[];
|
|
746
783
|
}
|
|
784
|
+
/**
|
|
785
|
+
* This data structure holds the information about memlab run.
|
|
786
|
+
* You can retrieve the instance of this type through {@link getRunMetaInfo}.
|
|
787
|
+
*/
|
|
747
788
|
export declare type RunMetaInfo = {
|
|
789
|
+
/** @internal */
|
|
748
790
|
app: string;
|
|
791
|
+
/** @internal */
|
|
749
792
|
interaction: string;
|
|
793
|
+
/**
|
|
794
|
+
* type of the memlab run
|
|
795
|
+
*/
|
|
750
796
|
type: string;
|
|
797
|
+
/**
|
|
798
|
+
* input configuration for the browser and
|
|
799
|
+
* output data from the browser
|
|
800
|
+
*/
|
|
751
801
|
browserInfo: IBrowserInfo;
|
|
752
802
|
};
|
|
753
803
|
/**
|
|
@@ -1162,6 +1212,13 @@ export interface IHeapNodeBasic {
|
|
|
1162
1212
|
*/
|
|
1163
1213
|
id: number;
|
|
1164
1214
|
}
|
|
1215
|
+
/**
|
|
1216
|
+
* Executes a provided callback once for JavaScript references.
|
|
1217
|
+
* For concrete examples, check out {@link forEachReference}
|
|
1218
|
+
* or {@link forEachReferrer}.
|
|
1219
|
+
* @param callback the callback for each JavaScript reference from a collection
|
|
1220
|
+
* @returns this API returns void
|
|
1221
|
+
*/
|
|
1165
1222
|
export declare type EdgeIterationCallback = (edge: IHeapEdge) => Optional<{
|
|
1166
1223
|
stop: boolean;
|
|
1167
1224
|
}>;
|
|
@@ -20,6 +20,7 @@ const Utils_1 = __importDefault(require("../lib/Utils"));
|
|
|
20
20
|
const TraceElement_1 = require("./TraceElement");
|
|
21
21
|
const TraceSimilarityStrategy_1 = __importDefault(require("./strategies/TraceSimilarityStrategy"));
|
|
22
22
|
const TraceAsClusterStrategy_1 = __importDefault(require("./strategies/TraceAsClusterStrategy"));
|
|
23
|
+
const MLTraceSimilarityStrategy_1 = __importDefault(require("./strategies/MLTraceSimilarityStrategy"));
|
|
23
24
|
// sync up with html/intern/js/webspeed/memlab/lib/LeakCluster.js
|
|
24
25
|
class NormalizedTrace {
|
|
25
26
|
constructor(p = null, snapshot = null) {
|
|
@@ -157,7 +158,11 @@ class NormalizedTrace {
|
|
|
157
158
|
};
|
|
158
159
|
}
|
|
159
160
|
static clusterLeakTraces(leakTraces) {
|
|
160
|
-
const { allClusters } = NormalizedTrace.diffTraces(leakTraces, []
|
|
161
|
+
const { allClusters } = NormalizedTrace.diffTraces(leakTraces, [], {
|
|
162
|
+
strategy: Config_1.default.isMLClustering
|
|
163
|
+
? new MLTraceSimilarityStrategy_1.default()
|
|
164
|
+
: undefined,
|
|
165
|
+
});
|
|
161
166
|
const lastNodeFromTrace = (trace) => trace[trace.length - 1];
|
|
162
167
|
const labaledLeakTraces = allClusters.reduce((acc, bucket) => {
|
|
163
168
|
const lastNodeFromFirstTrace = lastNodeFromTrace(bucket[0]);
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*
|
|
7
|
+
* @emails oncall+ws_labs
|
|
8
|
+
* @format
|
|
9
|
+
*/
|
|
10
|
+
import type { IClusterStrategy, LeakTrace, TraceDiff } from '../../lib/Types';
|
|
11
|
+
export default class MLTraceSimilarityStrategy implements IClusterStrategy {
|
|
12
|
+
diffTraces(newLeakTraces: LeakTrace[]): TraceDiff;
|
|
13
|
+
traceToDoc(trace: LeakTrace): string;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=MLTraceSimilarityStrategy.d.ts.map
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* @emails oncall+ws_labs
|
|
9
|
+
* @format
|
|
10
|
+
*/
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const Config_1 = __importDefault(require("../../lib/Config"));
|
|
16
|
+
const DistanceMatrix_1 = require("./machine-learning/DistanceMatrix");
|
|
17
|
+
const HAC_1 = require("./machine-learning/HAC");
|
|
18
|
+
const TfidfVectorizer_1 = require("./machine-learning/TfidfVectorizer");
|
|
19
|
+
class MLTraceSimilarityStrategy {
|
|
20
|
+
diffTraces(newLeakTraces) {
|
|
21
|
+
var _a;
|
|
22
|
+
const rawDocuments = newLeakTraces.map(this.traceToDoc);
|
|
23
|
+
const vectorizer = new TfidfVectorizer_1.TfidfVectorizer({ rawDocuments });
|
|
24
|
+
const tfidfs = vectorizer.computeTfidfs();
|
|
25
|
+
const dmatrix = (0, DistanceMatrix_1.distance)(tfidfs);
|
|
26
|
+
const result = (0, HAC_1.cluster)(rawDocuments.length, dmatrix, Config_1.default.mlClusteringLinkageMaxDistance);
|
|
27
|
+
const map = new Map();
|
|
28
|
+
for (let i = 0; i < result.length; i++) {
|
|
29
|
+
const traceIdx = result[i];
|
|
30
|
+
const repTrace = newLeakTraces[traceIdx];
|
|
31
|
+
const trace = newLeakTraces[i];
|
|
32
|
+
if (!map.has(repTrace)) {
|
|
33
|
+
map.set(repTrace, [repTrace]);
|
|
34
|
+
}
|
|
35
|
+
// to please lint
|
|
36
|
+
(_a = map.get(repTrace)) === null || _a === void 0 ? void 0 : _a.push(trace);
|
|
37
|
+
}
|
|
38
|
+
return {
|
|
39
|
+
allClusters: Array.from(map.values()),
|
|
40
|
+
staleClusters: [],
|
|
41
|
+
clustersToAdd: [],
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
traceToDoc(trace) {
|
|
45
|
+
const res = [];
|
|
46
|
+
for (const t of trace) {
|
|
47
|
+
let name = t.kind === 'node' ? String(t.name) : String(t.name_or_index);
|
|
48
|
+
if (name === '') {
|
|
49
|
+
name = '_null_';
|
|
50
|
+
}
|
|
51
|
+
name = name.replace(/ /g, '_');
|
|
52
|
+
name = name.replace(/\d/g, '');
|
|
53
|
+
if (name === '') {
|
|
54
|
+
name = '_number_';
|
|
55
|
+
}
|
|
56
|
+
res.push(name);
|
|
57
|
+
}
|
|
58
|
+
return res.join(' ');
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
exports.default = MLTraceSimilarityStrategy;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*
|
|
7
|
+
* @emails oncall+ws_labs
|
|
8
|
+
* @format
|
|
9
|
+
*/
|
|
10
|
+
export declare const distance: (tfidfs: Record<string, number>[]) => Float32Array;
|
|
11
|
+
//# sourceMappingURL=DistanceMatrix.d.ts.map
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* @emails oncall+ws_labs
|
|
9
|
+
* @format
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.distance = void 0;
|
|
13
|
+
const cache = new Map();
|
|
14
|
+
const buildIntersection = (tfidfs, i, j) => {
|
|
15
|
+
const intersection = [];
|
|
16
|
+
if (!cache.has(i)) {
|
|
17
|
+
cache.set(i, Object.keys(tfidfs[i]));
|
|
18
|
+
}
|
|
19
|
+
if (!cache.has(j)) {
|
|
20
|
+
cache.set(j, Object.keys(tfidfs[j]));
|
|
21
|
+
}
|
|
22
|
+
const [keys, tfidf] = cache.get(i).length > cache.get(j).length
|
|
23
|
+
? [cache.get(j), tfidfs[i]]
|
|
24
|
+
: [cache.get(i), tfidfs[j]];
|
|
25
|
+
for (const k of keys) {
|
|
26
|
+
if (tfidf[k]) {
|
|
27
|
+
intersection.push(k);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
return intersection;
|
|
31
|
+
};
|
|
32
|
+
const distance = (tfidfs) => {
|
|
33
|
+
const n = tfidfs.length;
|
|
34
|
+
const distances = new Float32Array((n * (n - 1)) / 2);
|
|
35
|
+
let distIdx = 0;
|
|
36
|
+
const dotProducs = tfidfs.map(atfidf => Object.values(atfidf).reduce((sum, v) => sum + v * v, 0));
|
|
37
|
+
for (let i = 0; i < tfidfs.length; i++) {
|
|
38
|
+
const a = tfidfs[i];
|
|
39
|
+
for (let j = i + 1; j < tfidfs.length; j++) {
|
|
40
|
+
const b = tfidfs[j];
|
|
41
|
+
const intersection = buildIntersection(tfidfs, i, j);
|
|
42
|
+
const dotProdOfCommons = intersection.reduce((sum, vidx) => sum + a[vidx] * b[vidx], 0);
|
|
43
|
+
// TODO make it pluggable to use other distance measures like euclidean, manhattan
|
|
44
|
+
const cosineSimilarity = 1 -
|
|
45
|
+
dotProdOfCommons /
|
|
46
|
+
(Math.sqrt(dotProducs[i]) / Math.sqrt(dotProducs[j]));
|
|
47
|
+
distances[distIdx] = cosineSimilarity;
|
|
48
|
+
distIdx++;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
cache.clear();
|
|
52
|
+
return distances;
|
|
53
|
+
};
|
|
54
|
+
exports.distance = distance;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*
|
|
7
|
+
* @emails oncall+ws_labs
|
|
8
|
+
* @format
|
|
9
|
+
*/
|
|
10
|
+
/**
|
|
11
|
+
*
|
|
12
|
+
* @param {*} nDocs number of docs
|
|
13
|
+
* @param {*} D condenced distance matrix
|
|
14
|
+
* @returns labels - list of doc ids as clusters
|
|
15
|
+
*/
|
|
16
|
+
export declare const cluster: (nDocs: number, condensedDistanceMatrix: Float32Array, maxDistanceThreshold: number) => number[];
|
|
17
|
+
//# sourceMappingURL=HAC.d.ts.map
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* @emails oncall+ws_labs
|
|
9
|
+
* @format
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.cluster = void 0;
|
|
13
|
+
const condensedIndex = (n, i, j) => {
|
|
14
|
+
if (i > j) {
|
|
15
|
+
return condensedIndex(n, j, i);
|
|
16
|
+
}
|
|
17
|
+
// to get distance between (i, j) think of this sequence.
|
|
18
|
+
// (n - 1) + (n - 2) + ... + (n - i) + (j - i) - 1
|
|
19
|
+
return n * i - (i * (i + 1)) / 2 + (j - i - 1);
|
|
20
|
+
};
|
|
21
|
+
const getRootLabel = (array, idx) => {
|
|
22
|
+
let rootIdx = idx;
|
|
23
|
+
while (array[rootIdx] !== rootIdx) {
|
|
24
|
+
rootIdx = array[rootIdx];
|
|
25
|
+
}
|
|
26
|
+
return rootIdx;
|
|
27
|
+
};
|
|
28
|
+
/**
 * Hierarchical agglomerative clustering (average linkage, per the size-weighted
 * distance update below) over a condensed pairwise distance matrix, driven by a
 * nearest-neighbor chain: follow nearest neighbors until two clusters are
 * mutually closest, merge them, and repeat.
 * @param {number} nDocs number of docs
 * @param {Float32Array|number[]} condensedDistanceMatrix condensed distance
 *     matrix (pairwise distances addressed via condensedIndex)
 * @param {number} maxDistanceThreshold merges costlier than this are rejected
 *     and both endpoints are retired from further clustering
 * @returns labels - list of doc ids as clusters (each doc maps to the root doc
 *     id of the cluster it ended up in)
 */
const cluster = (nDocs, condensedDistanceMatrix, maxDistanceThreshold) => {
    // Zero or one doc trivially forms a single cluster.
    if (nDocs <= 1)
        return [0];
    // The linkage update rewrites distances in place, so work on a copy.
    const condencedDistanceMatrixCopy = new Float32Array(condensedDistanceMatrix);
    // sizeOfClusters[i] === 0 marks doc i as merged away or dropped; every doc
    // starts out as its own singleton cluster.
    const sizeOfClusters = new Uint32Array(nDocs).fill(1);
    let chainLength = 0;
    let clusterChain = [];
    let traceAIdx = -1;
    let traceBIdx = -1;
    let currentMin = Number.MAX_SAFE_INTEGER;
    let distanceBetweenTraces;
    // labels[i] initially points at itself; each merge redirects one entry to
    // its surviving partner, forming a forest that getRootLabel resolves.
    const labels = Array(nDocs)
        .fill(0)
        .map((_, idx) => idx);
    // At most nDocs - 1 merges are possible.
    for (let k = 0; k < nDocs - 1; k++) {
        traceBIdx = -1;
        // Seed the chain with the first still-active cluster.
        if (chainLength === 0) {
            for (let i = 0; i < nDocs; i++) {
                if (sizeOfClusters[i] > 0) {
                    clusterChain[0] = i;
                    chainLength = 1;
                    break;
                }
            }
        }
        // Grow the chain by stepping to the nearest neighbor of its tail until
        // two clusters are mutually nearest.
        while (chainLength > 0) {
            traceAIdx = clusterChain[chainLength - 1];
            if (chainLength > 1) {
                // Start the search from the previous chain element so that
                // ties terminate the chain at a mutual nearest neighbor.
                traceBIdx = clusterChain[chainLength - 2];
                currentMin =
                    condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, traceBIdx)];
            }
            else {
                currentMin = Number.MAX_SAFE_INTEGER;
            }
            // Scan all active clusters for the nearest neighbor of traceAIdx.
            for (let i = 0; i < nDocs; i++) {
                if (sizeOfClusters[i] == 0 || traceAIdx == i) {
                    continue;
                }
                distanceBetweenTraces =
                    condencedDistanceMatrixCopy[condensedIndex(nDocs, traceAIdx, i)];
                if (distanceBetweenTraces < currentMin) {
                    currentMin = distanceBetweenTraces;
                    traceBIdx = i;
                }
            }
            // to make sure we found a two mutual traces whose distance is smallest.
            if (chainLength > 1 &&
                traceBIdx !== -1 &&
                traceBIdx === clusterChain[chainLength - 2]) {
                break;
            }
            clusterChain[chainLength] = traceBIdx;
            chainLength = chainLength + 1;
        }
        // NOTE(review): the chain is discarded after every merge; the classic
        // nearest-neighbor-chain algorithm keeps the remaining prefix instead.
        clusterChain = [];
        chainLength = 0;
        // Reject merges above the threshold and retire both endpoints.
        // NOTE(review): if traceBIdx is still -1 here, the Uint32Array write to
        // index -1 is a silent no-op — confirm that is intended.
        if (currentMin > maxDistanceThreshold) {
            sizeOfClusters[traceAIdx] = 0;
            sizeOfClusters[traceBIdx] = 0;
            continue;
        }
        if (traceAIdx === -1 || traceBIdx === -1) {
            continue;
        }
        // Normalize the pair so the larger index (traceBIdx) survives.
        if (traceAIdx > traceBIdx) {
            [traceAIdx, traceBIdx] = [traceBIdx, traceAIdx];
        }
        const nx = sizeOfClusters[traceAIdx];
        const ny = sizeOfClusters[traceBIdx];
        labels[traceAIdx] = traceBIdx;
        sizeOfClusters[traceAIdx] = 0;
        sizeOfClusters[traceBIdx] = nx + ny;
        // Recompute the distance from every remaining cluster to the merged
        // cluster using the size-weighted (average-linkage) formula.
        for (let i = 0; i < nDocs; i++) {
            const ni = sizeOfClusters[i];
            if (ni === 0 || i === traceBIdx) {
                continue;
            }
            const d_xi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceAIdx)];
            const d_yi = condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)];
            const size_x = nx;
            const size_y = ny;
            // TODO make it generic to support other linkage methods like complete, weighted etc...
            const updatedDist = (size_x * d_xi + size_y * d_yi) / (size_x + size_y);
            condencedDistanceMatrixCopy[condensedIndex(nDocs, i, traceBIdx)] =
                updatedDist;
        }
    }
    // Resolve every doc to the root of its label forest.
    return labels.map((_, idx) => getRootLabel(labels, idx));
};
exports.cluster = cluster;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*
|
|
7
|
+
* @emails oncall+ws_labs
|
|
8
|
+
* @format
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Builds the list of contiguous n-grams over `terms`: each output entry is
 * `n` consecutive terms joined by a single space. Returns an empty array when
 * `terms` has fewer than `n` elements.
 * @param n size of each n-gram window
 * @param terms ordered tokens to slide the window over
 * @returns the n-grams, in input order
 */
export declare function nGram(n: number, terms: string[]): string[];
|
|
11
|
+
//# sourceMappingURL=Ngram.d.ts.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* @emails oncall+ws_labs
|
|
9
|
+
* @format
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.nGram = void 0;
|
|
13
|
+
/**
 * Builds the contiguous n-grams of `terms`: every run of `n` consecutive
 * terms, joined by a single space. Yields an empty array when `terms` has
 * fewer than `n` elements.
 * @param n size of each n-gram window
 * @param terms ordered tokens to slide the window over
 * @returns the n-grams, in input order
 */
function nGram(n, terms) {
    const windowCount = terms.length - n + 1;
    const grams = [];
    for (let start = 0; start < windowCount; start++) {
        grams.push(terms.slice(start, start + n).join(' '));
    }
    return grams;
}
|
|
22
|
+
exports.nGram = nGram;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*
|
|
7
|
+
* @emails oncall+ws_labs
|
|
8
|
+
* @format
|
|
9
|
+
*/
|
|
10
|
+
/** Construction options for {@link TfidfVectorizer}. */
interface TfidfVectorizerProps {
    /** Raw input documents; each is a string of space-separated terms. */
    rawDocuments: string[];
    /**
     * Maximum document-frequency ratio (0..1) a term may have before it is
     * dropped by limit(). Defaults to 0.8.
     */
    maxDF?: number;
}
/**
 * Turns raw text documents into L2-normalized tf-idf vectors. Terms are the
 * space-separated tokens of each document plus their 2-grams and 3-grams.
 */
export declare class TfidfVectorizer {
    rawDocuments: string[];
    /** term -> vocabulary index (the index is stored as a string). */
    vocabulary: Record<string, string>;
    /** vocabulary index -> number of documents containing that term. */
    documentFrequency: Record<string, number>;
    maxDF: number;
    /** Per-document term counts, keyed by vocabulary index. */
    documents: Record<string, number>[];
    /** Per-document normalized tf-idf weights, keyed by vocabulary index. */
    tfidfs: Record<string, number>[];
    constructor({ rawDocuments, maxDF }: TfidfVectorizerProps);
    /**
     * Runs the full pipeline (tokenize, vocabulary, term counts, high-DF
     * pruning, idf smoothing) and returns the cached tf-idf vectors.
     */
    computeTfidfs(): Record<string, number>[];
    /** Splits on single spaces and appends all 2-grams and 3-grams. */
    tokenize(text: string): string[];
    /** Assigns each distinct term a sequential index, stored as a string. */
    buildVocabulary(tokenizedDocuments: string[][]): Record<string, string>;
    /** Builds per-document term counts and tallies document frequency. */
    processDocuments(tokenizedDocuments: string[][]): void;
    /** Drops terms whose document frequency exceeds maxDF * corpus size. */
    limit(): void;
    /**
     * Smooth idf weights by adding 1 to document frequencies (DF), as if an extra
     * document was seen containing every term in the collection exactly once.
     * This prevents zero divisions.
     * */
    smooth(): void;
    /** Computes the L2-normalized tf-idf vector for every real document. */
    buildTfidfs(): Record<string, number>[];
    /** Sublinear term frequency: 1 + ln(count in `document`). */
    tf(vocabIdx: string, document: Record<string, number>): number;
    /** Smoothed inverse document frequency: 1 + ln(nDocs / DF). */
    idf(vocabIdx: string): number;
}
export {};
|
|
38
|
+
//# sourceMappingURL=TfidfVectorizer.d.ts.map
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* @emails oncall+ws_labs
|
|
9
|
+
* @format
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.TfidfVectorizer = void 0;
|
|
13
|
+
const Ngram_1 = require("./Ngram");
|
|
14
|
+
// Marker key for the artificial document appended by smooth(); buildTfidfs()
// recognizes it and emits no tf-idf vector for that document.
const SMOOTHING_KEY = '__smoothObjectKey';
// Vocabulary index given to documents left empty after limit() (all of their
// terms exceeded maxDF); sharing one index makes such docs cluster together.
const VOCAB_IDX_FOR_DOC_WITH_HIGH_DF = '-1';
/**
 * Turns raw text documents into L2-normalized tf-idf vectors. Terms are the
 * space-separated tokens of each document plus their 2-grams and 3-grams;
 * terms whose document frequency exceeds `maxDF` of the corpus are discarded.
 */
class TfidfVectorizer {
    /**
     * @param rawDocuments documents to vectorize; tokens separated by spaces
     * @param maxDF maximum document-frequency ratio (0..1) a term may have
     *     before limit() drops it; defaults to 0.8
     */
    constructor({ rawDocuments, maxDF = 0.8 }) {
        this.rawDocuments = [];
        this.vocabulary = Object.create(null);
        this.documentFrequency = Object.create(null);
        this.documents = [];
        this.rawDocuments = rawDocuments;
        this.maxDF = maxDF;
    }
    /**
     * Runs the full pipeline: tokenize -> vocabulary -> term counts ->
     * high-DF pruning -> idf smoothing -> tf-idf, caching the result.
     * @returns one map of vocab index -> normalized tf-idf weight per document
     */
    computeTfidfs() {
        const tokenizedDocuments = this.rawDocuments.map(this.tokenize);
        this.vocabulary = this.buildVocabulary(tokenizedDocuments);
        this.processDocuments(tokenizedDocuments);
        this.limit();
        this.smooth();
        this.tfidfs = this.buildTfidfs();
        return this.tfidfs;
    }
    /** Splits on single spaces and appends all 2-grams and 3-grams. */
    tokenize(text) {
        const terms = text.split(' ');
        return [...terms, ...(0, Ngram_1.nGram)(2, terms), ...(0, Ngram_1.nGram)(3, terms)];
    }
    /** Assigns each distinct term a sequential index, stored as a string. */
    buildVocabulary(tokenizedDocuments) {
        let vocabIdx = 0;
        const vocabulary = Object.create(null);
        tokenizedDocuments.forEach(doc => {
            doc.forEach(term => {
                if (!vocabulary[String(term)]) {
                    vocabulary[String(term)] = String(vocabIdx);
                    vocabIdx++;
                }
            });
        });
        return vocabulary;
    }
    /**
     * Builds per-document term-count maps (keyed by vocab index) and tallies
     * document frequency: each term counts once per document it appears in.
     */
    processDocuments(tokenizedDocuments) {
        tokenizedDocuments.forEach(terms => {
            const document = {};
            terms.forEach(t => {
                const vocabIdx = this.vocabulary[t];
                if (document[vocabIdx]) {
                    document[vocabIdx] += 1;
                }
                else {
                    // First occurrence in this document: bump DF as well.
                    if (this.documentFrequency[vocabIdx]) {
                        this.documentFrequency[vocabIdx] += 1;
                    }
                    else {
                        this.documentFrequency[vocabIdx] = 1;
                    }
                    document[vocabIdx] = 1;
                }
            });
            this.documents.push(document);
        });
    }
    /**
     * Removes terms whose document frequency exceeds maxDF * corpus size from
     * every document, from documentFrequency, and from the vocabulary.
     */
    limit() {
        const nMaxDF = Math.floor(this.documents.length * this.maxDF);
        const vocabIdxsToDelete = [];
        this.documents.forEach(doc => {
            Object.keys(doc).forEach(vocabIdx => {
                if (this.documentFrequency[vocabIdx] > nMaxDF) {
                    delete doc[vocabIdx];
                    vocabIdxsToDelete.push(vocabIdx);
                }
            });
        });
        const deletedIdxSet = new Set(vocabIdxsToDelete);
        deletedIdxSet.forEach(vocabIdx => {
            delete this.documentFrequency[vocabIdx];
        });
        // Bug fix: vocabulary maps term -> vocab index, so pruned entries must
        // be located by value. The previous `delete this.vocabulary[vocabIdx]`
        // used the index as the key, deleted nothing, and left smooth() to set
        // documentFrequency for pruned indexes to NaN (undefined + 1).
        Object.keys(this.vocabulary).forEach(term => {
            if (deletedIdxSet.has(this.vocabulary[term])) {
                delete this.vocabulary[term];
            }
        });
    }
    /**
     * Smooth idf weights by adding 1 to document frequencies (DF), as if an extra
     * document was seen containing every term in the collection exactly once.
     * This prevents zero divisions.
     * */
    smooth() {
        // for each vocabulary
        Object.values(this.vocabulary).forEach(vocabIdx => (this.documentFrequency[vocabIdx] =
            this.documentFrequency[vocabIdx] + 1));
        this.documents.push({ [SMOOTHING_KEY]: 1 });
    }
    /**
     * Computes the L2-normalized tf-idf vector for every real document; the
     * smoothing document added by smooth() produces no output entry.
     */
    buildTfidfs() {
        const tfidfs = [];
        this.documents.forEach(document => {
            // this means all the terms in the document are the terms
            // that have high document frequency.
            // This will make all the docs with high DF to be clustered together.
            if (Object.keys(document).length === 0) {
                tfidfs.push({ [VOCAB_IDX_FOR_DOC_WITH_HIGH_DF]: 1 });
                return;
            }
            if (!document[SMOOTHING_KEY]) {
                const atfidf = Object.keys(document).map(vocabIdx => {
                    return [vocabIdx, this.tf(vocabIdx, document) * this.idf(vocabIdx)];
                });
                // normalizing the values
                const dotSum = atfidf
                    .map(([_, tfidfValue]) => tfidfValue * tfidfValue)
                    .reduce((sum, tfidfValueSquared) => sum + tfidfValueSquared, 0);
                const dotSumSqrRoot = Math.sqrt(dotSum);
                // Normalizing tfidfs
                const atfidfVocabIdxValueObject = atfidf
                    .map(([vocabIdx, tfidfValue]) => [
                    vocabIdx,
                    tfidfValue / dotSumSqrRoot,
                ])
                    .reduce((obj, [vocabIdx, value]) => {
                    obj[vocabIdx] = value;
                    return obj;
                }, {});
                tfidfs.push(atfidfVocabIdxValueObject);
            }
        });
        return tfidfs;
    }
    /** Sublinear term frequency: 1 + ln(count of the term in `document`). */
    tf(vocabIdx, document) {
        return 1 + Math.log(document[vocabIdx]);
    }
    /** Smoothed inverse document frequency: 1 + ln(nDocs / DF). */
    idf(vocabIdx) {
        return (1 + Math.log(this.documents.length / this.documentFrequency[vocabIdx]));
    }
}
|
|
140
|
+
exports.TfidfVectorizer = TfidfVectorizer;
|