@datagrok/bio 2.13.6 → 2.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/242.js +1 -1
- package/dist/242.js.map +1 -1
- package/dist/284.js +1 -1
- package/dist/284.js.map +1 -1
- package/dist/449.js +2 -0
- package/dist/449.js.map +1 -0
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +6 -6
- package/dist/package.js.map +1 -1
- package/package.json +5 -5
- package/src/demo/bio01-similarity-diversity.ts +2 -3
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +3 -3
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +2 -3
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +2 -3
- package/src/package-test.ts +3 -2
- package/src/package.ts +3 -2
- package/src/tests/mm-distance-tests.ts +6 -5
- package/src/tests/pepsea-tests.ts +2 -2
- package/src/utils/calculate-scores.ts +13 -0
- package/src/utils/monomer-cell-renderer.ts +4 -1
- package/dist/317.js +0 -2
- package/dist/317.js.map +0 -1
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.13.6",
|
|
8
|
+
"version": "2.13.8",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -39,10 +39,10 @@
|
|
|
39
39
|
"@biowasm/aioli": "^3.1.0",
|
|
40
40
|
"@datagrok-libraries/bio": "^5.42.6",
|
|
41
41
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
42
|
-
"@datagrok-libraries/math": "^1.1.
|
|
43
|
-
"@datagrok-libraries/ml": "^6.6.
|
|
44
|
-
"@datagrok-libraries/tutorials": "^1.3.
|
|
45
|
-
"@datagrok-libraries/utils": "^4.2.
|
|
42
|
+
"@datagrok-libraries/math": "^1.1.12",
|
|
43
|
+
"@datagrok-libraries/ml": "^6.6.17",
|
|
44
|
+
"@datagrok-libraries/tutorials": "^1.3.13",
|
|
45
|
+
"@datagrok-libraries/utils": "^4.2.22",
|
|
46
46
|
"@webgpu/types": "^0.1.40",
|
|
47
47
|
"ajv": "^8.12.0",
|
|
48
48
|
"ajv-errors": "^3.0.0",
|
|
@@ -15,9 +15,8 @@ export async function demoBio01UI() {
|
|
|
15
15
|
let df: DG.DataFrame;
|
|
16
16
|
|
|
17
17
|
try {
|
|
18
|
-
const demoScript = new DemoScript(
|
|
19
|
-
'
|
|
20
|
-
'Sequence similarity tracking and evaluation dataset diversity');
|
|
18
|
+
const demoScript = new DemoScript('Similarity, Diversity',
|
|
19
|
+
'Sequence similarity tracking and evaluation dataset diversity', false, {autoStartFirstStep: true});
|
|
21
20
|
await demoScript
|
|
22
21
|
.step(`Load DNA sequences`, async () => {
|
|
23
22
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -23,9 +23,9 @@ export async function demoBio01aUI() {
|
|
|
23
23
|
const activityColName = 'activity';
|
|
24
24
|
|
|
25
25
|
try {
|
|
26
|
-
const demoScript = new DemoScript(
|
|
27
|
-
'
|
|
28
|
-
|
|
26
|
+
const demoScript = new DemoScript('Sequence Space',
|
|
27
|
+
'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results',
|
|
28
|
+
false, {autoStartFirstStep: true});
|
|
29
29
|
await demoScript
|
|
30
30
|
.step(`Load DNA sequences`, async () => {
|
|
31
31
|
[df, treeHelper, dendrogramSvc] = await Promise.all([
|
|
@@ -27,9 +27,8 @@ export async function demoBio01bUI() {
|
|
|
27
27
|
const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
|
|
28
28
|
|
|
29
29
|
try {
|
|
30
|
-
const demoScript = new DemoScript(
|
|
31
|
-
|
|
32
|
-
'Activity Cliffs analysis on Macromolecules data');
|
|
30
|
+
const demoScript = new DemoScript('Activity Cliffs', 'Activity Cliffs analysis on Macromolecules data', false,
|
|
31
|
+
{autoStartFirstStep: true});
|
|
33
32
|
await demoScript
|
|
34
33
|
.step(`Load DNA sequences`, async () => {
|
|
35
34
|
grok.shell.windows.showContextPanel = false;
|
|
@@ -15,10 +15,7 @@ export async function demoBio03UI(): Promise<void> {
|
|
|
15
15
|
let dlg: DG.Dialog;
|
|
16
16
|
|
|
17
17
|
try {
|
|
18
|
-
await new DemoScript(
|
|
19
|
-
'Atomic Level',
|
|
20
|
-
'Atomic level structure of Macromolecules',
|
|
21
|
-
)
|
|
18
|
+
await new DemoScript('Atomic Level', 'Atomic level structure of Macromolecules', false, {autoStartFirstStep: true})
|
|
22
19
|
.step(`Loading Macromolecules notation 'Helm'`, async () => {
|
|
23
20
|
grok.shell.windows.showContextPanel = false;
|
|
24
21
|
grok.shell.windows.showProperties = false;
|
|
@@ -38,9 +38,8 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
38
38
|
let pepseaDcStartPromise: Promise<void>;
|
|
39
39
|
|
|
40
40
|
try {
|
|
41
|
-
const demoScript = new DemoScript(
|
|
42
|
-
|
|
43
|
-
'MSA and composition analysis on Helm data');
|
|
41
|
+
const demoScript = new DemoScript('Helm, MSA, Sequence Space', 'MSA and composition analysis on Helm data',
|
|
42
|
+
false, {autoStartFirstStep: true});
|
|
44
43
|
await demoScript
|
|
45
44
|
.step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
|
|
46
45
|
[pepseaDcStatus, df] = await Promise.all([
|
package/src/package-test.ts
CHANGED
|
@@ -44,8 +44,9 @@ export {tests};
|
|
|
44
44
|
//input: string category {optional: true}
|
|
45
45
|
//input: string test {optional: true}
|
|
46
46
|
//input: object testContext {optional: true}
|
|
47
|
+
//input: bool stressTest {optional: true}
|
|
47
48
|
//output: dataframe result
|
|
48
|
-
export async function test(category: string, test: string, testContext: TestContext): Promise<DG.DataFrame> {
|
|
49
|
-
const data = await runTests({category, test, testContext});
|
|
49
|
+
export async function test(category: string, test: string, testContext: TestContext, stressTest?: boolean): Promise<DG.DataFrame> {
|
|
50
|
+
const data = await runTests({category, test, testContext, stressTest});
|
|
50
51
|
return DG.DataFrame.fromObjects(data)!;
|
|
51
52
|
}
|
package/src/package.ts
CHANGED
|
@@ -78,6 +78,7 @@ import {CyclizedNotationProvider} from './utils/cyclized';
|
|
|
78
78
|
import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
|
|
79
79
|
import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
|
|
80
80
|
import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
81
|
+
import { calculateScoresWithEmptyValues } from './utils/calculate-scores';
|
|
81
82
|
|
|
82
83
|
export const _package = new BioPackage();
|
|
83
84
|
|
|
@@ -868,7 +869,7 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
|
|
|
868
869
|
export async function sequenceIdentityScoring(
|
|
869
870
|
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
870
871
|
): Promise<DG.Column<number>> {
|
|
871
|
-
const scores =
|
|
872
|
+
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.IDENTITY);
|
|
872
873
|
return scores;
|
|
873
874
|
}
|
|
874
875
|
|
|
@@ -882,7 +883,7 @@ export async function sequenceIdentityScoring(
|
|
|
882
883
|
export async function sequenceSimilarityScoring(
|
|
883
884
|
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
884
885
|
): Promise<DG.Column<number>> {
|
|
885
|
-
const scores =
|
|
886
|
+
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.SIMILARITY);
|
|
886
887
|
return scores;
|
|
887
888
|
}
|
|
888
889
|
|
|
@@ -82,7 +82,7 @@ ATCGAATCGA`;
|
|
|
82
82
|
|
|
83
83
|
test('needleman-blosum62-del', async () => {
|
|
84
84
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]({gapOpen: 8, gapExtend: 2});
|
|
85
|
-
_testDistance(prot3, prot4, df, -
|
|
85
|
+
_testDistance(prot3, prot4, df, -3.667);
|
|
86
86
|
});
|
|
87
87
|
|
|
88
88
|
test('needleman-custom-sub', async () => {
|
|
@@ -96,21 +96,21 @@ ATCGAATCGA`;
|
|
|
96
96
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
97
97
|
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
|
|
98
98
|
);
|
|
99
|
-
_testDistance(prot3, prot4, df, 0.
|
|
99
|
+
_testDistance(prot3, prot4, df, 0.667);
|
|
100
100
|
});
|
|
101
101
|
|
|
102
102
|
test('needleman-custom-zero-extend', async () => {
|
|
103
103
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
104
104
|
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0},
|
|
105
105
|
);
|
|
106
|
-
_testDistance(prot5, prot6, df, 0.
|
|
106
|
+
_testDistance(prot5, prot6, df, 0.333);
|
|
107
107
|
});
|
|
108
108
|
|
|
109
109
|
test('needleman-custom-half-extend', async () => {
|
|
110
110
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
111
111
|
{scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1},
|
|
112
112
|
);
|
|
113
|
-
_testDistance(prot5, prot6, df, 1.
|
|
113
|
+
_testDistance(prot5, prot6, df, 1.667);
|
|
114
114
|
});
|
|
115
115
|
|
|
116
116
|
test('needleman-custom-same-extend', async () => {
|
|
@@ -121,7 +121,8 @@ ATCGAATCGA`;
|
|
|
121
121
|
const seq1 = Array(10000).fill('FWRY').join('');
|
|
122
122
|
const seq2 = Array(10000).fill('FYWRRY').join('');
|
|
123
123
|
_testDistance(seq1, seq2, df, 0.667);
|
|
124
|
-
} else
|
|
124
|
+
} else
|
|
125
|
+
_testDistance(prot5, prot6, df, 1.333);
|
|
125
126
|
}, {benchmark: true});
|
|
126
127
|
});
|
|
127
128
|
|
|
@@ -40,7 +40,7 @@ category('PepSeA', () => {
|
|
|
40
40
|
const tgtMsaCol = df.getCol('MSA');
|
|
41
41
|
for (let i = 0; i < resMsaCol!.length; ++i)
|
|
42
42
|
expect(resMsaCol!.get(i) == tgtMsaCol.get(i), true);
|
|
43
|
-
}, {timeout: 60000 /* docker
|
|
43
|
+
}, {timeout: 60000 /* docker */, stressTest: true});
|
|
44
44
|
|
|
45
45
|
test('stderr', async () => {
|
|
46
46
|
const logger = new TestLogger();
|
|
@@ -50,7 +50,7 @@ category('PepSeA', () => {
|
|
|
50
50
|
const tgtMsaCol = df.getCol('MSA');
|
|
51
51
|
expectArray(resMsaCol!.toList(), tgtMsaCol.toList());
|
|
52
52
|
expect(logger.warningList[0].message, pepseaStderrWarningList);
|
|
53
|
-
}, {timeout: 60000 /* docker
|
|
53
|
+
}, {timeout: 60000 /* docker */, stressTest: true});
|
|
54
54
|
|
|
55
55
|
test('error', async () => {
|
|
56
56
|
const logger = new TestLogger();
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { calculateScores, SCORE } from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
3
|
+
|
|
4
|
+
export async function calculateScoresWithEmptyValues(
|
|
5
|
+
table: DG.DataFrame, macromolecule: DG.Column, reference: string, scoring: SCORE
|
|
6
|
+
): Promise<DG.Column<number>> {
|
|
7
|
+
const scores = await calculateScores(table, macromolecule, reference, scoring);
|
|
8
|
+
for (let i = 0; i < scores.length; i++) {
|
|
9
|
+
if (macromolecule.isNone(i))
|
|
10
|
+
scores.set(i, null, false);
|
|
11
|
+
}
|
|
12
|
+
return scores;
|
|
13
|
+
}
|
|
@@ -105,6 +105,9 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
105
105
|
const color = palette.get(s);
|
|
106
106
|
|
|
107
107
|
g.fillStyle = color;
|
|
108
|
-
|
|
108
|
+
//cell width of monomer should dictate how many characters can be displayed
|
|
109
|
+
// for width 40, 6 characters can be displayed (0.15 is 6 / 40)
|
|
110
|
+
const maxChars = Math.max(2, Math.floor(w * 0.15));
|
|
111
|
+
g.fillText(monomerToShort(s, maxChars), x + (w / 2), y + (h / 2), w);
|
|
109
112
|
}
|
|
110
113
|
}
|
package/dist/317.js
DELETED
|
@@ -1,2 +0,0 @@
|
|
|
1
|
-
var bio;(()=>{"use strict";var e,n,t={9260:(e,n,t)=>{var r;!function(e){e.EUCLIDEAN="EUCLIDEAN",e.MANHATTAN="MANHATTAN"}(r||(r={}));const a={[r.EUCLIDEAN]:function(e){return`\n var sum = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n sum = sum + distances[i] * distances[i] * computeInfo.weights[i] * computeInfo.weights[i];\n }\n return sqrt(sum);\n `},[r.MANHATTAN]:function(e){return`\n var sum = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n sum = sum + abs(distances[i]) * computeInfo.weights[i];\n }\n return sum;\n `}};var i;!function(e){e.HAMMING="Hamming",e.EUCLIDEAN="Euclidean",e.MANHATTAN="Manhattan",e.TANIMOTO="Tanimoto",e.LEVENSTEIN="Levenshtein",e.NEEDLEMAN_WUNSCH="Needlemann-Wunsch",e.MONOMER_CHEMICAL_DISTANCE="Monomer chemical distance",e.SOKAL="Sokal",e.COSINE="Cosine",e.ASYMMETRIC="Asymmetric",e.Difference="Difference",e.OneHot="One-Hot"}(i||(i={}));const o={[i.HAMMING]:function(e,n){return`\n let aLength: u32 = computeInfo.entrySizes[${n}][aIndex];\n let bLength: u32 = computeInfo.entrySizes[${n}][bIndex];\n let maxLength: u32 = max(aLength, bLength);\n let minLength: u32 = min(aLength, bLength);\n let sizeDiff: u32 = maxLength - minLength;\n \n let maxIntDistance = ceil(maxDistance * f32(maxLength)) - f32(sizeDiff);\n\n var diff: f32 = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n diff = diff + f32(a[i] != b[i]);\n if (diff > maxIntDistance) {\n return 1.0;\n }\n }\n diff += f32(sizeDiff);\n return diff / ${e};\n `},[i.EUCLIDEAN]:function(e,n){return`\n var dist: f32 = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n dist = dist + f32(a[i] - b[i]) * f32(a[i] - b[i]);\n }\n return sqrt(dist);\n `},[i.MANHATTAN]:function(e,n){return`\n var dist: f32 = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n dist = dist + abs(f32(a[i] - b[i]));\n }\n return dist;\n `},[i.TANIMOTO]:function(e,n){return`\n var onBitsa: u32 = 0u;\n var onBitsb: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n onBitsa = onBitsa + countOneBits(a[i]);\n onBitsb = 
onBitsb + countOneBits(b[i]);\n }\n\n if (onBitsa == 0u && onBitsb == 0u) {\n return 0.0;\n }\n\n let totalOnBits = onBitsa + onBitsb;\n var commonBits: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n commonBits = commonBits + countOneBits(a[i] & b[i]);\n }\n\n return 1.0 - f32(commonBits) / f32(totalOnBits - commonBits);\n `},[i.LEVENSTEIN]:function(e,n){return`\n let aLength: u32 = computeInfo.entrySizes[${n}][aIndex];\n let bLength: u32 = computeInfo.entrySizes[${n}][bIndex];\n let maxLength: u32 = max(aLength, bLength);\n let minLength: u32 = min(aLength, bLength);\n\n let maxIntDistance = ceil(maxDistance * f32(maxLength));\n\n // we will store two arrays as matrix and swap the working indices per pass.\n // this way we can reduce memory usage per computation to just O(aLength)\n // the grid will have aLength + 1 columns and bLength + 1 rows\n // this will be guaranteed by iteration, but the array sizes must be known at compile time, so we will use a fixed size of maxArraySize\n var dynamicPassMat: array<array<f32, ${e+1}u>, 2>; // initialize to 0\n \n var prevIndex: u32 = 0;\n var curIndex: u32 = 1; // we will swap these indices per pass\n\n // initialize the first row\n for (var i = 0u; i <= aLength; i = i + 1u) {\n dynamicPassMat[prevIndex][i] = f32(i);\n }\n\n // iterate over the rows\n for (var i = 1u; i <= bLength; i = i + 1u) {\n dynamicPassMat[curIndex][0] = f32(i);\n var minEntry: f32 = f32(maxLength);\n let prevRow = &dynamicPassMat[prevIndex];\n let curRow = &dynamicPassMat[curIndex];\n let bMon = u32(b[i - 1]);\n for (var j = 1u; j <= aLength; j = j + 1u) {\n var cost: f32 = f32(a[j - 1] != bMon);\n var res: f32 = min(\n min(\n (*prevRow)[j] + 1.0, // deletion\n (*curRow)[j - 1] + 1.0, // insertion\n ),\n (*prevRow)[j - 1] + cost // substitution\n );\n (*curRow)[j] = res;\n if (res < minEntry) {\n minEntry = res;\n }\n }\n // swap the indices\n let temp: u32 = prevIndex;\n prevIndex = curIndex;\n curIndex = temp;\n if (minEntry > 
maxIntDistance) {\n return 1.0;\n }\n }\n\n return dynamicPassMat[prevIndex][aLength] / f32(maxLength);\n `},[i.NEEDLEMAN_WUNSCH]:function(e,n){return`\n let aLength: u32 = computeInfo.entrySizes[${n}][aIndex];\n let bLength: u32 = computeInfo.entrySizes[${n}][bIndex];\n let maxLength: u32 = max(aLength, bLength);\n let minLength: u32 = min(aLength, bLength);\n \n let maxIntDistance = ceil(maxDistance * f32(maxLength));\n // we will store two arrays as matrix and swap the working indices per pass.\n // this way we can reduce memory usage per computation to just O(aLength)\n // the grid will have aLength + 1 columns and bLength + 1 rows\n // this will be guaranteed by iteration, but the array sizes must be known at compile time, so we will use a fixed size of maxArraySize\n var dynamicPassMat: array<array<f32, ${e+1}u>, 2>; // initialize to 0\n \n // we need to keep track of which operation led to the current cell\n // i.e. whether we came from the left, top or diagonal to assign gap open/gap extend penalty\n var verticalGaps: array<u32, ${e+1}u>;\n var horizontalGaps: array<u32, ${e+1}u>;\n\n let gapOpenPenalty: f32 = suppInfo.gapOpenPenalty${n};\n let gapExtensionPenalty: f32 = suppInfo.gapExtensionPenalty${n};\n var prevIndex: u32 = 0;\n var curIndex: u32 = 1; // we will swap these indices per pass\n // initialize the first row\n for (var i = 0u; i <= aLength; i = i + 1u) {\n dynamicPassMat[prevIndex][i] = gapOpenPenalty + f32(i - 1) * gapExtensionPenalty;\n dynamicPassMat[curIndex][i] = 0.0;\n }\n dynamicPassMat[0][0] = 0.0;\n\n let simMatrix = &suppInfo.similarityMatrix${n}; // using pointers make things faster\n // iterate over the rows\n for (var i = 1u; i <= bLength; i = i + 1u) {\n let prevRow = &dynamicPassMat[prevIndex];\n let curRow = &dynamicPassMat[curIndex];\n (*curRow)[0] = gapOpenPenalty + f32(i - 1) * gapExtensionPenalty;\n var minEntry: f32 = f32(maxLength);\n let monB = u32(b[i - 1]);\n for (var j = 1u; j <= aLength; j = j + 1u) {\n let monA = 
u32(a[j - 1]);\n \n let cost: f32 = (*prevRow)[j - 1] + 1f - (*simMatrix)[monA][monB];\n var top = (*prevRow)[j]; // deletion\n if (verticalGaps[j] > 0) {\n top = top + gapExtensionPenalty;\n } else {\n top = top + gapOpenPenalty;\n }\n var left = (*curRow)[j - 1]; // insertion\n if (horizontalGaps[j - 1] > 0) {\n left = left + gapExtensionPenalty;\n } else {\n left = left + gapOpenPenalty;\n }\n var res: f32 = min(\n min(\n top, // deletion\n left, // insertion\n ),\n cost // substitution\n );\n (*curRow)[j] = res;\n if (res < minEntry) {\n minEntry = res;\n }\n // update the horizontal and vertical gaps\n if (res == cost) {\n verticalGaps[j] = 0;\n horizontalGaps[j] = 0;\n } else if (res == left) {\n verticalGaps[j] = 0;\n horizontalGaps[j] = 1;\n } else {\n verticalGaps[j] = 1;\n horizontalGaps[j] = 0;\n }\n }\n // swap the indices\n let temp: u32 = prevIndex;\n prevIndex = curIndex;\n curIndex = temp;\n if (minEntry > maxIntDistance) {\n return 1.0;\n }\n }\n return dynamicPassMat[prevIndex][aLength] / f32(maxLength);\n \n `},[i.MONOMER_CHEMICAL_DISTANCE]:function(e,n){return`\n let aLength: u32 = computeInfo.entrySizes[${n}][aIndex];\n let bLength: u32 = computeInfo.entrySizes[${n}][bIndex];\n let maxLength: u32 = max(aLength, bLength);\n let minLength: u32 = min(aLength, bLength);\n let sizeDiff: u32 = maxLength - minLength;\n \n let maxIntDistance = ceil(maxDistance * f32(maxLength)) - f32(sizeDiff);\n\n let simMatrix = &(suppInfo.similarityMatrix${n}); // using pointers make things faster\n var diff: f32 = 0.0;\n for (var i = 0u; i < ${e}; i = i + 1u) {\n diff = diff + 1.0 - (*simMatrix)[u32(a[i])][u32(b[i])];\n if (diff > maxIntDistance) {\n return 1.0;\n }\n }\n diff += f32(sizeDiff);\n return diff / ${e};\n `},[i.SOKAL]:function(e,n){return`\n var onBitsa: u32 = 0u;\n var onBitsb: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n onBitsa = onBitsa + countOneBits(a[i]);\n onBitsb = onBitsb + countOneBits(b[i]);\n }\n let total = onBitsa + onBitsb;\n 
if (total == 0u) {\n return 1.0;\n }\n var commonBits: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n commonBits = commonBits + countOneBits(a[i] & b[i]);\n }\n return 1.0 - f32(commonBits) / f32(total * 2 - commonBits * 3);\n `},[i.COSINE]:function(e,n){return`\n var onBitsa: u32 = 0u;\n var onBitsb: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n onBitsa = onBitsa + countOneBits(a[i]);\n onBitsb = onBitsb + countOneBits(b[i]);\n }\n let total = onBitsa * onBitsb; // p.s. here total is taken by multiplying\n if (total == 0u) {\n return 1.0;\n }\n var commonBits: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n commonBits = commonBits + countOneBits(a[i] & b[i]);\n }\n return 1.0 - f32(commonBits) / sqrt(f32(total));\n `},[i.ASYMMETRIC]:function(e,n){return`\n var onBitsa: u32 = 0u;\n var onBitsb: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n onBitsa = onBitsa + countOneBits(a[i]);\n onBitsb = onBitsb + countOneBits(b[i]);\n }\n let min = min(onBitsa, onBitsb);\n if (min == 0u) {\n return 1.0;\n }\n var commonBits: u32 = 0u;\n for (var i = 0u; i < ${e}u; i = i + 1u) {\n commonBits = commonBits + countOneBits(a[i] & b[i]);\n }\n return 1.0 - f32(commonBits) / f32(min);\n `},[i.Difference]:function(e,n){return`\n let range = suppInfo.range${n};\n return f32(abs(f32(a[0]) - f32(b[0])) / range);\n `},[i.OneHot]:function(e,n){return`\n let aLength: u32 = computeInfo.entrySizes[${n}][aIndex];\n let bLength: u32 = computeInfo.entrySizes[${n}][bIndex];\n if (aLength != bLength) {\n return 1.0;\n }\n for (var i = 0u; i < aLength; i = i + 1u) {\n if(a[i] != b[i]) {\n return 1.0;\n }\n }\n return 0.0;\n 
`}},s={[i.HAMMING]:e=>Math.ceil(e/30),[i.EUCLIDEAN]:e=>Math.ceil(e/30),[i.MANHATTAN]:e=>Math.ceil(e/30),[i.TANIMOTO]:e=>Math.ceil(e/60),[i.SOKAL]:e=>Math.ceil(e/60),[i.COSINE]:e=>Math.ceil(e/60),[i.ASYMMETRIC]:e=>Math.ceil(e/60),[i.LEVENSTEIN]:e=>Math.ceil(e*e/60),[i.NEEDLEMAN_WUNSCH]:e=>Math.ceil(e*e/60),[i.MONOMER_CHEMICAL_DISTANCE]:e=>Math.ceil(e/25),[i.Difference]:e=>1,[i.OneHot]:e=>Math.ceil(e/40)},u={STRING:new Set([i.HAMMING,i.LEVENSTEIN,i.NEEDLEMAN_WUNSCH,i.MONOMER_CHEMICAL_DISTANCE,i.OneHot]),UINT32ARRAY:new Set([i.HAMMING,i.EUCLIDEAN,i.MANHATTAN,i.MONOMER_CHEMICAL_DISTANCE,i.LEVENSTEIN,i.NEEDLEMAN_WUNSCH,i.TANIMOTO,i.COSINE,i.SOKAL,i.ASYMMETRIC,i.OneHot,i.Difference]),INT32ARRAY:new Set([i.EUCLIDEAN,i.MANHATTAN,i.OneHot,i.Difference]),FLOAT32ARRAY:new Set([i.EUCLIDEAN,i.MANHATTAN,i.Difference]),NUMBER:new Set([i.EUCLIDEAN,i.MANHATTAN,i.Difference]),BITARRAY:new Set([i.TANIMOTO,i.COSINE,i.SOKAL,i.ASYMMETRIC])};var l=function(e,n,t,r){return new(t||(t=Promise))((function(a,i){function o(e){try{u(r.next(e))}catch(e){i(e)}}function s(e){try{u(r.throw(e))}catch(e){i(e)}}function u(e){var n;e.done?a(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(o,s)}u((r=r.apply(e,n||[])).next())}))};let f=null,c=null;function d(){return l(this,void 0,void 0,(function*(){if(!f&&(f=yield navigator.gpu.requestAdapter({powerPreference:"high-performance"}),null==f))return null;let e=!1;if(c&&(c.lost.then((()=>{e=!0})),yield new Promise((e=>setTimeout(e,10)))),!c||e){const e=1e9,n=f.limits,t=n.maxBufferSize,r=n.maxStorageBufferBindingSize;try{return c=yield f.requestDevice({requiredLimits:{maxBufferSize:Math.min(t,e),maxStorageBufferBindingSize:Math.min(r,e)}}),c}catch(e){return console.error("Failed to create device with required limits",e),c=yield f.requestDevice(),c}}return c}))}function p(e,n=.8,t,a,o,l){return f=this,c=void 0,h=function*(){const f=yield d();if(!f)return null;const c=Object.values(i);if(t.some((e=>!c.includes(e))))throw new Error("Invalid 
distance metrics provided: "+t.join(", "));if(!Object.values(r).includes(a))throw new Error("Invalid aggregation function provided: "+a);const p=1-n;if(l.length!==e.length||l.length!==t.length||l.length!==o.length)throw new Error("Options, weigths and distance functions must be provided for each column");if(e.some((n=>n.length!==e[0].length)))throw new Error("All entry lists must be the same length");const h=e.length,g=e[0].length,y=e.map(((e,n)=>function(e,n=i.HAMMING,t,r={gapOpenPenalty:1,gapExtensionPenalty:.6}){var a,o;let l=null;const f=e.some((e=>"string"==typeof e))?(l="STRING",e.map((e=>new Uint32Array(e.split("").map((e=>e.charCodeAt(0))))))):e.some((e=>"number"==typeof e))?(l="NUMBER",e.map((e=>new Float32Array([e])))):"object"==typeof e[0]&&e.some((e=>"_data"in e&&"_length"in e))?(l="BITARRAY",e.map((e=>e._data))):e.some((e=>e instanceof Float32Array))?(l="FLOAT32ARRAY",e):e.some((e=>e instanceof Uint32Array))?(l="UINT32ARRAY",e):e.some((e=>e instanceof Int32Array))?(l="INT32ARRAY",e):void 0;if(!f||!l)throw new Error("Invalid entry type, could not determine entry type from input list");const c=f[0]instanceof Int32Array?"INT32ARRAY":f[0]instanceof Float32Array?"FLOAT32ARRAY":"UINT32ARRAY",d=new Uint32Array(f.map((e=>e.length)));if(!u[l]||!u[l].has(n))throw new Error(`Distance metric '${n}' not supported for entry type '${l}'`);const p=d.reduce(((e,n)=>Math.max(e,n)),0),m=s[n](p),h="INT32ARRAY"===c?Int32Array:"FLOAT32ARRAY"===c?Float32Array:Uint32Array,g=new h(f.length*p);f.forEach(((e,n)=>{g.set(e,n*p)}));let y="",A=0,b="FLOAT32ARRAY",w=null;if(n===i.NEEDLEMAN_WUNSCH||n===i.MONOMER_CHEMICAL_DISTANCE){let e=r.scoringMatrix&&r.alphabetIndexes?Object.keys(r.alphabetIndexes).reduce(((e,n)=>Math.max(e,n.charCodeAt(0))),0):-1;if(!r.alphabetIndexes||!r.scoringMatrix){for(let n=0;n<g.length;n++)g[n]>e&&(e=g[n]);r.scoringMatrix=new Array(e+1).fill(null).map((()=>new Array(e+1).fill(0))),r.alphabetIndexes={};for(let 
e=0;e<r.scoringMatrix.length;e++)r.scoringMatrix[e][e]=1,r.alphabetIndexes[String.fromCharCode(e)]=e}const n=(e+1)*(e+1),i=new Array(e+1).fill(null).map((()=>new Float32Array(e+1)));for(let n=0;n<e+1;n++)i[n][n]=1;const s=r.alphabetIndexes;for(const e of Object.keys(s))for(const n of Object.keys(s))e!==n&&(i[e.charCodeAt(0)][n.charCodeAt(0)]=r.scoringMatrix[s[e]][s[n]]);A=2+n,b="FLOAT32ARRAY",w=new Float32Array(A),w[0]=null!==(a=r.gapOpenPenalty)&&void 0!==a?a:1,w[1]=null!==(o=r.gapExtensionPenalty)&&void 0!==o?o:.6;let u=2;for(let e=0;e<i.length;e++)w.set(i[e],u),u+=i[e].length;y=`\n gapOpenPenalty${t}: f32,\n gapExtensionPenalty${t}: f32,\n similarityMatrix${t}: array<array<f32, ${e+1}>, ${e+1}>`}else if(n===i.Difference){if(!r.range||"number"!=typeof r.range||r.range<=0){const e=g.reduce(((e,n)=>Math.min(e,n)),g[0]),n=g.reduce(((e,n)=>Math.max(e,n)),g[0]);r.range=n-e}r.range<=0&&(r.range=1),A=1,b="FLOAT32ARRAY",w=new Float32Array([r.range]),y=`\n range${t}: f32`}const x=g instanceof Int32Array?"i32":g instanceof Float32Array?"f32":"u32",E=`data${t}: array<array<${x}, ${p}>, ${f.length}>`;return{flatSourceArray:g,sourceArraySize:g.length,maxEntryLen:p,arraySizes:d,complexity:m,suppInfoBuffer:w,suppInfoSize:A,suppInfoType:b,suppInfoStructWgsl:y,entryType:l,dataTypeWGSL:x,dataStructWgsl:E,EncodedArrayConstructor:h}}(e,t[n],n,l[n])));if(0===h)throw new Error("No columns provided. 
Please provide at least one column of data.");1===h&&(a=r.MANHATTAN);let A=y.map((e=>e.suppInfoStructWgsl)).filter((e=>!!e&&""!=e)).join(",\n"),b=!1;A&&""!=A.trim()||(b=!0,A="\ndummy: f32\n");const w=y.map((e=>e.dataStructWgsl)).filter((e=>!!e&&""!=e)).join(",\n"),x=new Uint32Array(h*g);y.forEach(((e,n)=>{x.set(e.arraySizes,n*g)}));const E=1e4,I=100,M=y.reduce(((e,n)=>e+n.complexity),0),N=Math.ceil(1e4/M),v=Math.ceil(Math.sqrt(Math.ceil(100))),B=10*v,S=g*(g-1)/2,U=Math.ceil(S/E),C=f.createShaderModule({label:"Sparse matrix compute shader",code:`\n // each thread will perform 100 iterations at one time, comparing 100 pairs of entries.\n // in total, each thread will perform at most ${U} comparisons.\n // first is the result struct, containing is, js, and distances. each array with length of 100,\n // and also integer for how many pairs were found to be below threshold.\n struct SparseResult {\n i: array<array<u32, 100>, 10000>,\n j: array<array<u32, 100>, 10000>,\n distances: array<array<f32, 100>, 10000>,\n found: array<u32, 10000>,\n done: array<u32, 10000>\n }\n // struct for the data\n struct ComputeInfo {\n // start at cols and rows, and end at cols and rows for each thread, these will be calculated on cpu and passed to gpu.\n startAtCols: array<u32, 10000>,\n startAtRows: array<u32, 10000>,\n endAtCols: array<u32, 10000>,\n endAtRows: array<u32, 10000>,\n\n // the ACTUALLY sizes of each entry\n entrySizes: array<array<u32, ${g}>, ${h}>,\n // the weights for each entry\n weights: array<f32, ${h}>,\n // the data for each entry\n ${w} // an example of the dataWgsl would be:\n //data0: array<array<u32,20>,100>,\n //data1: array<array<u32,20>,100>\n }\n\n // struct for the supplementary information\n struct SuppInfo {\n // struct containing all the supplementary info, like scoring matrix, alphabet indexes, range, etc.\n ${A}\n };\n\n @group(0) @binding(0) var<storage, read_write> computeInfo: ComputeInfo;\n @group(0) @binding(1) var<storage, read_write> suppInfo: 
SuppInfo;\n @group(0) @binding(2) var<storage, read_write> results: SparseResult;\n @compute @workgroup_size(10, 10) fn calcSparseMatrix(\n @builtin(global_invocation_id) id: vec3<u32>\n ) {\n ${b?"let otherDummy = suppInfo.dummy * 2;":""} // just to make sure that the suppInfo is not optimized out\n let threadCol = id.x;\n let threadRow = id.y;\n let linearIndex = threadRow * ${B} + threadCol;\n if (linearIndex >= 10000) {\n return; // if we are out of bounds, return\n } \n var startAtCol: u32 = computeInfo.startAtCols[linearIndex];\n var startAtRow: u32 = computeInfo.startAtRows[linearIndex];\n let endAtCol: u32 = min(computeInfo.endAtCols[linearIndex], ${g}u);\n let endAtRow: u32 = min(computeInfo.endAtRows[linearIndex], ${g}u);\n let is = &results.i[linearIndex];\n let js = &results.j[linearIndex];\n let distances = &results.distances[linearIndex];\n results.found[linearIndex] = 0; // initialize the found counter\n var found: u32 = 0;\n if (results.done[linearIndex] > 0) {\n return; // if we are done, return\n }\n for (var i = 0; i < ${N}; i++) {\n if (startAtCol >= endAtCol && startAtRow >= endAtRow) {\n results.done[linearIndex] = 1;\n break;\n }\n if (found >= 100) {\n break;\n }\n let dist = combinedDistance(startAtCol, startAtRow);\n if (dist <= ${p}) {\n (*is)[found] = startAtCol;\n (*js)[found] = startAtRow;\n (*distances)[found] = dist;\n found = found + 1;\n }\n startAtCol = startAtCol + 1;\n if (startAtCol >= ${g}u) {\n startAtRow += 1;\n startAtCol = startAtRow + 1;\n }\n }\n results.found[linearIndex] = found;\n // update the startAtCols and startAtRows\n computeInfo.startAtCols[linearIndex] = startAtCol;\n computeInfo.startAtRows[linearIndex] = startAtRow;\n\n }\n\n // this will generate the distance script for each distance metric and then combine them into one\n ${m(t,y.map((e=>e.maxEntryLen)),p,a)}\n\n\n `}),O=f.createComputePipeline({label:"sparse matrix compute pipeline",layout:"auto",compute:{module:C,entryPoint:"calcSparseMatrix"}}),L=new 
Uint32Array(E),P=new Uint32Array(E),R=new Uint32Array(E),T=new Uint32Array(E),_=Math.floor(S/E);let j=0,$=1;console.time("GPUthreadStarts");for(let e=0;e<E;e++){const n=9999===e?S-1:(e+1)*_,t=g-2-Math.floor(Math.sqrt(-8*n+4*g*(g-1)-7)/2-.5),r=n-g*t+Math.floor((t+1)*(t+2)/2);L[e]=$,P[e]=j,R[e]=r,T[e]=t,j=t,$=r}console.timeEnd("GPUthreadStarts");const D=4e4+g*h+h+y.reduce(((e,n)=>e+n.sourceArraySize),0),G=y.reduce(((e,n)=>e+n.suppInfoSize),0),z=1e6,k=D*Uint32Array.BYTES_PER_ELEMENT;let Y=k;const F=15&k;0!==F&&(Y+=16-F);const H=f.createBuffer({label:"compute info buffer",size:Y,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0}),K=H.getMappedRange();let W=0;new Uint32Array(K,W,E).set(L),W+=E*Uint32Array.BYTES_PER_ELEMENT,new Uint32Array(K,W,E).set(P),W+=E*Uint32Array.BYTES_PER_ELEMENT,new Uint32Array(K,W,E).set(R),W+=E*Uint32Array.BYTES_PER_ELEMENT,new Uint32Array(K,W,E).set(T),W+=E*Uint32Array.BYTES_PER_ELEMENT,new Uint32Array(K,W,x.length).set(x),W+=x.length*Uint32Array.BYTES_PER_ELEMENT,new Float32Array(K,W,h).set(o),W+=h*Float32Array.BYTES_PER_ELEMENT;for(const e of y){const n=e.EncodedArrayConstructor,t=e.sourceArraySize;new n(K,W,t).set(e.flatSourceArray),W+=t*n.BYTES_PER_ELEMENT}H.unmap();const q=G*Uint32Array.BYTES_PER_ELEMENT;let X=q;const V=15&q;0!==V&&(X+=16-V),X=Math.max(X,16);const Q=f.createBuffer({label:"supp info buffer",size:X,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0}),J=Q.getMappedRange();let Z=0;for(const e of y)e.suppInfoBuffer&&e.suppInfoBuffer.byteLength>0&&e.suppInfoSize>0&&(new("UINT32ARRAY"===e.suppInfoType?Uint32Array:Float32Array)(J,Z,e.suppInfoBuffer.length).set(e.suppInfoBuffer),Z+=e.suppInfoBuffer.byteLength);0===Z&&new Uint32Array(J,0,4).set([1,1,1,1]),Q.unmap();const ee=302e4*Uint32Array.BYTES_PER_ELEMENT;let ne=ee;const te=15ⅇ0!==te&&(ne+=16-te);const re=f.createBuffer({label:"results 
buffer",size:ne,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC}),ae=f.createBindGroup({label:"bindGroup for sparse matrix buffer",layout:O.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:H}},{binding:1,resource:{buffer:Q}},{binding:2,resource:{buffer:re}}]}),ie=f.createBuffer({label:"results out buffer",size:re.size,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST}),oe=[],se=[],ue=[];let le=!1;for(;!le;){const e=f.createCommandEncoder({label:"distance encoder"}),n=e.beginComputePass({label:"distance compute pass"});n.setPipeline(O),n.setBindGroup(0,ae),n.dispatchWorkgroups(v,v),n.end(),e.copyBufferToBuffer(re,0,ie,0,ie.size);const t=e.finish();f.queue.submit([t]),yield f.queue.onSubmittedWorkDone(),yield ie.mapAsync(GPUMapMode.READ);const r=ie.getMappedRange();let a=0;const i=new Uint32Array(r,a,z);a+=z*Uint32Array.BYTES_PER_ELEMENT;const o=new Uint32Array(r,a,z);a+=z*Uint32Array.BYTES_PER_ELEMENT;const s=new Float32Array(r,a,z);a+=z*Float32Array.BYTES_PER_ELEMENT;const u=new Uint32Array(r,a,E);a+=E*Uint32Array.BYTES_PER_ELEMENT,le=new Uint32Array(r,a,E).every((e=>1===e));const l=u.reduce(((e,n)=>e+n),0),c=new Uint32Array(l),d=new Uint32Array(l),p=new Float32Array(l);let m=0;for(let e=0;e<u.length;e++){const n=u[e];0!==n&&(c.set(i.subarray(e*I,e*I+n),m),d.set(o.subarray(e*I,e*I+n),m),p.set(s.subarray(e*I,e*I+n),m),m+=n)}oe.push(c),se.push(d),ue.push(p),ie.unmap()}const fe=oe.reduce(((e,n)=>e+n.length),0),ce=new Uint32Array(fe),de=new Uint32Array(fe),pe=new Float32Array(fe);let me=0;for(let e=0;e<oe.length;e++)ce.set(oe[e],me),de.set(se[e],me),pe.set(ue[e],me),me+=oe[e].length;return H.destroy(),Q.destroy(),re.destroy(),ie.destroy(),{i:ce,j:de,distance:pe}},new((p=void 0)||(p=Promise))((function(e,n){function t(e){try{a(h.next(e))}catch(e){n(e)}}function r(e){try{a(h.throw(e))}catch(e){n(e)}}function a(n){var a;n.done?e(n.value):(a=n.value,a instanceof p?a:new p((function(e){e(a)}))).then(t,r)}a((h=h.apply(f,c||[])).next())}));var 
f,c,p,h}function m(e,n,t,r){return e.map(((e,r)=>`\n fn distanceScript${r}(aIndex: u32, bIndex: u32) -> f32 {\n let a = computeInfo.data${r}[aIndex];\n let b = computeInfo.data${r}[bIndex];\n let maxDistance: f32 = ${t};\n ${o[e](n[r],r)}\n }\n `)).join("\n")+"\n"+`\n fn combinedDistance(aIndex: u32, bIndex: u32) -> f32 {\n var distances: array<f32, ${e.length}>;\n ${e.map(((e,n)=>`distances[${n}] = distanceScript${n}(aIndex, bIndex);`)).join("\n")}\n ${a[r](e.length)}\n }\n \n `}var h=t(5731);async function g(e,n,t){const r=e.length,a=[],i=[];for(let o=0;o<r;o++)for(let s=o+1;s<r;s++){const r=e[o],u=e[s];n[r]?.[u]>=t&&(a.push(o),i.push(s))}return function(e,n,t){const r=new Float32Array(t.length).fill(0).map((()=>10*Math.random())),a=new Float32Array(t.length).fill(0).map((()=>10*Math.random())),i=new Float32Array(t.length).fill(0),o=new Float32Array(t.length).fill(0);for(let s=0;s<100;s++){const u=1-s/100;i.fill(0),o.fill(0);for(let t=0;t<e.length;t++){const s=e[t],l=n[t],f=r[s]-r[l],c=a[s]-a[l];Math.abs(f)>=1&&(i[s]-=u*f,i[l]+=u*f),Math.abs(c)>=1&&(o[s]-=u*c,o[l]+=u*c)}for(let e=0;e<t.length;e++){const n=Math.sqrt(i[e]*i[e]+o[e]*o[e]);n>0&&(r[e]+=i[e]/n*u,a[e]+=o[e]/n*u)}}let s=r[0],u=a[0],l=r[0],f=a[0];for(let e=1;e<t.length;e++)s=Math.min(s,r[e]),u=Math.min(u,a[e]),l=Math.max(l,r[e]),f=Math.max(f,a[e]);let c=l-s,d=f-u;0===c&&(c=l);for(let e=0;e<t.length;e++)r[e]=(r[e]-s)/c/2+.5;0===d&&(d=f);for(let e=0;e<t.length;e++)a[e]=(a[e]-u)/d/2+.5;return{embedX:r,embedY:a}}(a,i,e)}function y(e,n,t,r){return a=this,i=void 0,s=function*(){const a=r,i=Math.ceil(a/100),o=Math.ceil(Math.sqrt(i)),s=10*o,u=e.createShaderModule({label:"colwise-normalize",code:`\n @group(0) @binding(0) var<storage, read_write> knnSimilarities : array<f32>;\n @group(0) @binding(1) var<storage, read> offsets : array<u32>;\n @compute @workgroup_size(10, 10) fn normalize(\n @builtin(global_invocation_id) id: vec3<u32>,\n ) {\n let row = id.x;\n let col = id.y;\n let index = row * ${s} + col;\n if 
(index >= ${r}) {\n return;\n }\n let offsetBegin = offsets[index];\n let offsetEnd = offsets[index + 1];\n var sum = 0.0;\n if (offsetEnd - offsetBegin == 0) {\n return;\n }\n for (var i = offsetBegin; i < offsetEnd; i = i + 1) {\n sum = sum + knnSimilarities[i];\n }\n if (sum > 0.0) {\n for (var i = offsetBegin; i < offsetEnd; i = i + 1) {\n knnSimilarities[i] = knnSimilarities[i] / sum;\n }\n }\n }\n \n `}),l=e.createComputePipeline({label:"hamming compute pipeline",layout:"auto",compute:{module:u,entryPoint:"normalize"}}),f=e.createBuffer({label:"simmilarities info buffer",size:n.byteLength,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});new Float32Array(f.getMappedRange()).set(n),f.unmap();const c=e.createBuffer({label:"offsets info buffer",size:t.byteLength,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});new Uint32Array(c.getMappedRange()).set(t),c.unmap();const d=e.createBindGroup({layout:l.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:f}},{binding:1,resource:{buffer:c}}]}),p=e.createCommandEncoder(),m=p.beginComputePass();m.setPipeline(l),m.setBindGroup(0,d),m.dispatchWorkgroups(o,o),m.end();const h=e.createBuffer({label:"out similarity buffer",size:n.byteLength,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST});p.copyBufferToBuffer(f,0,h,0,n.byteLength),e.queue.submit([p.finish()]),yield e.queue.onSubmittedWorkDone(),yield h.mapAsync(GPUMapMode.READ);const g=new Float32Array(h.getMappedRange());n.set(g),h.unmap(),f.destroy(),c.destroy(),h.destroy()},new((o=void 0)||(o=Promise))((function(e,n){function t(e){try{u(s.next(e))}catch(e){n(e)}}function r(e){try{u(s.throw(e))}catch(e){n(e)}}function u(n){var a;n.done?e(n.value):(a=n.value,a instanceof o?a:new o((function(e){e(a)}))).then(t,r)}u((s=s.apply(a,i||[])).next())}));var a,i,o,s}function A(e,n=2){for(let t=0;t<e.length;t++)e[t]=Math.pow(e[t],n)}function b(e,n,t,r,a){return i=this,o=void 
0,u=function*(){const i=9e4,o=Math.ceil(900),s=Math.ceil(Math.sqrt(o)),u=10*s,l=Math.floor(Math.max(Math.log10(a),2))+1,f=Math.pow(10,-l),c=new Float32Array(n.length),d=e.createShaderModule({label:"expand",code:`\n struct SparseKNN {\n knnSimilarities: array<f32, ${n.length}>,\n knnIndexes: array<u32, ${t.length}>,\n offsets: array<u32, ${r.length}>,\n rowIndexes: array<u32, ${t.length}>,\n }\n\n @group(0) @binding(0) var<storage, read_write> sparseKNN: SparseKNN;\n @group(0) @binding(1) var<storage, read_write> resultSimBlock: array<f32, 90000>;\n @group(0) @binding(2) var<storage, read_write> startAt: u32;\n @compute @workgroup_size(10, 10) fn expand(\n @builtin(global_invocation_id) id: vec3<u32>,\n ) {\n let col = id.x;\n let row = id.y;\n let index = row * ${u} + col;\n if (index >= 90000) {\n return;\n }\n let workingIndex = index + startAt;\n if (workingIndex >= ${t.length}) {\n return;\n }\n \n let rowIdx = sparseKNN.rowIndexes[workingIndex];\n let colIdx = sparseKNN.knnIndexes[workingIndex];\n let offsetBeginRow = sparseKNN.offsets[rowIdx];\n let offsetEndRow = sparseKNN.offsets[rowIdx + 1];\n let offsetBeginCol = sparseKNN.offsets[colIdx];\n let offsetEndCol = sparseKNN.offsets[colIdx + 1];\n var sum = 0.0;\n for (var i = offsetBeginRow; i < offsetEndRow; i = i + 1) {\n for(var j = offsetBeginCol; j < offsetEndCol; j = j + 1) {\n if (sparseKNN.knnIndexes[i] == sparseKNN.knnIndexes[j]) {\n sum = sum + sparseKNN.knnSimilarities[i] * sparseKNN.knnSimilarities[j];\n break;\n }\n }\n }\n if (sum > ${f}) {\n resultSimBlock[index] = sum;\n } else {\n resultSimBlock[index] = 0.0;\n }\n }\n `}),p=e.createComputePipeline({label:"expand compute pipeline",layout:"auto",compute:{module:d,entryPoint:"expand"}});let m=4*(n.length+t.length+r.length+t.length);const h=15&m;0!==h&&(m+=16-h);const g=e.createBuffer({label:"sparse knn buffer",size:m,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0}),y=g.getMappedRange();new 
Float32Array(y,0,n.length).set(n),new Uint32Array(y,4*n.length,t.length).set(t),new Uint32Array(y,4*(n.length+t.length),r.length).set(r);const A=function(e){const n=new Uint32Array(e[e.length-1]);for(let t=0;t<e.length-1;t++)for(let r=e[t];r<e[t+1];r++)n[r]=t;return n}(r);new Uint32Array(y,4*(n.length+t.length+r.length),A.length).set(A),g.unmap();const b=e.createBuffer({label:"start end buffer",size:4,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST,mappedAtCreation:!0});new Uint32Array(b.getMappedRange()).set([0]),b.unmap();const w=e.createBuffer({label:"result block buffer",size:36e4,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),x=e.createBindGroup({layout:p.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:g}},{binding:1,resource:{buffer:w}},{binding:2,resource:{buffer:b}}]}),E=e.createBuffer({label:"out block buffer",size:w.size,usage:GPUBufferUsage.MAP_READ|GPUBufferUsage.COPY_DST});for(let n=0;n<Math.ceil(t.length/i);n++){const r=n*i,a=Math.min(i,t.length-r);e.queue.writeBuffer(b,0,new Uint32Array([r]));const o=e.createCommandEncoder({label:"expand encoder"}),u=o.beginComputePass({label:"expand compute pass"});u.setPipeline(p),u.setBindGroup(0,x),u.dispatchWorkgroups(s,s),u.end(),o.copyBufferToBuffer(w,0,E,0,E.size),e.queue.submit([o.finish()]),yield e.queue.onSubmittedWorkDone(),yield E.mapAsync(GPUMapMode.READ);const l=new Float32Array(E.getMappedRange(),0,a);c.set(l,r),E.unmap()}return g.destroy(),b.destroy(),w.destroy(),E.destroy(),{KNNIndexes:t,KNNSimilarities:c,indexOffsets:r}},new((s=void 0)||(s=Promise))((function(e,n){function t(e){try{a(u.next(e))}catch(e){n(e)}}function r(e){try{a(u.throw(e))}catch(e){n(e)}}function a(n){var a;n.done?e(n.value):(a=n.value,a instanceof s?a:new s((function(e){e(a)}))).then(t,r)}a((u=u.apply(i,o||[])).next())}));var i,o,s,u}function w(e,n){const t=function(e,n){const t=new Uint32Array(n);for(let n=0;n<e.i.length;n++)t[e.i[n]]++,t[e.j[n]]++;for(let 
e=0;e<n;e++)t[e]++;return t}(e,n),r=function(e){const n=new Uint32Array(e.length+1);let t=0;for(let r=0;r<e.length;r++)n[r]=t,t+=e[r];return n[e.length]=t,n}(t),a=new Uint32Array(n).fill(1),i=new Uint32Array(2*e.i.length+n),o=new Float32Array(2*e.i.length+n);for(let e=0;e<n;e++)i[r[e]]=e,o[r[e]]=1;for(let n=0;n<e.i.length;n++){const t=e.i[n],s=e.j[n],u=1-e.distance[n];i[r[t]+a[t]]=s,o[r[t]+a[t]]=u,a[t]++,i[r[s]+a[s]]=t,o[r[s]+a[s]]=u,a[s]++}return{KNNIndexes:i,KNNSimilarities:o,indexOffsets:r}}Object.prototype.toString;const x={expandFactor:2,maxIterations:5,inflateFactor:2,multFactor:1};class E{constructor(e={}){this._options={...x,...e}}async transform(e,n){let t=this.toObjectForm(e);if(this._options.maxIterations>0){this.addLoops(t,n),this.normalize(t);for(let e=0;e<this._options.maxIterations;e++)t=this.expand(t,n),this.inflate(t),this.normalize(t)}const{clusters:r,is:a,js:i}=this.assignClusters(t,n);this.correctClusters(r);const o=await this.layout(r,t,n);return{clusters:r,embedX:o.embedX,embedY:o.embedY,is:a,js:i}}async transformWebGPU(e,n){if(0===this._options.maxIterations)return this.transform(e,n);const t=await function(e,n,t=5,r=2){return a=this,i=void 0,s=function*(){const a=yield d();if(!a)throw new Error("no gpu device found");const i=w(e,n);yield y(a,i.KNNSimilarities,i.indexOffsets,n);let o=i;for(let e=0;e<t;e++){const e=yield b(a,o.KNNSimilarities,o.KNNIndexes,o.indexOffsets,n);A(e.KNNSimilarities,r),yield y(a,e.KNNSimilarities,e.indexOffsets,n),o=e}return o},new((o=void 0)||(o=Promise))((function(e,n){function t(e){try{u(s.next(e))}catch(e){n(e)}}function r(e){try{u(s.throw(e))}catch(e){n(e)}}function u(n){var a;n.done?e(n.value):(a=n.value,a instanceof o?a:new o((function(e){e(a)}))).then(t,r)}u((s=s.apply(a,i||[])).next())}));var a,i,o,s}(e,n,this._options.maxIterations,this._options.inflateFactor),r=this.csrToSparseObject(t,n),{clusters:a,is:i,js:o}=this.assignClusters(r,n);this.correctClusters(a);const s=await 
this.layout(a,r,n);return{clusters:a,embedX:s.embedX,embedY:s.embedY,is:i,js:o}}correctClusters(e){const n={};for(const t of e)n[t]||(n[t]=0),n[t]++;const t=Object.keys(n).map(Number).sort(((e,t)=>n[t]-n[e])),r={};t.forEach(((e,n)=>r[e]=n+1));for(let n=0;n<e.length;n++)e[n]=r[e[n]]}csrToSparseObject(e,n){const t=Math.floor(Math.max(Math.log10(n),2))+1,r=1/Math.pow(10,t),a={};for(let t=0;t<n;t++){a[t]={};for(let n=e.indexOffsets[t];n<e.indexOffsets[t+1];n++){const i=e.KNNIndexes[n];i<=t||e.KNNSimilarities[n]<r||(a[t][i]=e.KNNSimilarities[n])}}return a}async layout(e,n,t){const r=new Float32Array(t).fill(0),a=new Float32Array(t).fill(0),i={};e.forEach(((e,n)=>{i[e]||(i[e]=[]),i[e].push(n)}));let o=0;const s=Object.keys(i);s.sort(((e,n)=>i[n].length-i[e].length));let u=6,l=0;for(const e of s){const t=i[e],s=await g(t,n,.001);o===Math.ceil(u/1.5)&&(o=0,l+=5/u,u=Math.ceil(1.5*u));const f=o%u*5/u*1.5;for(let e=0;e<s.embedX.length;e++)r[t[e]]=5*s.embedX[e]/u+f,a[t[e]]=5*s.embedY[e]/u+l;o++}return{embedX:r,embedY:a}}mergeClusters(e,n,t){const r=e[n],a=e[t];for(let n=0;n<e.length;n++)e[n]===a&&(e[n]=r)}assignClusters(e,n){let t=0;const r=[],a=[],i=Math.floor(Math.max(Math.log10(n),2))+1,o=Math.pow(10,i),s=new Array(n).fill(-1);for(const n of Object.keys(e))for(const i of Object.keys(e[n]))Math.round(e[n][i]*o)/o>0&&e[n][i]!==Number(n)&&Number(i)>Number(n)&&(r.push(Number(n)),a.push(Number(i)),-1!==s[Number(n)]&&-1!==s[Number(i)]?s[Number(n)]!==s[Number(i)]&&this.mergeClusters(s,Number(n),Number(i)):-1!==s[Number(n)]?s[Number(i)]=s[Number(n)]:-1!==s[Number(i)]?s[Number(n)]=s[Number(i)]:(t++,s[Number(n)]=t,s[Number(i)]=t));for(let e=0;e<s.length;e++)-1===s[e]&&(t++,s[e]=t);return{clusters:s,is:new Uint32Array(r),js:new Uint32Array(a)}}assignClustersCSR(e,n){let t=0;const r=[],a=[],i=Math.floor(Math.max(Math.log10(n),2))+1,o=1/Math.pow(10,i),s=new Array(n).fill(-1),u=new Uint32Array(n+1);let l=0;u[0]=0;for(let i=0;i<n;i++){for(let 
n=e.indexOffsets[i];n<e.indexOffsets[i+1];n++){const u=e.KNNIndexes[n];u<=i||e.KNNSimilarities[n]<=o||(r.push(i),a.push(u),l++,-1!==s[i]&&-1!==s[u]?s[i]!==s[u]&&this.mergeClusters(s,i,u):-1!==s[i]?s[u]=s[i]:-1!==s[u]?s[i]=s[u]:(t++,s[i]=t,s[u]=t))}u[i+1]=l}for(let e=0;e<s.length;e++)-1===s[e]&&(t++,s[e]=t);return{clusters:s,is:new Uint32Array(r),js:new Uint32Array(a),correctedOffsets:u}}toObjectForm(e){const n={};for(let t=0;t<e.i.length;t++)n[e.i[t]]||(n[e.i[t]]={}),n[e.i[t]][e.j[t]]=1-e.distance[t],n[e.j[t]]||(n[e.j[t]]={}),n[e.j[t]][e.i[t]]=1-e.distance[t];return n}addLoops(e,n){for(let t=0;t<n;t++)e[t]||(e[t]={}),e[t][t]=this._options.multFactor}normalize(e){for(const n of Object.keys(e)){const t=e[n];let r=0;for(const e of Object.keys(t))r+=t[e];if(0!==r)for(const a of Object.keys(t))e[n][a]/=r}}expand(e,n){const t={},r=Math.floor(Math.max(Math.log10(n),2))+1,a=Math.pow(10,r);for(let r=0;r<n;r++)if(e[r]){t[r]??(t[r]={});for(let i=r;i<n;i++){if(!e[r]?.[i])continue;const n=this.getExpandValue(e,r,i);Math.round(n*a)/a>0&&(t[r][i]=n,t[i]||(t[i]={}),t[i][r]=n)}}return t}inflate(e){for(const n of Object.keys(e)){const t=e[n];for(const r of Object.keys(t))e[n][r]=Math.pow(e[n][r],this._options.inflateFactor)}}getExpandValue(e,n,t){let r=0;const a=Object.keys(e[n]??{}),i=Object.keys(e[t]??{});for(const o of a)i.includes(o)&&(r+=e[n][o]*e[t][o]);return r}}onmessage=async e=>{const{data:n,threshold:t,weights:r,aggregationMethod:a,distanceFnArgs:i,distanceFns:o,maxIterations:s,useWebGPU:u,inflate:l}=e.data;console.time("sparse matrix");let f=null;if(u)try{f=await p(n,t/100,o,a,r,i)}catch(e){console.error(e)}f||(u&&console.error("WEBGPU sparse matrix calculation failed, falling back to CPU implementation"),f=await(new h.p).calcMultiColumn(n,o,t/100,i,r,a)),console.timeEnd("sparse matrix");const c=new E({maxIterations:s??5,inflateFactor:l??2});console.time("MCL");let d=null;if(u)try{d=await c.transformWebGPU(f,n[0].length)}catch(e){console.error("webGPU MCL failed, falling 
back to CPU implementation"),console.error(e)}d||(d=await c.transform(f,n[0].length)),console.timeEnd("MCL"),postMessage({res:d})}},606:(e,n,t)=>{t.d(n,{Qt:()=>a,gD:()=>r}),t(6066);const r=e=>null==e;function a(e,n,t,r){if(t>e[e.length-1])return;const a=e.findIndex((e=>t<e));e.pop(),e.splice(a,0,t),n.pop(),n.splice(a,0,r)}}},r={};function a(e){var n=r[e];if(void 0!==n)return n.exports;var i=r[e]={exports:{}};return t[e](i,i.exports,a),i.exports}a.m=t,a.x=()=>{var e=a.O(void 0,[731],(()=>a(9260)));return a.O(e)},e=[],a.O=(n,t,r,i)=>{if(!t){var o=1/0;for(f=0;f<e.length;f++){for(var[t,r,i]=e[f],s=!0,u=0;u<t.length;u++)(!1&i||o>=i)&&Object.keys(a.O).every((e=>a.O[e](t[u])))?t.splice(u--,1):(s=!1,i<o&&(o=i));if(s){e.splice(f--,1);var l=r();void 0!==l&&(n=l)}}return n}i=i||0;for(var f=e.length;f>0&&e[f-1][2]>i;f--)e[f]=e[f-1];e[f]=[t,r,i]},a.d=(e,n)=>{for(var t in n)a.o(n,t)&&!a.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:n[t]})},a.f={},a.e=e=>Promise.all(Object.keys(a.f).reduce(((n,t)=>(a.f[t](e,n),n)),[])),a.u=e=>e+".js",a.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),a.o=(e,n)=>Object.prototype.hasOwnProperty.call(e,n),(()=>{var e;a.g.importScripts&&(e=a.g.location+"");var n=a.g.document;if(!e&&n&&(n.currentScript&&(e=n.currentScript.src),!e)){var t=n.getElementsByTagName("script");if(t.length)for(var r=t.length-1;r>-1&&(!e||!/^http(s?):/.test(e));)e=t[r--].src}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),a.p=e})(),(()=>{a.b=self.location+"";var e={317:1};a.f.i=(n,t)=>{e[n]||importScripts(a.p+a.u(n))};var n=self.webpackChunkbio=self.webpackChunkbio||[],t=n.push.bind(n);n.push=n=>{var[r,i,o]=n;for(var s in i)a.o(i,s)&&(a.m[s]=i[s]);for(o&&o(a);r.length;)e[r.pop()]=1;t(n)}})(),n=a.x,a.x=()=>a.e(731).then(n);var i=a.x();bio=i})();
|
|
2
|
-
//# sourceMappingURL=317.js.map
|