@datagrok/eda 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/dist/111.js +2 -0
  3. package/dist/111.js.map +1 -0
  4. package/dist/128.js +2 -0
  5. package/dist/128.js.map +1 -0
  6. package/dist/153.js +2 -0
  7. package/dist/153.js.map +1 -0
  8. package/dist/23.js +2 -0
  9. package/dist/23.js.map +1 -0
  10. package/dist/234.js +2 -0
  11. package/dist/234.js.map +1 -0
  12. package/dist/242.js +2 -0
  13. package/dist/242.js.map +1 -0
  14. package/dist/260.js +2 -0
  15. package/dist/260.js.map +1 -0
  16. package/dist/33.js +2 -0
  17. package/dist/33.js.map +1 -0
  18. package/dist/348.js +2 -0
  19. package/dist/348.js.map +1 -0
  20. package/dist/377.js +2 -0
  21. package/dist/377.js.map +1 -0
  22. package/dist/412.js +2 -0
  23. package/dist/412.js.map +1 -0
  24. package/dist/415.js +2 -0
  25. package/dist/415.js.map +1 -0
  26. package/dist/501.js +2 -0
  27. package/dist/501.js.map +1 -0
  28. package/dist/531.js +2 -0
  29. package/dist/531.js.map +1 -0
  30. package/dist/583.js +2 -0
  31. package/dist/583.js.map +1 -0
  32. package/dist/589.js +2 -0
  33. package/dist/589.js.map +1 -0
  34. package/dist/603.js +2 -0
  35. package/dist/603.js.map +1 -0
  36. package/dist/656.js +2 -0
  37. package/dist/656.js.map +1 -0
  38. package/dist/682.js +2 -0
  39. package/dist/682.js.map +1 -0
  40. package/dist/705.js +2 -0
  41. package/dist/705.js.map +1 -0
  42. package/dist/727.js +2 -0
  43. package/dist/727.js.map +1 -0
  44. package/dist/731.js +2 -0
  45. package/dist/731.js.map +1 -0
  46. package/dist/738.js +3 -0
  47. package/dist/738.js.LICENSE.txt +51 -0
  48. package/dist/738.js.map +1 -0
  49. package/dist/763.js +2 -0
  50. package/dist/763.js.map +1 -0
  51. package/dist/778.js +2 -0
  52. package/dist/778.js.map +1 -0
  53. package/dist/783.js +2 -0
  54. package/dist/783.js.map +1 -0
  55. package/dist/793.js +2 -0
  56. package/dist/793.js.map +1 -0
  57. package/dist/801.js +2 -0
  58. package/dist/801.js.map +1 -0
  59. package/dist/810.js +2 -0
  60. package/dist/810.js.map +1 -0
  61. package/dist/860.js +2 -0
  62. package/dist/860.js.map +1 -0
  63. package/dist/907.js +2 -0
  64. package/dist/907.js.map +1 -0
  65. package/dist/950.js +2 -0
  66. package/dist/950.js.map +1 -0
  67. package/dist/980.js +2 -0
  68. package/dist/980.js.map +1 -0
  69. package/dist/990.js +2 -0
  70. package/dist/990.js.map +1 -0
  71. package/dist/package-test.js +1 -26140
  72. package/dist/package-test.js.map +1 -1
  73. package/dist/package.js +1 -30337
  74. package/dist/package.js.map +1 -1
  75. package/package.json +2 -2
  76. package/src/anova/anova-ui.ts +39 -24
  77. package/src/package-api.ts +4 -0
  78. package/src/package.g.ts +33 -32
  79. package/src/package.ts +2 -2
  80. package/test-console-output-1.log +72 -93
  81. package/test-record-1.mp4 +0 -0
  82. package/webpack.config.js +1 -1
  83. package/dist/_d4c0.js +0 -279
  84. package/dist/_d4c0.js.map +0 -1
  85. package/dist/node_modules_datagrok-libraries_math_src_dbscan_wasm_clustering-worker_js.js +0 -279
  86. package/dist/node_modules_datagrok-libraries_math_src_dbscan_wasm_clustering-worker_js.js.map +0 -1
  87. package/dist/node_modules_datagrok-libraries_ml_src_MCL_mcl-sparse-matrix-mult-worker_js.js +0 -59
  88. package/dist/node_modules_datagrok-libraries_ml_src_MCL_mcl-sparse-matrix-mult-worker_js.js.map +0 -1
  89. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_distance-matrix-worker_js.js +0 -284
  90. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_distance-matrix-worker_js.js.map +0 -1
  91. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_single-value-knn-worker_js.js +0 -265
  92. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_single-value-knn-worker_js.js.map +0 -1
  93. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-worker_js.js +0 -287
  94. package/dist/node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-worker_js.js.map +0 -1
  95. package/dist/src_workers_softmax-worker_ts.js +0 -154
  96. package/dist/src_workers_softmax-worker_ts.js.map +0 -1
  97. package/dist/src_workers_tsne-worker_ts.js +0 -244
  98. package/dist/src_workers_tsne-worker_ts.js.map +0 -1
  99. package/dist/src_workers_umap-worker_ts.js +0 -252
  100. package/dist/src_workers_umap-worker_ts.js.map +0 -1
  101. package/dist/vendors-node_modules_datagrok-libraries_math_src_dbscan_wasm_dbscan_js.js +0 -1253
  102. package/dist/vendors-node_modules_datagrok-libraries_math_src_dbscan_wasm_dbscan_js.js.map +0 -1
  103. package/dist/vendors-node_modules_datagrok-libraries_math_src_hierarchical-clustering_wasm_clustering-worker_js.js +0 -942
  104. package/dist/vendors-node_modules_datagrok-libraries_math_src_hierarchical-clustering_wasm_clustering-worker_js.js.map +0 -1
  105. package/dist/vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f.js +0 -1525
  106. package/dist/vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f.js.map +0 -1
  107. package/dist/vendors-node_modules_datagrok-libraries_ml_src_MCL_mcl-worker_js-node_modules_datagrok-librar-e4203d.js +0 -2244
  108. package/dist/vendors-node_modules_datagrok-libraries_ml_src_MCL_mcl-worker_js-node_modules_datagrok-librar-e4203d.js.map +0 -1
  109. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-threshold-worker_js.js +0 -286
  110. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-threshold-worker_js.js.map +0 -1
  111. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-worker_js.js +0 -280
  112. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-worker_js.js.map +0 -1
  113. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-threshold-worker_js.js +0 -282
  114. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-threshold-worker_js.js.map +0 -1
  115. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_utils_js-node_modules_datagrok-72c7b2.js +0 -1821
  116. package/dist/vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_utils_js-node_modules_datagrok-72c7b2.js.map +0 -1
  117. package/dist/vendors-node_modules_datagrok-libraries_ml_src_multi-column-dimensionality-reduction_mulit-co-3800a0.js +0 -7776
  118. package/dist/vendors-node_modules_datagrok-libraries_ml_src_multi-column-dimensionality-reduction_mulit-co-3800a0.js.map +0 -1
  119. package/dist/vendors-node_modules_keckelt_tsne_lib_index_js.js +0 -379
  120. package/dist/vendors-node_modules_keckelt_tsne_lib_index_js.js.map +0 -1
  121. package/dist/vendors-node_modules_ml-matrix_matrix_mjs.js +0 -5946
  122. package/dist/vendors-node_modules_ml-matrix_matrix_mjs.js.map +0 -1
  123. package/dist/vendors-node_modules_umap-js_dist_index_js.js +0 -2284
  124. package/dist/vendors-node_modules_umap-js_dist_index_js.js.map +0 -1
  125. package/dist/wasm_EDAForWebWorker_js-wasm_callWasmForWebWorker_js.js +0 -779
  126. package/dist/wasm_EDAForWebWorker_js-wasm_callWasmForWebWorker_js.js.map +0 -1
  127. package/dist/wasm_workers_errorWorker_js.js +0 -267
  128. package/dist/wasm_workers_errorWorker_js.js.map +0 -1
  129. package/dist/wasm_workers_fitLinearRegressionParamsWithDataNormalizingWorker_js.js +0 -267
  130. package/dist/wasm_workers_fitLinearRegressionParamsWithDataNormalizingWorker_js.js.map +0 -1
  131. package/dist/wasm_workers_fitLinearRegressionParamsWorker_js.js +0 -267
  132. package/dist/wasm_workers_fitLinearRegressionParamsWorker_js.js.map +0 -1
  133. package/dist/wasm_workers_fitSoftmaxWorker_js.js +0 -267
  134. package/dist/wasm_workers_fitSoftmaxWorker_js.js.map +0 -1
  135. package/dist/wasm_workers_generateDatasetWorker_js.js +0 -267
  136. package/dist/wasm_workers_generateDatasetWorker_js.js.map +0 -1
  137. package/dist/wasm_workers_normalizeDatasetWorker_js.js +0 -267
  138. package/dist/wasm_workers_normalizeDatasetWorker_js.js.map +0 -1
  139. package/dist/wasm_workers_partialLeastSquareRegressionWorker_js.js +0 -267
  140. package/dist/wasm_workers_partialLeastSquareRegressionWorker_js.js.map +0 -1
  141. package/dist/wasm_workers_predictByLSSVMWorker_js.js +0 -267
  142. package/dist/wasm_workers_predictByLSSVMWorker_js.js.map +0 -1
  143. package/dist/wasm_workers_principalComponentAnalysisNipalsWorker_js.js +0 -267
  144. package/dist/wasm_workers_principalComponentAnalysisNipalsWorker_js.js.map +0 -1
  145. package/dist/wasm_workers_principalComponentAnalysisWorkerUpd_js.js +0 -271
  146. package/dist/wasm_workers_principalComponentAnalysisWorkerUpd_js.js.map +0 -1
  147. package/dist/wasm_workers_trainAndAnalyzeLSSVMWorker_js.js +0 -267
  148. package/dist/wasm_workers_trainAndAnalyzeLSSVMWorker_js.js.map +0 -1
  149. package/dist/wasm_workers_trainLSSVMWorker_js.js +0 -267
  150. package/dist/wasm_workers_trainLSSVMWorker_js.js.map +0 -1
  151. package/dist/wasm_workers_xgboostWorker_js.js +0 -279
  152. package/dist/wasm_workers_xgboostWorker_js.js.map +0 -1
@@ -1,1525 +0,0 @@
1
- "use strict";
2
- (self["webpackChunkeda"] = self["webpackChunkeda"] || []).push([["vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f"],{
3
-
4
- /***/ "./node_modules/@datagrok-libraries/math/src/webGPU/getGPUDevice.js":
5
- /*!**************************************************************************!*\
6
- !*** ./node_modules/@datagrok-libraries/math/src/webGPU/getGPUDevice.js ***!
7
- \**************************************************************************/
8
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
9
-
10
- __webpack_require__.r(__webpack_exports__);
11
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
12
- /* harmony export */ getGPUAdapterDescription: () => (/* binding */ getGPUAdapterDescription),
13
- /* harmony export */ getGPUDevice: () => (/* binding */ getGPUDevice)
14
- /* harmony export */ });
15
// Down-levelled async/await helper: runs the generator obtained from
// `generator.apply(thisArg, _arguments)` to completion and returns a promise
// (of type `P`, defaulting to the global Promise) for its return value.
// Each yielded value is awaited; a rejection is re-thrown into the generator.
var __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values so that `.then` can always be called on the result.
    const toPromise = (value) => (value instanceof P ? value : new P((settle) => { settle(value); }));
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait for the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
24
// Module-level cache: the adapter and device are requested once and then reused.
let gpuAdapter = null;
let gpuDevice = null;
/**
 * Returns the cached WebGPU device, creating it on first use.
 *
 * The device is requested with raised `maxBufferSize` and
 * `maxStorageBufferBindingSize` limits (capped at ~1000MB and at what the
 * adapter reports). If that request fails, a device with default limits is
 * requested instead.
 *
 * Loss tracking: a single `lost` handler is attached when the device is
 * created. When it fires, the cached device AND the cached adapter are dropped,
 * so the next call starts from a fresh adapter instead of re-using an adapter
 * that has already produced a device. This replaces the previous scheme that
 * attached a new handler and slept for 10 ms on every call.
 *
 * @returns {Promise<GPUDevice|null>} the device, or null when WebGPU or a
 *   suitable adapter is not available.
 */
function getGPUDevice() {
    return __awaiter(this, void 0, void 0, function* () {
        if (!navigator.gpu) {
            console.error('WebGPU is not supported in this browser');
            return null;
        }
        // A cached device is only kept while it has not been reported lost.
        if (gpuDevice)
            return gpuDevice;
        if (!gpuAdapter) {
            //reason: only here we get the gpuAdapter
            // eslint-disable-next-line no-restricted-syntax
            gpuAdapter = yield navigator.gpu.requestAdapter({ powerPreference: 'high-performance' });
            if (gpuAdapter == null)
                return null;
        }
        const adapter = gpuAdapter;
        const requiredBufferSize = 1000000000; // ~1000MB
        const { maxBufferSize, maxStorageBufferBindingSize } = adapter.limits;
        let device;
        try {
            device = yield adapter.requestDevice({ requiredLimits: {
                    maxBufferSize: Math.min(maxBufferSize, requiredBufferSize),
                    maxStorageBufferBindingSize: Math.min(maxStorageBufferBindingSize, requiredBufferSize)
                } });
        }
        catch (e) {
            console.error('Failed to create device with required limits', e);
            device = yield adapter.requestDevice();
        }
        gpuDevice = device;
        // Invalidate the cache exactly once, and only if it still refers to this device/adapter.
        device.lost.then(() => {
            if (gpuDevice === device)
                gpuDevice = null;
            if (gpuAdapter === adapter)
                gpuAdapter = null;
        });
        return device;
    });
}
67
/**
 * Returns a human-readable description of the GPU adapter.
 *
 * The adapter is stored in the module-level `gpuAdapter` cache that
 * getGPUDevice also reads, so it is requested with the same
 * `powerPreference: 'high-performance'` option; otherwise the adapter that ends
 * up cached would depend on which of the two functions happened to run first.
 *
 * @returns {Promise<string|null>} `info.description`, falling back to
 *   `info.vendor`, then to 'No GPU description available'; null when WebGPU or
 *   an adapter is not available.
 */
function getGPUAdapterDescription() {
    return __awaiter(this, void 0, void 0, function* () {
        if (!navigator.gpu) {
            console.error('WebGPU is not supported in this browser');
            return null;
        }
        if (!gpuAdapter) {
            // reason: only here we get the gpuAdapter
            // eslint-disable-next-line no-restricted-syntax
            gpuAdapter = yield navigator.gpu.requestAdapter({ powerPreference: 'high-performance' });
            if (gpuAdapter == null)
                return null;
        }
        const noDescription = 'No GPU description available';
        // Only the `info` attribute is consulted; adapters that do not expose it get the fallback text.
        const info = 'info' in gpuAdapter ? gpuAdapter.info : null;
        if (!info)
            return noDescription;
        return replaceEmptyString(info.description, replaceEmptyString(info.vendor, noDescription));
    });
}
92
/**
 * Returns `replacement` when `str` is falsy or loosely equal to the empty
 * string, otherwise returns `str` unchanged.
 */
function replaceEmptyString(str, replacement) {
    const isBlank = !str || str == '';
    if (isBlank) {
        return replacement;
    }
    return str;
}
95
- //# sourceMappingURL=getGPUDevice.js.map
96
-
97
- /***/ }),
98
-
99
- /***/ "./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-aggregation.js":
100
- /*!****************************************************************************************************!*\
101
- !*** ./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-aggregation.js ***!
102
- \****************************************************************************************************/
103
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
104
-
105
- __webpack_require__.r(__webpack_exports__);
106
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
107
- /* harmony export */ WEBGSLAGGREGATION: () => (/* binding */ WEBGSLAGGREGATION),
108
- /* harmony export */ WEBGSLAGGREGATIONFUNCTIONS: () => (/* binding */ WEBGSLAGGREGATIONFUNCTIONS)
109
- /* harmony export */ });
110
/**
 * Builds the WGSL statements that combine `arraySize` per-column distances
 * into one value: the square root of the sum of squared, weight-scaled
 * distances. The snippet reads `distances` and `computeInfo.weights`, which
 * must be in scope where it is injected.
 */
function euclideanAggregationWgsl(arraySize) {
    const weightedSquare = 'distances[i] * distances[i] * computeInfo.weights[i] * computeInfo.weights[i]';
    return `
    var sum = 0.0;
    for (var i = 0u; i < ${arraySize}; i = i + 1u) {
      sum = sum + ${weightedSquare};
    }
    return sqrt(sum);
  `;
}
119
- ;
120
/**
 * Builds the WGSL statements that combine `arraySize` per-column distances
 * into one value: the weighted sum of absolute distances. The snippet reads
 * `distances` and `computeInfo.weights`, which must be in scope where it is
 * injected.
 */
function manhattanAggregationWgsl(arraySize) {
    const weightedAbs = 'abs(distances[i]) * computeInfo.weights[i]';
    return `
    var sum = 0.0;
    for (var i = 0u; i < ${arraySize}; i = i + 1u) {
      sum = sum + ${weightedAbs};
    }
    return sum;
  `;
}
129
// String enum of the supported aggregation kinds; every member maps to its own name.
var WEBGSLAGGREGATION = WEBGSLAGGREGATION || {};
for (const member of ['EUCLIDEAN', 'MANHATTAN']) {
    WEBGSLAGGREGATION[member] = member;
}
134
// Lookup table: aggregation kind -> generator of the matching WGSL snippet.
const WEBGSLAGGREGATIONFUNCTIONS = {};
WEBGSLAGGREGATIONFUNCTIONS[WEBGSLAGGREGATION.EUCLIDEAN] = euclideanAggregationWgsl;
WEBGSLAGGREGATIONFUNCTIONS[WEBGSLAGGREGATION.MANHATTAN] = manhattanAggregationWgsl;
138
- //# sourceMappingURL=webGPU-aggregation.js.map
139
-
140
- /***/ }),
141
-
142
- /***/ "./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-multicol-distances.js":
143
- /*!***********************************************************************************************************!*\
144
- !*** ./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-multicol-distances.js ***!
145
- \***********************************************************************************************************/
146
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
147
-
148
- __webpack_require__.r(__webpack_exports__);
149
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
150
- /* harmony export */ TypeSupportedDistances: () => (/* binding */ TypeSupportedDistances),
151
- /* harmony export */ WEBGPUDISTANCE: () => (/* binding */ WEBGPUDISTANCE),
152
- /* harmony export */ distanceFunctionComplexity: () => (/* binding */ distanceFunctionComplexity),
153
- /* harmony export */ webGPUAsymmetricBitArray: () => (/* binding */ webGPUAsymmetricBitArray),
154
- /* harmony export */ webGPUCosineBitArray: () => (/* binding */ webGPUCosineBitArray),
155
- /* harmony export */ webGPUEuclidean: () => (/* binding */ webGPUEuclidean),
156
- /* harmony export */ webGPUFunctions: () => (/* binding */ webGPUFunctions),
157
- /* harmony export */ webGPUHamming: () => (/* binding */ webGPUHamming),
158
- /* harmony export */ webGPULevenstein: () => (/* binding */ webGPULevenstein),
159
- /* harmony export */ webGPUManhattan: () => (/* binding */ webGPUManhattan),
160
- /* harmony export */ webGPUMonomerChemicalDistance: () => (/* binding */ webGPUMonomerChemicalDistance),
161
- /* harmony export */ webGPUNeedlemanWunsch: () => (/* binding */ webGPUNeedlemanWunsch),
162
- /* harmony export */ webGPUNumericDistance: () => (/* binding */ webGPUNumericDistance),
163
- /* harmony export */ webGPUOneHotDistance: () => (/* binding */ webGPUOneHotDistance),
164
- /* harmony export */ webGPUSokalBitArray: () => (/* binding */ webGPUSokalBitArray),
165
- /* harmony export */ webGPUTanimotoBitArray: () => (/* binding */ webGPUTanimotoBitArray),
166
- /* harmony export */ webGPUVectorCosine: () => (/* binding */ webGPUVectorCosine)
167
- /* harmony export */ });
168
- /* eslint-disable max-len */
169
- // in all the functions below, the variables a and b are assumed to be arrays of uint32/f32
170
- // values which are inferred from the code this chunk is injected into
171
- // also, we have access to computeInfo struct, which contains the following fields:
172
- // computeInfo.entrySizes: array of arrays of u32 containing the sizes of the entries
173
- // other fields are specific to the distance function should be injected from the main script that calls this function,
174
- // and should be available in the supplementaryInfo struct
175
- // like the similarity matrix for monomer chemical distance.
176
- // the getProcessInfo function should return correct buffer allocation mechanism for the supplementaryInfo,
177
- // for every entry list
178
- // the maxDistance variable is also assumed to be available in the
179
- // scope of the function, in case of knn it is the distance in the last position of knn on this index,
180
- // in case of sparse matrix, it can be just the threshold for the distance.
181
- // hamming distance for sequences of uint32 arrays of max length ${maxArraySize}
182
/**
 * Builds the WGSL body of a Hamming-style distance between entries `a` and `b`.
 *
 * The snippet counts positions `0.._maxArraySize-1` where `a[i] != b[i]`, adds
 * the length difference of the two entries and divides by `_maxArraySize`. It
 * returns 1.0 early once the running count exceeds the bound derived from
 * `maxDistance`. It relies on `a`, `b`, `aIndex`, `bIndex`, `maxDistance` and
 * `computeInfo` being in scope where it is injected.
 *
 * @param _maxArraySize loop bound and divisor interpolated into the snippet
 * @param entryIndex index into `computeInfo.entrySizes`
 */
function webGPUHamming(_maxArraySize, entryIndex) {
    const entrySizes = `computeInfo.entrySizes[${entryIndex}]`;
    const bound = `${_maxArraySize}`;
    return `
    let aLength: u32 = ${entrySizes}[aIndex];
    let bLength: u32 = ${entrySizes}[bIndex];
    let maxLength: u32 = max(aLength, bLength);
    let minLength: u32 = min(aLength, bLength);
    let sizeDiff: u32 = maxLength - minLength;

    let maxIntDistance = ceil(maxDistance * f32(maxLength)) - f32(sizeDiff);

    var diff: f32 = 0.0;
    for (var i = 0u; i < ${bound}; i = i + 1u) {
      diff = diff + f32(a[i] != b[i]);
      if (diff > maxIntDistance) {
        return 1.0;
      }
    }
    diff += f32(sizeDiff);
    return diff / ${bound};
  `;
}
203
/**
 * Builds the WGSL body of the monomer chemical distance between `a` and `b`.
 *
 * Same shape as the Hamming snippet, except each position contributes
 * `1.0 - similarity`, with the similarity looked up in
 * `suppInfo.similarityMatrix${entryIndex}` (similarityMatrix0,
 * similarityMatrix1, ...). That matrix is assumed to be provided by the code
 * that builds `suppInfo` (the getProcessInfo function, per the original note).
 *
 * @param _maxArraySize loop bound and divisor interpolated into the snippet
 * @param entryIndex index into `computeInfo.entrySizes` and suffix of the matrix name
 */
function webGPUMonomerChemicalDistance(_maxArraySize, entryIndex) {
    const entrySizes = `computeInfo.entrySizes[${entryIndex}]`;
    const matrixName = `suppInfo.similarityMatrix${entryIndex}`;
    return `
    let aLength: u32 = ${entrySizes}[aIndex];
    let bLength: u32 = ${entrySizes}[bIndex];
    let maxLength: u32 = max(aLength, bLength);
    let minLength: u32 = min(aLength, bLength);
    let sizeDiff: u32 = maxLength - minLength;

    let maxIntDistance = ceil(maxDistance * f32(maxLength)) - f32(sizeDiff);

    let simMatrix = &(${matrixName}); // using pointers make things faster
    var diff: f32 = 0.0;
    for (var i = 0u; i < ${_maxArraySize}; i = i + 1u) {
      diff = diff + 1.0 - (*simMatrix)[u32(a[i])][u32(b[i])];
      if (diff > maxIntDistance) {
        return 1.0;
      }
    }
    diff += f32(sizeDiff);
    return diff / ${_maxArraySize};
  `;
}
227
/**
 * Builds the WGSL body of a Levenshtein edit distance between `a` and `b`,
 * divided by the longer entry's length.
 *
 * The snippet keeps only two rows of the dynamic-programming grid and swaps
 * them after each pass. Each row is declared with `maxArraySize + 1` cells
 * because WGSL array sizes must be known at compile time. It returns 1.0 early
 * when the smallest value computed in a row exceeds the bound derived from
 * `maxDistance`.
 *
 * @param maxArraySize largest entry length; determines the row size
 * @param entryIndex index into `computeInfo.entrySizes`
 */
function webGPULevenstein(maxArraySize, entryIndex) {
    const rowSize = maxArraySize + 1;
    const entrySizes = `computeInfo.entrySizes[${entryIndex}]`;
    return `
    let aLength: u32 = ${entrySizes}[aIndex];
    let bLength: u32 = ${entrySizes}[bIndex];
    let maxLength: u32 = max(aLength, bLength);
    let minLength: u32 = min(aLength, bLength);

    let maxIntDistance = ceil(maxDistance * f32(maxLength));

    // we will store two arrays as matrix and swap the working indices per pass.
    // this way we can reduce memory usage per computation to just O(aLength)
    // the grid will have aLength + 1 columns and bLength + 1 rows
    // this will be guaranteed by iteration, but the array sizes must be known at compile time, so we will use a fixed size of maxArraySize
    var dynamicPassMat: array<array<f32, ${rowSize}u>, 2>; // initialize to 0

    var prevIndex: u32 = 0;
    var curIndex: u32 = 1; // we will swap these indices per pass

    // initialize the first row
    for (var i = 0u; i <= aLength; i = i + 1u) {
      dynamicPassMat[prevIndex][i] = f32(i);
    }

    // iterate over the rows
    for (var i = 1u; i <= bLength; i = i + 1u) {
      dynamicPassMat[curIndex][0] = f32(i);
      var minEntry: f32 = f32(maxLength);
      let prevRow = &dynamicPassMat[prevIndex];
      let curRow = &dynamicPassMat[curIndex];
      let bMon = u32(b[i - 1]);
      for (var j = 1u; j <= aLength; j = j + 1u) {
        var cost: f32 = f32(a[j - 1] != bMon);
        var res: f32 = min(
          min(
            (*prevRow)[j] + 1.0, // deletion
            (*curRow)[j - 1] + 1.0, // insertion
          ),
          (*prevRow)[j - 1] + cost // substitution
        );
        (*curRow)[j] = res;
        if (res < minEntry) {
          minEntry = res;
        }
      }
      // swap the indices
      let temp: u32 = prevIndex;
      prevIndex = curIndex;
      curIndex = temp;
      if (minEntry > maxIntDistance) {
        return 1.0;
      }
    }

    return dynamicPassMat[prevIndex][aLength] / f32(maxLength);
  `;
}
283
/**
 * Builds the WGSL body of a Needleman-Wunsch style distance between `a` and `b`.
 *
 * This is a variant of the Levenshtein snippet in which the substitution cost
 * is `1 - similarity`, and gaps are charged either a gap-open or a
 * gap-extension penalty. It is assumed that `suppInfo` provides
 * `similarityMatrix${entryIndex}`, `gapOpenPenalty${entryIndex}` and
 * `gapExtensionPenalty${entryIndex}` (per the original note, guaranteed by the
 * getProcessInfo function). The result is divided by the shorter entry's length.
 *
 * @param maxArraySize largest entry length; determines the size of the working arrays
 * @param entryIndex index into `computeInfo.entrySizes` and suffix of the `suppInfo` fields
 */
function webGPUNeedlemanWunsch(maxArraySize, entryIndex) {
    const rowSize = maxArraySize + 1;
    const entrySizes = `computeInfo.entrySizes[${entryIndex}]`;
    return `
    let aLength: u32 = ${entrySizes}[aIndex];
    let bLength: u32 = ${entrySizes}[bIndex];
    let maxLength: u32 = max(aLength, bLength);
    let minLength: u32 = min(aLength, bLength);

    let maxIntDistance = ceil(maxDistance * f32(maxLength));
    // we will store two arrays as matrix and swap the working indices per pass.
    // this way we can reduce memory usage per computation to just O(aLength)
    // the grid will have aLength + 1 columns and bLength + 1 rows
    // this will be guaranteed by iteration, but the array sizes must be known at compile time, so we will use a fixed size of maxArraySize
    var dynamicPassMat: array<array<f32, ${rowSize}u>, 2>; // initialize to 0

    // we need to keep track of which operation led to the current cell
    // i.e. whether we came from the left, top or diagonal to assign gap open/gap extend penalty
    var verticalGaps: array<u32, ${rowSize}u>;
    var horizontalGaps: array<u32, ${rowSize}u>;

    let gapOpenPenalty: f32 = suppInfo.gapOpenPenalty${entryIndex};
    let gapExtensionPenalty: f32 = suppInfo.gapExtensionPenalty${entryIndex};
    var prevIndex: u32 = 0;
    var curIndex: u32 = 1; // we will swap these indices per pass
    // initialize the first row
    for (var i = 0u; i <= aLength; i = i + 1u) {
      dynamicPassMat[prevIndex][i] = gapExtensionPenalty + f32(i - 1) * gapExtensionPenalty; // accounting for the fact that left and right gaps are less costly
      dynamicPassMat[curIndex][i] = 0.0;
    }
    dynamicPassMat[0][0] = 0.0;

    let simMatrix = &suppInfo.similarityMatrix${entryIndex}; // using pointers make things faster
    // iterate over the rows
    for (var i = 1u; i <= bLength; i = i + 1u) {
      let prevRow = &dynamicPassMat[prevIndex];
      let curRow = &dynamicPassMat[curIndex];
      (*curRow)[0] = gapExtensionPenalty + f32(i - 1) * gapExtensionPenalty;
      var minEntry: f32 = f32(maxLength);
      let monB = u32(b[i - 1]);
      for (var j = 1u; j <= aLength; j = j + 1u) {
        let monA = u32(a[j - 1]);

        let cost: f32 = (*prevRow)[j - 1] + 1f - (*simMatrix)[monA][monB];
        var top = (*prevRow)[j]; // deletion
        if (verticalGaps[j] > 0 || i == 1 || i == bLength) {
          top = top + gapExtensionPenalty;
        } else {
          top = top + gapOpenPenalty;
        }
        var left = (*curRow)[j - 1]; // insertion
        if (horizontalGaps[j - 1] > 0 || j == 1 || j == aLength) {
          left = left + gapExtensionPenalty;
        } else {
          left = left + gapOpenPenalty;
        }
        var res: f32 = min(
          min(
            top, // deletion
            left, // insertion
          ),
          cost // substitution
        );
        (*curRow)[j] = res;
        if (res < minEntry) {
          minEntry = res;
        }
        // update the horizontal and vertical gaps
        if (res == cost) {
          verticalGaps[j] = 0;
          horizontalGaps[j] = 0;
        } else if (res == left) {
          verticalGaps[j] = 0;
          horizontalGaps[j] = 1;
        } else {
          verticalGaps[j] = 1;
          horizontalGaps[j] = 0;
        }
      }
      // swap the indices
      let temp: u32 = prevIndex;
      prevIndex = curIndex;
      curIndex = temp;
      if (minEntry > maxIntDistance) {
        return 1.0;
      }
    }
    return dynamicPassMat[prevIndex][aLength] / f32(minLength);

  `;
}
376
/**
 * Builds the WGSL body of the Euclidean distance between `a` and `b` over
 * `maxArraySize` elements.
 *
 * Each operand is converted to f32 BEFORE subtracting. The surrounding notes
 * say `a` and `b` may hold uint32 values; for unsigned elements `a[i] - b[i]`
 * would wrap around whenever `a[i] < b[i]`. For f32 elements the result is
 * unchanged. This matches how webGPUNumericDistance converts its operands.
 *
 * @param maxArraySize number of elements compared
 * @param _entryIndex unused
 */
function webGPUEuclidean(maxArraySize, _entryIndex) {
    return `
    var dist: f32 = 0.0;
    for (var i = 0u; i < ${maxArraySize}; i = i + 1u) {
      dist = dist + (f32(a[i]) - f32(b[i])) * (f32(a[i]) - f32(b[i]));
    }
    return sqrt(dist);
  `;
}
385
/**
 * Builds the WGSL body of the cosine distance between vectors `a` and `b`,
 * returned as `(1 - cosineSimilarity) / 2`.
 *
 * Elements are converted to f32 BEFORE multiplying, so that integer-typed
 * inputs cannot overflow in the product; for f32 inputs the result is
 * unchanged. The unused `dist` local of the previous version is removed.
 * NOTE(review): an all-zero vector still yields a division by zero, as before.
 *
 * @param maxArraySize number of elements processed
 * @param _entryIndex unused
 */
function webGPUVectorCosine(maxArraySize, _entryIndex) {
    return `
    var productSum: f32 = 0.0;
    var aSquareSum: f32 = 0.0;
    var bSquareSum: f32 = 0.0;
    for (var i = 0u; i < ${maxArraySize}; i = i + 1u) {
      productSum = productSum + f32(a[i]) * f32(b[i]);
      aSquareSum = aSquareSum + f32(a[i]) * f32(a[i]);
      bSquareSum = bSquareSum + f32(b[i]) * f32(b[i]);
    }
    var sim = productSum / (sqrt(aSquareSum) * sqrt(bSquareSum));
    return (1.0 - sim) / 2.0;
  `;
}
400
/**
 * Builds the WGSL body of the Manhattan distance between `a` and `b` over
 * `maxArraySize` elements.
 *
 * Each operand is converted to f32 BEFORE subtracting, so unsigned elements
 * cannot wrap around when `a[i] < b[i]`; for f32 elements the result is
 * unchanged. This matches how webGPUNumericDistance converts its operands.
 *
 * @param maxArraySize number of elements compared
 * @param _entryIndex unused
 */
function webGPUManhattan(maxArraySize, _entryIndex) {
    return `
    var dist: f32 = 0.0;
    for (var i = 0u; i < ${maxArraySize}; i = i + 1u) {
      dist = dist + abs(f32(a[i]) - f32(b[i]));
    }
    return dist;
  `;
}
409
/**
 * Builds the WGSL body of an exact-match distance: 0.0 when both entries have
 * the same length and identical elements, 1.0 otherwise.
 *
 * @param _maxArraySize unused
 * @param entryIndex index into `computeInfo.entrySizes`
 */
function webGPUOneHotDistance(_maxArraySize, entryIndex) {
    const entrySizes = `computeInfo.entrySizes[${entryIndex}]`;
    return `
    let aLength: u32 = ${entrySizes}[aIndex];
    let bLength: u32 = ${entrySizes}[bIndex];
    if (aLength != bLength) {
      return 1.0;
    }
    for (var i = 0u; i < aLength; i = i + 1u) {
      if(a[i] != b[i]) {
        return 1.0;
      }
    }
    return 0.0;
  `;
}
424
/**
 * Builds the WGSL body of a numeric distance: the absolute difference of the
 * first elements of `a` and `b`, divided by `suppInfo.range${entryIndex}`.
 * That range field is assumed to be available in the supplementary info struct.
 *
 * @param _maxArraySize unused
 * @param entryIndex suffix of the `suppInfo.range` field
 */
function webGPUNumericDistance(_maxArraySize, entryIndex) {
    const rangeField = `suppInfo.range${entryIndex}`;
    return `
    let range = ${rangeField};
    return f32(abs(f32(a[0]) - f32(b[0])) / range);
  `;
}
431
- // tanimoto distance for uint32 arrays of length ${maxArraySize}
432
/**
 * Builds the WGSL body of the Tanimoto distance between two bit arrays stored
 * as `maxArraySize` words: `1 - common / (onA + onB - common)`, where the
 * counts are numbers of set bits. Returns 0.0 when neither array has a set bit.
 *
 * @param maxArraySize number of words in each bit array
 * @param _entryIndex unused
 */
function webGPUTanimotoBitArray(maxArraySize, _entryIndex) {
    const wordLoop = `for (var i = 0u; i < ${maxArraySize}u; i = i + 1u) {`;
    return `
    var onBitsa: u32 = 0u;
    var onBitsb: u32 = 0u;
    ${wordLoop}
      onBitsa = onBitsa + countOneBits(a[i]);
      onBitsb = onBitsb + countOneBits(b[i]);
    }

    if (onBitsa == 0u && onBitsb == 0u) {
      return 0.0;
    }

    let totalOnBits = onBitsa + onBitsb;
    var commonBits: u32 = 0u;
    ${wordLoop}
      commonBits = commonBits + countOneBits(a[i] & b[i]);
    }

    return 1.0 - f32(commonBits) / f32(totalOnBits - commonBits);
  `;
}
454
/**
 * Builds the WGSL body of the asymmetric distance between two bit arrays:
 * `1 - common / min(onA, onB)`, where the counts are numbers of set bits.
 * Returns 1.0 when either array has no set bit.
 *
 * @param maxArraySize number of words in each bit array
 * @param _entryIndex unused
 */
function webGPUAsymmetricBitArray(maxArraySize, _entryIndex) {
    const wordLoop = `for (var i = 0u; i < ${maxArraySize}u; i = i + 1u) {`;
    return `
    var onBitsa: u32 = 0u;
    var onBitsb: u32 = 0u;
    ${wordLoop}
      onBitsa = onBitsa + countOneBits(a[i]);
      onBitsb = onBitsb + countOneBits(b[i]);
    }
    let min = min(onBitsa, onBitsb);
    if (min == 0u) {
      return 1.0;
    }
    var commonBits: u32 = 0u;
    ${wordLoop}
      commonBits = commonBits + countOneBits(a[i] & b[i]);
    }
    return 1.0 - f32(commonBits) / f32(min);
  `;
}
473
/**
 * Builds the WGSL body of the cosine distance between two bit arrays:
 * `1 - common / sqrt(onA * onB)`, where the counts are numbers of set bits.
 * Returns 1.0 when the product of the two counts is zero.
 *
 * @param maxArraySize number of words in each bit array
 * @param _entryIndex unused
 */
function webGPUCosineBitArray(maxArraySize, _entryIndex) {
    const wordLoop = `for (var i = 0u; i < ${maxArraySize}u; i = i + 1u) {`;
    return `
    var onBitsa: u32 = 0u;
    var onBitsb: u32 = 0u;
    ${wordLoop}
      onBitsa = onBitsa + countOneBits(a[i]);
      onBitsb = onBitsb + countOneBits(b[i]);
    }
    let total = onBitsa * onBitsb; // p.s. here total is taken by multiplying
    if (total == 0u) {
      return 1.0;
    }
    var commonBits: u32 = 0u;
    ${wordLoop}
      commonBits = commonBits + countOneBits(a[i] & b[i]);
    }
    return 1.0 - f32(commonBits) / sqrt(f32(total));
  `;
}
492
/**
 * Builds the WGSL body of the Sokal distance between two bit arrays:
 * `1 - common / (2 * (onA + onB) - 3 * common)`, where the counts are numbers
 * of set bits. Returns 1.0 when neither array has a set bit.
 *
 * @param maxArraySize number of words in each bit array
 * @param _entryIndex unused
 */
function webGPUSokalBitArray(maxArraySize, _entryIndex) {
    const wordLoop = `for (var i = 0u; i < ${maxArraySize}u; i = i + 1u) {`;
    return `
    var onBitsa: u32 = 0u;
    var onBitsb: u32 = 0u;
    ${wordLoop}
      onBitsa = onBitsa + countOneBits(a[i]);
      onBitsb = onBitsb + countOneBits(b[i]);
    }
    let total = onBitsa + onBitsb;
    if (total == 0u) {
      return 1.0;
    }
    var commonBits: u32 = 0u;
    ${wordLoop}
      commonBits = commonBits + countOneBits(a[i] & b[i]);
    }
    return 1.0 - f32(commonBits) / f32(total * 2 - commonBits * 3);
  `;
}
511
// String enum of the distance functions that have a WebGPU implementation;
// the values are the display names of the distances.
var WEBGPUDISTANCE = WEBGPUDISTANCE || {};
Object.assign(WEBGPUDISTANCE, {
    HAMMING: "Hamming",
    EUCLIDEAN: "Euclidean",
    VECTOR_COSINE: "Vector Cosine",
    MANHATTAN: "Manhattan",
    TANIMOTO: "Tanimoto",
    LEVENSTEIN: "Levenshtein",
    NEEDLEMAN_WUNSCH: "Needlemann-Wunsch",
    MONOMER_CHEMICAL_DISTANCE: "Monomer chemical distance",
    SOKAL: "Sokal",
    COSINE: "Cosine",
    ASYMMETRIC: "Asymmetric",
    Difference: "Difference",
    OneHot: "One-Hot"
});
527
/**
 * Lookup table from metric name (a WEBGPUDISTANCE value) to the function registered for that metric.
 * NOTE(review): the generators visible next to this table return WGSL source text; presumably all
 * of the registered functions do — confirm against their definitions.
 */
const webGPUFunctions = {};
for (const [metric, generator] of [
    [WEBGPUDISTANCE.HAMMING, webGPUHamming],
    [WEBGPUDISTANCE.EUCLIDEAN, webGPUEuclidean],
    [WEBGPUDISTANCE.MANHATTAN, webGPUManhattan],
    [WEBGPUDISTANCE.VECTOR_COSINE, webGPUVectorCosine],
    [WEBGPUDISTANCE.TANIMOTO, webGPUTanimotoBitArray],
    [WEBGPUDISTANCE.LEVENSTEIN, webGPULevenstein],
    [WEBGPUDISTANCE.NEEDLEMAN_WUNSCH, webGPUNeedlemanWunsch],
    [WEBGPUDISTANCE.MONOMER_CHEMICAL_DISTANCE, webGPUMonomerChemicalDistance],
    [WEBGPUDISTANCE.SOKAL, webGPUSokalBitArray],
    [WEBGPUDISTANCE.COSINE, webGPUCosineBitArray],
    [WEBGPUDISTANCE.ASYMMETRIC, webGPUAsymmetricBitArray],
    [WEBGPUDISTANCE.Difference, webGPUNumericDistance],
    [WEBGPUDISTANCE.OneHot, webGPUOneHotDistance]
])
    webGPUFunctions[metric] = generator;
542
/**
 * Per-metric cost estimators: each entry maps the maximum entry size of a column to an integer
 * cost figure (always 1 for Difference).
 * NOTE(review): the divisors look empirically tuned; their origin is not visible here.
 */
const distanceFunctionComplexity = (() => {
    // cost grows with the entry size
    const linear = (divisor) => (maxEntrySize) => Math.ceil(maxEntrySize / divisor);
    // cost grows with the square of the entry size
    const quadratic = (divisor) => (maxEntrySize) => Math.ceil(maxEntrySize * maxEntrySize / divisor);
    return {
        [WEBGPUDISTANCE.HAMMING]: linear(30),
        [WEBGPUDISTANCE.EUCLIDEAN]: linear(30),
        [WEBGPUDISTANCE.MANHATTAN]: linear(30),
        [WEBGPUDISTANCE.TANIMOTO]: linear(30),
        [WEBGPUDISTANCE.SOKAL]: linear(30),
        [WEBGPUDISTANCE.COSINE]: linear(30),
        [WEBGPUDISTANCE.ASYMMETRIC]: linear(30),
        [WEBGPUDISTANCE.LEVENSTEIN]: quadratic(60),
        [WEBGPUDISTANCE.NEEDLEMAN_WUNSCH]: quadratic(60),
        [WEBGPUDISTANCE.MONOMER_CHEMICAL_DISTANCE]: linear(25),
        [WEBGPUDISTANCE.Difference]: (_maxEntrySize) => 1,
        [WEBGPUDISTANCE.OneHot]: linear(40),
        [WEBGPUDISTANCE.VECTOR_COSINE]: linear(30)
    };
})();
557
/**
 * For every entry type (keyed by its WGPUENTRYTYPE string), the set of metric names accepted for it.
 */
const TypeSupportedDistances = (() => {
    const D = WEBGPUDISTANCE;
    return {
        STRING: new Set([
            D.HAMMING, D.LEVENSTEIN, D.NEEDLEMAN_WUNSCH, D.MONOMER_CHEMICAL_DISTANCE, D.OneHot
        ]),
        UINT32ARRAY: new Set([
            D.HAMMING, D.EUCLIDEAN, D.MANHATTAN, D.MONOMER_CHEMICAL_DISTANCE, D.LEVENSTEIN,
            D.NEEDLEMAN_WUNSCH, D.TANIMOTO, D.COSINE, D.VECTOR_COSINE, D.SOKAL, D.ASYMMETRIC,
            D.OneHot, D.Difference
        ]),
        INT32ARRAY: new Set([
            D.EUCLIDEAN, D.MANHATTAN, D.OneHot, D.Difference, D.VECTOR_COSINE
        ]),
        FLOAT32ARRAY: new Set([
            D.EUCLIDEAN, D.MANHATTAN, D.Difference, D.VECTOR_COSINE
        ]),
        NUMBER: new Set([
            D.EUCLIDEAN, D.MANHATTAN, D.Difference
        ]),
        BITARRAY: new Set([
            D.TANIMOTO, D.COSINE, D.SOKAL, D.ASYMMETRIC
        ])
    };
})();
565
- //# sourceMappingURL=webGPU-multicol-distances.js.map
566
-
567
- /***/ }),
568
-
569
- /***/ "./node_modules/@datagrok-libraries/math/src/webGPU/preprocessing/webGPU-process-info.js":
570
- /*!***********************************************************************************************!*\
571
- !*** ./node_modules/@datagrok-libraries/math/src/webGPU/preprocessing/webGPU-process-info.js ***!
572
- \***********************************************************************************************/
573
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
574
-
575
- __webpack_require__.r(__webpack_exports__);
576
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
577
- /* harmony export */ webGPUProcessInfo: () => (/* binding */ webGPUProcessInfo)
578
- /* harmony export */ });
579
- /* harmony import */ var _multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../multi-col-distances/webGPU-multicol-distances */ "./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-multicol-distances.js");
580
- /* eslint-disable max-len */
581
-
582
/**
 * Prepares one column of entries for the WebGPU distance shaders.
 *
 * Steps: detect the entry type, encode every entry as a typed array, flatten the entries into a
 * single zero-padded typed array (one row of `maxEntryLen` values per entry), and build the
 * supplementary buffer plus WGSL struct-member text needed by the chosen metric.
 *
 * Side effect: `options` is mutated. For Needleman-Wunsch / monomer chemical distance a default
 * identity `scoringMatrix` and `alphabetIndexes` are stored on it when either is missing; for
 * Difference a computed `range` is stored when no positive numeric range was supplied.
 *
 * @param {Array} entryList entries of one column: strings, numbers, objects exposing `_data` and
 *   `_length`, or Float32Array / Uint32Array / Int32Array instances.
 * @param {string} distanceMetric metric name; defaults to Hamming.
 * @param {number} entryIndex index of this column; appended to the generated WGSL member names.
 * @param {object} options metric-specific options (gap penalties, scoring matrix, range).
 * @return {object} flattened data, per-entry sizes, complexity figure, supplementary buffer and
 *   the WGSL snippets describing both.
 * @throws {Error} when the entry type cannot be determined or the metric is not supported for it.
 */
function webGPUProcessInfo(entryList, distanceMetric = _multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_0__.WEBGPUDISTANCE.HAMMING, entryIndex, // index of the entries in the list of lists that we want to process
options = { gapOpenPenalty: 1.0, gapExtensionPenalty: 0.6 }) {
    // ---- detect the entry type and encode every entry as a typed array
    let entryType = null;
    let encodedList;
    if (entryList.some((item) => typeof item === 'string')) {
        entryType = "STRING" /* WGPUENTRYTYPE.STRING */;
        // one UTF-16 code unit per element
        encodedList = entryList.map((text) => Uint32Array.from(text.split(''), (ch) => ch.charCodeAt(0)));
    }
    else if (entryList.some((item) => typeof item === 'number')) {
        entryType = "NUMBER" /* WGPUENTRYTYPE.NUMBER */;
        encodedList = entryList.map((value) => new Float32Array([value]));
    }
    else if (typeof entryList[0] === 'object' && entryList.some((item) => '_data' in item && '_length' in item)) {
        entryType = "BITARRAY" /* WGPUENTRYTYPE.BITARRAY */;
        encodedList = entryList.map((bits) => bits._data);
    }
    else {
        // typed-array inputs are used as they are; the first matching kind wins
        const typedKinds = [
            [Float32Array, "FLOAT32ARRAY" /* WGPUENTRYTYPE.FLOAT32ARRAY */],
            [Uint32Array, "UINT32ARRAY" /* WGPUENTRYTYPE.UINT32ARRAY */],
            [Int32Array, "INT32ARRAY" /* WGPUENTRYTYPE.INT32ARRAY */]
        ];
        const matched = typedKinds.find(([Ctor]) => entryList.some((item) => item instanceof Ctor));
        if (matched) {
            entryType = matched[1];
            encodedList = entryList;
        }
    }
    if (!encodedList || !entryType)
        throw new Error('Invalid entry type, could not determine entry type from input list');
    // storage element type follows the first encoded entry
    let EncodedArrayConstructor = Uint32Array;
    if (encodedList[0] instanceof Int32Array)
        EncodedArrayConstructor = Int32Array;
    else if (encodedList[0] instanceof Float32Array)
        EncodedArrayConstructor = Float32Array;
    // entries may differ in length; the real lengths are kept for the metrics that need them
    const arraySizes = Uint32Array.from(encodedList, (arr) => arr.length);
    const distancesModule = _multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_0__;
    const supportedMetrics = distancesModule.TypeSupportedDistances[entryType];
    if (!supportedMetrics || !supportedMetrics.has(distanceMetric))
        throw new Error(`Distance metric '${distanceMetric}' not supported for entry type '${entryType}'`);
    let maxEntryLen = 0;
    for (const size of arraySizes) {
        if (size > maxEntryLen)
            maxEntryLen = size;
    }
    const complexity = distancesModule.distanceFunctionComplexity[distanceMetric](maxEntryLen);
    // every entry occupies a row of maxEntryLen elements; shorter entries stay zero-padded
    const flatSourceArray = new EncodedArrayConstructor(encodedList.length * maxEntryLen);
    for (let row = 0; row < encodedList.length; row++)
        flatSourceArray.set(encodedList[row], row * maxEntryLen);
    // ---- metric-specific supplementary information
    let suppInfoStructWgsl = '';
    let suppInfoSize = 0;
    let suppInfoType = "FLOAT32ARRAY" /* WGPUENTRYTYPE.FLOAT32ARRAY */;
    let suppInfoBuffer = null;
    const metricNames = distancesModule.WEBGPUDISTANCE;
    if (distanceMetric === metricNames.NEEDLEMAN_WUNSCH || distanceMetric === metricNames.MONOMER_CHEMICAL_DISTANCE) {
        let maxMonomerIndex;
        if (options.scoringMatrix && options.alphabetIndexes) {
            // matrix dimension follows the largest char code among the alphabet symbols
            maxMonomerIndex = 0;
            for (const symbol of Object.keys(options.alphabetIndexes))
                maxMonomerIndex = Math.max(maxMonomerIndex, symbol.charCodeAt(0));
        }
        else {
            // no scoring data supplied: derive the dimension from the data and store an identity matrix
            maxMonomerIndex = -1;
            for (let k = 0; k < flatSourceArray.length; k++) {
                if (flatSourceArray[k] > maxMonomerIndex)
                    maxMonomerIndex = flatSourceArray[k];
            }
            options.scoringMatrix =
                new Array(maxMonomerIndex + 1).fill(null).map(() => new Array(maxMonomerIndex + 1).fill(0));
            options.alphabetIndexes = {};
            for (let code = 0; code < options.scoringMatrix.length; code++) {
                options.scoringMatrix[code][code] = 1;
                options.alphabetIndexes[String.fromCharCode(code)] = code;
            }
        }
        const dimension = maxMonomerIndex + 1;
        // matrix addressed by char code, diagonal preset to 1
        const transferedSimilarityMatrix = new Array(dimension).fill(null).map(() => new Float32Array(dimension));
        for (let d = 0; d < dimension; d++)
            transferedSimilarityMatrix[d][d] = 1;
        const alphabetIndexes = options.alphabetIndexes;
        for (const rowSymbol of Object.keys(alphabetIndexes)) {
            for (const colSymbol of Object.keys(alphabetIndexes)) {
                if (rowSymbol !== colSymbol) {
                    transferedSimilarityMatrix[rowSymbol.charCodeAt(0)][colSymbol.charCodeAt(0)] =
                        options.scoringMatrix[alphabetIndexes[rowSymbol]][alphabetIndexes[colSymbol]];
                }
            }
        }
        // layout: gapOpen, gapExtension, then the matrix row by row (all f32, so no padding in between)
        suppInfoSize = 2 + dimension * dimension;
        suppInfoType = "FLOAT32ARRAY" /* WGPUENTRYTYPE.FLOAT32ARRAY */;
        suppInfoBuffer = new Float32Array(suppInfoSize);
        const gapOpen = options.gapOpenPenalty;
        suppInfoBuffer[0] = (gapOpen === null || gapOpen === undefined) ? 1.0 : gapOpen;
        const gapExtension = options.gapExtensionPenalty;
        suppInfoBuffer[1] = (gapExtension === null || gapExtension === undefined) ? 0.6 : gapExtension;
        transferedSimilarityMatrix.forEach((matrixRow, r) => {
            suppInfoBuffer.set(matrixRow, 2 + r * dimension);
        });
        suppInfoStructWgsl = `
            gapOpenPenalty${entryIndex}: f32,
            gapExtensionPenalty${entryIndex}: f32,
            similarityMatrix${entryIndex}: array<array<f32, ${maxMonomerIndex + 1}>, ${maxMonomerIndex + 1}>`;
    }
    else if (distanceMetric === metricNames.Difference) {
        // the difference is normalized by the value range; compute it unless a positive number was given
        const hasUsableRange = options.range && typeof options.range === 'number' && options.range > 0;
        if (!hasUsableRange) {
            let lowest = flatSourceArray[0];
            let highest = flatSourceArray[0];
            for (const value of flatSourceArray) {
                lowest = Math.min(lowest, value);
                highest = Math.max(highest, value);
            }
            options.range = highest - lowest;
        }
        if (options.range <= 0)
            options.range = 1.0; // all values are equal, every distance will be 0
        suppInfoSize = 1;
        suppInfoType = "FLOAT32ARRAY" /* WGPUENTRYTYPE.FLOAT32ARRAY */;
        suppInfoBuffer = new Float32Array([options.range]);
        suppInfoStructWgsl = `
            range${entryIndex}: f32`;
    }
    // the remaining metrics need no supplementary information
    let dataTypeWGSL = 'u32';
    if (flatSourceArray instanceof Int32Array)
        dataTypeWGSL = 'i32';
    else if (flatSourceArray instanceof Float32Array)
        dataTypeWGSL = 'f32';
    const dataStructWgsl = `data${entryIndex}: array<array<${dataTypeWGSL}, ${maxEntryLen}>, ${encodedList.length}>`;
    return {
        flatSourceArray,
        sourceArraySize: flatSourceArray.length,
        maxEntryLen,
        arraySizes,
        complexity,
        suppInfoBuffer,
        suppInfoSize,
        suppInfoType: suppInfoType,
        suppInfoStructWgsl,
        entryType,
        dataTypeWGSL,
        dataStructWgsl,
        EncodedArrayConstructor
    };
}
720
- //# sourceMappingURL=webGPU-process-info.js.map
721
-
722
- /***/ }),
723
-
724
- /***/ "./node_modules/@datagrok-libraries/math/src/webGPU/sparse-matrix/webGPU-sparse-matrix.js":
725
- /*!************************************************************************************************!*\
726
- !*** ./node_modules/@datagrok-libraries/math/src/webGPU/sparse-matrix/webGPU-sparse-matrix.js ***!
727
- \************************************************************************************************/
728
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
729
-
730
- __webpack_require__.r(__webpack_exports__);
731
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
732
- /* harmony export */ multiColWebGPUSparseMatrix: () => (/* binding */ multiColWebGPUSparseMatrix)
733
- /* harmony export */ });
734
- /* harmony import */ var _multi_col_distances_webGPU_aggregation__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../multi-col-distances/webGPU-aggregation */ "./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-aggregation.js");
735
- /* harmony import */ var _multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../multi-col-distances/webGPU-multicol-distances */ "./node_modules/@datagrok-libraries/math/src/webGPU/multi-col-distances/webGPU-multicol-distances.js");
736
- /* harmony import */ var _preprocessing_webGPU_process_info__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../preprocessing/webGPU-process-info */ "./node_modules/@datagrok-libraries/math/src/webGPU/preprocessing/webGPU-process-info.js");
737
- /* harmony import */ var _getGPUDevice__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../getGPUDevice */ "./node_modules/@datagrok-libraries/math/src/webGPU/getGPUDevice.js");
738
/**
 * Compiler-emitted async helper: runs a generator function as an asynchronous routine.
 * Every yielded value is awaited (wrapped in a promise of type P when it is not one already) and
 * its outcome is fed back into the generator via next()/throw(). The returned promise resolves
 * with the generator's return value and rejects with any error the generator lets escape.
 *
 * @param thisArg `this` value for the generator function.
 * @param _arguments arguments for the generator function (an empty list when falsy).
 * @param P promise constructor to use; the global Promise when falsy.
 * @param generator the generator function to drive.
 */
var __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {
    const toPromise = (value) => {
        if (value instanceof P)
            return value;
        return new P((resolve) => { resolve(value); });
    };
    if (!P)
        P = Promise;
    return new P((resolve, reject) => {
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // the generator function is replaced by its iterator before the first step
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
747
-
748
-
749
-
750
-
751
- /** generate sparse matrix based on list of lists of entries.
752
- * these entries are each encoded as Uint32Array or FLOAT32Array (depending on their type).
753
- * for example, sequences would be encoded as Uint32Array based on char code of the letter at each position.
754
- * [65, 66, 67, 68, 69] would be a sequence of 5 letters.
755
- * for chemical fingerprints, it would be a binary array of 0s and 1s,
756
- * represented as Uint32Array(_data property of DG bitarray).
757
- *
758
- * Beware that the sizes of entryList, distanceMetrics, weights and options must be the same.
759
- * if there are no options for entries i, pass an empty object.
760
- * for now options are needed for
761
- * needleman-wunsch and monomer chemical distances: see {@link BioDistanceFnOptions} as for how it should be passed
762
- * numeric distances (Difference): {range: number} where range is the range of the values in the column (max - min).
763
- * in both cases, if options are not provided, they will be calculated automatically.
764
- */
765
- function multiColWebGPUSparseMatrix(entryList, // list of lists of entries, for multiple columns
766
- threshold = 0.8, // similarity threshold, be ware that if you use too small threshold, there might be memory overflow...
767
- distanceMetrics, // distance metrics for each column
768
- aggregationFunction, // aggregation function for the distances
769
- weights, // weights for each column
770
- options // supplementary options for each column
771
- ) {
772
- return __awaiter(this, void 0, void 0, function* () {
773
- const device = yield (0,_getGPUDevice__WEBPACK_IMPORTED_MODULE_3__.getGPUDevice)();
774
- if (!device)
775
- return null; // if no device, return null, as we cannot do anything without it.
776
- const availableDistanceMetrics = Object.values(_multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_1__.WEBGPUDISTANCE);
777
- if (distanceMetrics.some((metric) => !availableDistanceMetrics.includes(metric)))
778
- throw new Error('Invalid distance metrics provided: ' + distanceMetrics.join(', '));
779
- const availableAggregationFunctions = Object.values(_multi_col_distances_webGPU_aggregation__WEBPACK_IMPORTED_MODULE_0__.WEBGSLAGGREGATION);
780
- if (!availableAggregationFunctions.includes(aggregationFunction))
781
- throw new Error('Invalid aggregation function provided: ' + aggregationFunction);
782
- const maxDistance = 1 - threshold; // maximum distance
783
- // first, check that all the supplementary options are provided and are the same length:
784
- if (options.length !== entryList.length ||
785
- options.length !== distanceMetrics.length ||
786
- options.length !== weights.length) {
787
- throw new Error('Options, weigths and distance functions must be provided for each column');
788
- }
789
- // check that all the entry lists are the same length
790
- if (entryList.some((list) => list.length !== entryList[0].length))
791
- throw new Error('All entry lists must be the same length');
792
- const numOfColumns = entryList.length; // number of columns
793
- const listSize = entryList[0].length; // size of each list (or column)
794
- const processInfo = entryList.map((entry, i) => {
795
- return (0,_preprocessing_webGPU_process_info__WEBPACK_IMPORTED_MODULE_2__.webGPUProcessInfo)(entry, distanceMetrics[i], i, options[i]);
796
- });
797
- if (numOfColumns === 0) {
798
- throw new Error('No columns provided. Please provide at least one column of data.');
799
- }
800
- if (numOfColumns === 1)
801
- aggregationFunction = _multi_col_distances_webGPU_aggregation__WEBPACK_IMPORTED_MODULE_0__.WEBGSLAGGREGATION.MANHATTAN; // save a bit of time
802
- // combine all struct types into one to put into the suppInfo struct.
803
- let suppInfoWgsl = processInfo
804
- .map((info) => info.suppInfoStructWgsl)
805
- .filter((wgsl) => !!wgsl && wgsl != '')
806
- .join(',\n');
807
- // structures in wgsl must have at least one member, so if we have no structures, we need to add a dummy one
808
- let needsDummy = false;
809
- if (!suppInfoWgsl || suppInfoWgsl.trim() == '') {
810
- needsDummy = true;
811
- suppInfoWgsl = '\ndummy: f32\n';
812
- }
813
- // combine all data wgsl struct code into one
814
- const dataWgsl = processInfo.map((info) => info.dataStructWgsl).filter((wgsl) => !!wgsl && wgsl != '').join(',\n');
815
- // combine all array sizes into one array (easier for setting)
816
- const arraySizes = new Uint32Array(numOfColumns * listSize);
817
- processInfo.forEach((info, i) => {
818
- arraySizes.set(info.arraySizes, i * listSize);
819
- }); // array.flat is not as optimized as this
820
- // if we try to map large arrays directly from GPU, sometimes, device disconnects. so we need to do it in chunks, a good number
821
- // we found is 10000. So we will perform computations in chunks of 10000. meaning that we will dispatch 10000 threads at a time.
822
- const numOfThreads = 10000;
823
- // in this case we do not need to worry about complexity of the algorithm, as the 100 is low enaugh number, which is limited by memory usage.
824
- const sparseResultSizePerThread = 100; // number of iterations per thread (number of pair comparisons)
825
- const combinedComplexity = processInfo.reduce((a, b) => a + b.complexity, 0); // combined complexity of all the columns
826
- const maxIterationsPerThread = Math.ceil(6000 / combinedComplexity); // maximum number of iterations per thread
827
- const workGroupDivision = 10; // how many threads inside of one workgroup dimension (in this case 10 * 10 threads per workgroup)
828
- const threadsPerWorkgroup = workGroupDivision * workGroupDivision;
829
- const workgroupsDim = Math.ceil(Math.sqrt(Math.ceil(numOfThreads / threadsPerWorkgroup))); // how many workgroups per 2d dimension
830
- const globalThreadDimSize = workgroupsDim * workGroupDivision; // how many threads per 2d dimension
831
- const condensedDistanceMatrixSize = listSize * (listSize - 1) / 2; // size of the condensed distance matrix, this many comparisons will be made.
832
- const dmChunkSizePerThread = Math.ceil(condensedDistanceMatrixSize / numOfThreads); // how many comparisons per thread
833
- const module = device.createShaderModule({
834
- label: 'Sparse matrix compute shader',
835
- code: `
836
- // each thread will perform ${sparseResultSizePerThread} iterations at one time, comparing ${sparseResultSizePerThread} pairs of entries.
837
- // in total, each thread will perform at most ${dmChunkSizePerThread} comparisons.
838
- // first is the result struct, containing is, js, and distances. each array with length of ${sparseResultSizePerThread},
839
- // and also integer for how many pairs were found to be below threshold.
840
- struct SparseResult {
841
- i: array<array<u32, ${sparseResultSizePerThread}>, ${numOfThreads}>,
842
- j: array<array<u32, ${sparseResultSizePerThread}>, ${numOfThreads}>,
843
- distances: array<array<f32, ${sparseResultSizePerThread}>, ${numOfThreads}>,
844
- found: array<u32, ${numOfThreads}>,
845
- done: array<u32, ${numOfThreads}>
846
- }
847
- // struct for the data
848
- struct ComputeInfo {
849
- // start at cols and rows, and end at cols and rows for each thread, these will be calculated on cpu and passed to gpu.
850
- startAtCols: array<u32, ${numOfThreads}>,
851
- startAtRows: array<u32, ${numOfThreads}>,
852
- endAtCols: array<u32, ${numOfThreads}>,
853
- endAtRows: array<u32, ${numOfThreads}>,
854
-
855
- // the ACTUALLY sizes of each entry
856
- entrySizes: array<array<u32, ${listSize}>, ${numOfColumns}>,
857
- // the weights for each entry
858
- weights: array<f32, ${numOfColumns}>,
859
- // the data for each entry
860
- ${dataWgsl} // an example of the dataWgsl would be:
861
- //data0: array<array<u32,20>,100>,
862
- //data1: array<array<u32,20>,100>
863
- }
864
-
865
- // struct for the supplementary information
866
- struct SuppInfo {
867
- // struct containing all the supplementary info, like scoring matrix, alphabet indexes, range, etc.
868
- ${suppInfoWgsl}
869
- };
870
-
871
- @group(0) @binding(0) var<storage, read_write> computeInfo: ComputeInfo;
872
- @group(0) @binding(1) var<storage, read_write> suppInfo: SuppInfo;
873
- @group(0) @binding(2) var<storage, read_write> results: SparseResult;
874
- @compute @workgroup_size(${workGroupDivision}, ${workGroupDivision}) fn calcSparseMatrix(
875
- @builtin(global_invocation_id) id: vec3<u32>
876
- ) {
877
- ${needsDummy ? `let otherDummy = suppInfo.dummy * 2;` : ''} // just to make sure that the suppInfo is not optimized out
878
- let threadCol = id.x;
879
- let threadRow = id.y;
880
- let linearIndex = threadRow * ${globalThreadDimSize} + threadCol;
881
- if (linearIndex >= ${numOfThreads}) {
882
- return; // if we are out of bounds, return
883
- }
884
- var startAtCol: u32 = computeInfo.startAtCols[linearIndex];
885
- var startAtRow: u32 = computeInfo.startAtRows[linearIndex];
886
- let endAtCol: u32 = min(computeInfo.endAtCols[linearIndex], ${listSize}u);
887
- let endAtRow: u32 = min(computeInfo.endAtRows[linearIndex], ${listSize}u);
888
- let is = &results.i[linearIndex];
889
- let js = &results.j[linearIndex];
890
- let distances = &results.distances[linearIndex];
891
- results.found[linearIndex] = 0; // initialize the found counter
892
- var found: u32 = 0;
893
- if (results.done[linearIndex] > 0) {
894
- return; // if we are done, return
895
- }
896
- for (var i = 0; i < ${maxIterationsPerThread}; i++) {
897
- if (startAtCol >= endAtCol && startAtRow >= endAtRow) {
898
- results.done[linearIndex] = 1;
899
- break;
900
- }
901
- if (found >= ${sparseResultSizePerThread}) {
902
- break;
903
- }
904
- let dist = combinedDistance(startAtCol, startAtRow);
905
- if (dist <= ${maxDistance}) {
906
- (*is)[found] = startAtCol;
907
- (*js)[found] = startAtRow;
908
- (*distances)[found] = dist;
909
- found = found + 1;
910
- }
911
- startAtCol = startAtCol + 1;
912
- if (startAtCol >= ${listSize}u) {
913
- startAtRow += 1;
914
- startAtCol = startAtRow + 1;
915
- }
916
- }
917
- results.found[linearIndex] = found;
918
- // update the startAtCols and startAtRows
919
- computeInfo.startAtCols[linearIndex] = startAtCol;
920
- computeInfo.startAtRows[linearIndex] = startAtRow;
921
-
922
- }
923
-
924
- // this will generate the distance script for each distance metric and then combine them into one
925
- ${getCombinedDistanceScript(distanceMetrics, processInfo.map((info) => info.maxEntryLen), maxDistance, aggregationFunction)}
926
-
927
-
928
- `
929
- });
930
- const pipeline = device.createComputePipeline({
931
- label: 'sparse matrix compute pipeline',
932
- layout: 'auto',
933
- compute: {
934
- module,
935
- entryPoint: 'calcSparseMatrix',
936
- },
937
- });
938
- // generate startAtCols, startAtRows, endAtCols, endAtRows
939
- const startAtCols = new Uint32Array(numOfThreads);
940
- const startAtRows = new Uint32Array(numOfThreads);
941
- const endAtCols = new Uint32Array(numOfThreads);
942
- const endAtRows = new Uint32Array(numOfThreads);
943
- const chunkSize = Math.floor(condensedDistanceMatrixSize / numOfThreads); // size of the chunk per thread (in total)
944
- let startRow = 0;
945
- let startCol = 1;
946
- console.time('GPUthreadStarts');
947
- for (let i = 0; i < numOfThreads; i++) {
948
- const endIdx = i === numOfThreads - 1 ? condensedDistanceMatrixSize - 1 : (i + 1) * chunkSize;
949
- // fancy formulas to calculate the start and end indices for the condensed distance matrix for each thread start
950
- const endRow = listSize - 2 - Math.floor(Math.sqrt(-8 * endIdx + 4 * listSize * (listSize - 1) - 7) / 2 - 0.5);
951
- const endCol = endIdx - listSize * endRow + Math.floor((endRow + 1) * (endRow + 2) / 2);
952
- startAtCols[i] = startCol;
953
- startAtRows[i] = startRow;
954
- endAtCols[i] = endCol;
955
- endAtRows[i] = endRow;
956
- startRow = endRow;
957
- startCol = endCol;
958
- // const startRow = values[0].length - 2 - Math.floor(
959
- // Math.sqrt(-8 * startIdx + 4 * values[0].length * (values[0].length - 1) - 7) / 2 - 0.5);
960
- // const startCol = startIdx - values[0].length * startRow + Math.floor((startRow + 1) * (startRow + 2) / 2);
961
- }
962
- console.timeEnd('GPUthreadStarts');
963
- // size of the computeInfo buffer
964
- const computeInfoBuffer32Size = numOfThreads * 4 + // startAtCols, startAtRows, endAtCols, endAtRows
965
- listSize * numOfColumns + // entrySizes
966
- numOfColumns + // weights
967
- processInfo.reduce((a, b) => a + b.sourceArraySize, 0);
968
- // size of the suppInfo buffer
969
- const suppInfoBuffer32Size = processInfo.reduce((a, b) => a + b.suppInfoSize, 0);
970
- // size of the results buffer
971
- const sparseMatrixEachArray32Size = sparseResultSizePerThread * numOfThreads;
972
- const resultsBuffer32Size = 3 * sparseMatrixEachArray32Size + numOfThreads + numOfThreads; // i, j, distances, found, done
973
- // create a buffer on the GPU to hold computeInfo
974
- // beware that struct must be padded to 16 bytes, so we need to calculate the size of the struct in 32bit values
975
- const computeInfoBufferSize = computeInfoBuffer32Size * Uint32Array.BYTES_PER_ELEMENT;
976
- let paddedComputeInfoBufferSize = computeInfoBufferSize;
977
- const remainder = computeInfoBufferSize & 15; // check if the size is a multiple of 16
978
- if (remainder !== 0)
979
- paddedComputeInfoBufferSize += 16 - remainder; // pad the size accordingly
980
- const computeInfoBuffer = device.createBuffer({
981
- label: 'compute info buffer',
982
- size: paddedComputeInfoBufferSize,
983
- usage: GPUBufferUsage.STORAGE |
984
- GPUBufferUsage.COPY_SRC |
985
- GPUBufferUsage.COPY_DST,
986
- mappedAtCreation: true,
987
- });
988
- const mappedComputeInfoArrayBuffer = computeInfoBuffer.getMappedRange(); // get full buffer
989
- // dynamic offset for the computeInfo buffer
990
- let computeInfoOffSet = 0;
991
- // first write the startAtCols, startAtRows, endAtCols, endAtRows
992
- const startAtColsBufferView = new Uint32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, numOfThreads);
993
- startAtColsBufferView.set(startAtCols);
994
- computeInfoOffSet += numOfThreads * Uint32Array.BYTES_PER_ELEMENT; // array of 32bit values
995
- const startAtRowsBufferView = new Uint32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, numOfThreads);
996
- startAtRowsBufferView.set(startAtRows);
997
- computeInfoOffSet += numOfThreads * Uint32Array.BYTES_PER_ELEMENT; // array of 32bit values
998
- const endAtColsBufferView = new Uint32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, numOfThreads);
999
- endAtColsBufferView.set(endAtCols);
1000
- computeInfoOffSet += numOfThreads * Uint32Array.BYTES_PER_ELEMENT; // array of 32bit values
1001
- const endAtRowsBufferView = new Uint32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, numOfThreads);
1002
- endAtRowsBufferView.set(endAtRows);
1003
- computeInfoOffSet += numOfThreads * Uint32Array.BYTES_PER_ELEMENT; // array of 32bit values
1004
- // then write the entrySizes
1005
- const entrySizesView = new Uint32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, arraySizes.length);
1006
- entrySizesView.set(arraySizes);
1007
- computeInfoOffSet += arraySizes.length * Uint32Array.BYTES_PER_ELEMENT; // array of 32bit values
1008
- // then write the weights
1009
- const weightsView = new Float32Array(mappedComputeInfoArrayBuffer, computeInfoOffSet, numOfColumns);
1010
- weightsView.set(weights);
1011
- computeInfoOffSet += numOfColumns * Float32Array.BYTES_PER_ELEMENT;
1012
- // finally, write the data itself
1013
- for (const info of processInfo) {
1014
- const ArrayConstructor = info.EncodedArrayConstructor;
1015
- const chunkSize = info.sourceArraySize;
1016
- const dataView = new ArrayConstructor(mappedComputeInfoArrayBuffer, computeInfoOffSet, chunkSize); //new ArrayConstructor(computeInfoBuffer.getMappedRange(computeInfoOffSet, chunkByteSize));
1017
- dataView.set(info.flatSourceArray);
1018
- computeInfoOffSet += chunkSize * ArrayConstructor.BYTES_PER_ELEMENT;
1019
- }
1020
- // we are done at this point.
1021
- computeInfoBuffer.unmap();
1022
- // create a buffer on the GPU to hold suppInfo
1023
- // same here, we need to pad the size of the struct to 16 bytes
1024
- const suppInfoBufferSize = suppInfoBuffer32Size * Uint32Array.BYTES_PER_ELEMENT;
1025
- let paddedSuppInfoBufferSize = suppInfoBufferSize;
1026
- const suppInfoRemainder = suppInfoBufferSize & 15; // check if the size is a multiple of 16
1027
- if (suppInfoRemainder !== 0)
1028
- paddedSuppInfoBufferSize += 16 - suppInfoRemainder; // pad the size accordingly
1029
- paddedSuppInfoBufferSize = Math.max(paddedSuppInfoBufferSize, 16);
1030
- const suppInfoBuffer = device.createBuffer({
1031
- label: 'supp info buffer',
1032
- size: paddedSuppInfoBufferSize,
1033
- usage: GPUBufferUsage.STORAGE |
1034
- GPUBufferUsage.COPY_SRC |
1035
- GPUBufferUsage.COPY_DST,
1036
- mappedAtCreation: true,
1037
- });
1038
- const mappedSuppInfoArrayBuffer = suppInfoBuffer.getMappedRange(); // get full buffer
1039
- let suppInfoOffSet = 0;
1040
- for (const info of processInfo) {
1041
- if (info.suppInfoBuffer && info.suppInfoBuffer.byteLength > 0 && info.suppInfoSize > 0) {
1042
- const ArrayConstructor = info.suppInfoType === "UINT32ARRAY" /* WGPUENTRYTYPE.UINT32ARRAY */ ? Uint32Array : Float32Array;
1043
- const suppInfoView = new ArrayConstructor(mappedSuppInfoArrayBuffer, suppInfoOffSet, info.suppInfoBuffer.length); //new ArrayConstructor(suppInfoBuffer.getMappedRange(suppInfoOffSet, info.suppInfoBuffer.byteLength));
1044
- suppInfoView.set(info.suppInfoBuffer);
1045
- suppInfoOffSet += info.suppInfoBuffer.byteLength; // info.suppInfoBuffer.length * ArrayConstructor.BYTES_PER_ELEMENT;
1046
- }
1047
- }
1048
- if (suppInfoOffSet === 0) {
1049
- const dummyView = new Uint32Array(mappedSuppInfoArrayBuffer, 0, 4); //new Uint32Array(suppInfoBuffer.getMappedRange(0, 16));
1050
- dummyView.set([1, 1, 1, 1]);
1051
- }
1052
- suppInfoBuffer.unmap();
1053
- // create a buffer for the results
1054
- const resultsBufferSize = resultsBuffer32Size * Uint32Array.BYTES_PER_ELEMENT;
1055
- let paddedResultsBufferSize = resultsBufferSize;
1056
- const resultsRemainder = resultsBufferSize & 15; // check if the size is a multiple of 16
1057
- if (resultsRemainder !== 0)
1058
- paddedResultsBufferSize += 16 - resultsRemainder; // pad the size accordingly
1059
- const resultsBuffer = device.createBuffer({
1060
- label: 'results buffer',
1061
- size: paddedResultsBufferSize,
1062
- usage: GPUBufferUsage.STORAGE |
1063
- GPUBufferUsage.COPY_SRC
1064
- });
1065
- // Setup a bindGroup to tell the shader which
1066
- // buffer to use for the computation
1067
- const bindGroup = device.createBindGroup({
1068
- label: 'bindGroup for sparse matrix buffer',
1069
- layout: pipeline.getBindGroupLayout(0),
1070
- entries: [
1071
- { binding: 0, resource: { buffer: computeInfoBuffer } },
1072
- { binding: 1, resource: { buffer: suppInfoBuffer } },
1073
- { binding: 2, resource: { buffer: resultsBuffer } },
1074
- ],
1075
- });
1076
- //const pairComparisonsPerPass = maxIterationsPerThread * numOfThreads;
1077
- //const passes = Math.ceil(condensedDistanceMatrixSize / pairComparisonsPerPass);
1078
- // we will distpatch this many passes to the GPU, and it will handle indexes all by itself.
1079
- // we already copied the start/end information to it, so it will know where to start and end on each pass.
1080
- const resultsOutBuffer = device.createBuffer({
1081
- label: 'results out buffer',
1082
- size: resultsBuffer.size,
1083
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
1084
- });
1085
- const resultIs = [];
1086
- const resultJs = [];
1087
- const resultDistances = [];
1088
- //let combinedFound = 0;
1089
- let isAllDone = false;
1090
- while (!isAllDone) {
1091
- // Encode commands to do the computation
1092
- const encoder = device.createCommandEncoder({
1093
- label: 'distance encoder',
1094
- });
1095
- const pass = encoder.beginComputePass({
1096
- label: 'distance compute pass',
1097
- });
1098
- pass.setPipeline(pipeline);
1099
- pass.setBindGroup(0, bindGroup);
1100
- pass.dispatchWorkgroups(workgroupsDim, workgroupsDim);
1101
- pass.end();
1102
- encoder.copyBufferToBuffer(resultsBuffer, 0, resultsOutBuffer, 0, resultsOutBuffer.size);
1103
- // Finish encoding and submit the commands
1104
- const commandBuffer = encoder.finish();
1105
- device.queue.submit([commandBuffer]);
1106
- // Read the results
1107
- yield device.queue.onSubmittedWorkDone();
1108
- yield resultsOutBuffer.mapAsync(GPUMapMode.READ);
1109
- const resultsOutArrayBuffer = resultsOutBuffer.getMappedRange();
1110
- // read the results
1111
- let resultOffset = 0;
1112
- const resultsI = new Uint32Array(resultsOutArrayBuffer, resultOffset, sparseMatrixEachArray32Size);
1113
- resultOffset += sparseMatrixEachArray32Size * Uint32Array.BYTES_PER_ELEMENT;
1114
- const resultsJ = new Uint32Array(resultsOutArrayBuffer, resultOffset, sparseMatrixEachArray32Size);
1115
- resultOffset += sparseMatrixEachArray32Size * Uint32Array.BYTES_PER_ELEMENT;
1116
- const resultsDistances = new Float32Array(resultsOutArrayBuffer, resultOffset, sparseMatrixEachArray32Size);
1117
- resultOffset += sparseMatrixEachArray32Size * Float32Array.BYTES_PER_ELEMENT;
1118
- const resultsFound = new Uint32Array(resultsOutArrayBuffer, resultOffset, numOfThreads);
1119
- resultOffset += numOfThreads * Uint32Array.BYTES_PER_ELEMENT;
1120
- const resultsDone = new Uint32Array(resultsOutArrayBuffer, resultOffset, numOfThreads);
1121
- isAllDone = resultsDone.every((d) => d === 1);
1122
- const totalResults = resultsFound.reduce((a, b) => a + b, 0);
1123
- const combinedI = new Uint32Array(totalResults);
1124
- const combinedJ = new Uint32Array(totalResults);
1125
- const combinedDistances = new Float32Array(totalResults);
1126
- let combinedOffset = 0;
1127
- for (let resI = 0; resI < resultsFound.length; resI++) {
1128
- const found = resultsFound[resI];
1129
- if (found === 0)
1130
- continue;
1131
- combinedI.set(resultsI.subarray(resI * sparseResultSizePerThread, resI * sparseResultSizePerThread + found), combinedOffset);
1132
- combinedJ.set(resultsJ.subarray(resI * sparseResultSizePerThread, resI * sparseResultSizePerThread + found), combinedOffset);
1133
- combinedDistances.set(resultsDistances.subarray(resI * sparseResultSizePerThread, resI * sparseResultSizePerThread + found), combinedOffset);
1134
- combinedOffset += found;
1135
- }
1136
- resultIs.push(combinedI);
1137
- resultJs.push(combinedJ);
1138
- resultDistances.push(combinedDistances);
1139
- resultsOutBuffer.unmap();
1140
- }
1141
- const totalSize = resultIs.reduce((a, b) => a + b.length, 0);
1142
- const finalI = new Uint32Array(totalSize);
1143
- const finalJ = new Uint32Array(totalSize);
1144
- const finalDistances = new Float32Array(totalSize);
1145
- let finalOffset = 0;
1146
- for (let i = 0; i < resultIs.length; i++) {
1147
- finalI.set(resultIs[i], finalOffset);
1148
- finalJ.set(resultJs[i], finalOffset);
1149
- finalDistances.set(resultDistances[i], finalOffset);
1150
- finalOffset += resultIs[i].length;
1151
- }
1152
- // as rule mandates, destroy all buffers.
1153
- computeInfoBuffer.destroy();
1154
- suppInfoBuffer.destroy();
1155
- resultsBuffer.destroy();
1156
- resultsOutBuffer.destroy();
1157
- return { i: finalI, j: finalJ, distance: finalDistances };
1158
- });
1159
- }
1160
/**
 * Generates the WGSL source for the per-column distance functions and the function that
 * combines them into a single distance.
 *
 * One `distanceScript<N>` function is emitted per entry of `distanceMetrics`; its body is
 * produced by the `webGPUFunctions` generator registered under that metric name. A final
 * `combinedDistance` function calls every per-column function, stores the values in a
 * `distances` array and ends with the snippet produced by the selected aggregation generator.
 *
 * @param distanceMetrics - metric names, one per column (keys of `webGPUFunctions`).
 * @param maxEntryLens - per-column value passed as the first argument to the metric generator.
 * @param maxDistance - value interpolated into each per-column function as `maxDistance`.
 * @param aggregation - key of `WEBGSLAGGREGATIONFUNCTIONS` selecting the combining snippet.
 * @returns the concatenated WGSL source text.
 */
function getCombinedDistanceScript(distanceMetrics, maxEntryLens, maxDistance, aggregation) {
  const metricGenerators = _multi_col_distances_webGPU_multicol_distances__WEBPACK_IMPORTED_MODULE_1__.webGPUFunctions;
  const aggregationGenerators = _multi_col_distances_webGPU_aggregation__WEBPACK_IMPORTED_MODULE_0__.WEBGSLAGGREGATIONFUNCTIONS;

  // One WGSL function per column, each reading that column's data array.
  const perColumnFunctions = [];
  const distanceAssignments = [];
  for (const [col, metric] of distanceMetrics.entries()) {
    perColumnFunctions.push(`
      fn distanceScript${col}(aIndex: u32, bIndex: u32) -> f32 {
        let a = computeInfo.data${col}[aIndex];
        let b = computeInfo.data${col}[bIndex];
        let maxDistance: f32 = ${maxDistance};
        ${metricGenerators[metric](maxEntryLens[col], col)}
      }
      `);
    distanceAssignments.push(`distances[${col}] = distanceScript${col}(aIndex, bIndex);`);
  }

  // The combining function fills `distances` and then runs the aggregation snippet.
  const combinedFunction = `
      fn combinedDistance(aIndex: u32, bIndex: u32) -> f32 {
        var distances: array<f32, ${distanceMetrics.length}>;
        ${distanceAssignments.join('\n')}
        ${aggregationGenerators[aggregation](distanceMetrics.length)}
      }

      `;
  return perColumnFunctions.join('\n') + '\n' + combinedFunction;
}
1182
- //# sourceMappingURL=webGPU-sparse-matrix.js.map
1183
-
1184
- /***/ }),
1185
-
1186
- /***/ "./node_modules/@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service.js":
1187
- /*!******************************************************************************************!*\
1188
- !*** ./node_modules/@datagrok-libraries/ml/src/distance-matrix/sparse-matrix-service.js ***!
1189
- \******************************************************************************************/
1190
- /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => {
1191
-
1192
- __webpack_require__.r(__webpack_exports__);
1193
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
1194
- /* harmony export */ SparseMatrixService: () => (/* binding */ SparseMatrixService)
1195
- /* harmony export */ });
1196
- /* harmony import */ var _types__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./types */ "./node_modules/@datagrok-libraries/ml/src/distance-matrix/types.js");
1197
- /* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./utils */ "./node_modules/@datagrok-libraries/ml/src/distance-matrix/utils.js");
1198
-
1199
-
1200
/**
 * Computes sparse pairwise-distance matrices and nearest-neighbour tables.
 *
 * The asynchronous methods split the index range of the work (condensed upper-triangle pair
 * indexes, or row indexes for the single-value KNN) into contiguous chunks and hand each chunk
 * to a dedicated web worker; the partial results are merged on the calling thread.
 */
class SparseMatrixService {
  constructor() {
    // Keep two logical cores free, but always use at least one worker.
    // Fall back to 1 when `hardwareConcurrency` is unavailable so the count never becomes NaN
    // (which would make `new Array(count)` throw later).
    const cores = navigator.hardwareConcurrency || 1;
    this._workerCount = Math.max(cores - 2, 1);
  }

  /**
   * Posts `message` to `worker` and settles with the `data` of its first reply.
   * The worker is terminated as soon as it replies or raises an `error` event, so a worker that
   * fails to load or throws can no longer leave the returned promise pending forever.
   *
   * @param worker - worker to run; it is terminated before the promise settles.
   * @param message - payload passed to `worker.postMessage`.
   * @returns promise resolved with the reply `data`, or rejected with `data.error` / the worker error.
   */
  static _runWorker(worker, message) {
    return new Promise((resolve, reject) => {
      worker.onmessage = ({ data }) => {
        worker.terminate();
        if (data.error)
          reject(data.error);
        else
          resolve(data);
      };
      worker.onerror = (event) => {
        worker.terminate();
        reject(event?.error ?? new Error(event?.message || 'Worker failed'));
      };
      worker.postMessage(message);
    });
  }

  /**
   * Awaits all worker promises; if any of them rejects, every worker is terminated so the
   * remaining ones do not keep computing results nobody will read, and the error is rethrown.
   */
  static async _awaitWorkers(workers, promises) {
    try {
      return await Promise.all(promises);
    }
    catch (e) {
      workers.forEach((w) => w.terminate());
      throw e;
    }
  }

  /** Throws when the per-column arrays do not line up; shared by the multi-column KNN methods. */
  static _assertConsistentInputs(values, fnNames, opts, weights) {
    if (values.length !== fnNames.length || values.length !== opts.length || values.length !== weights.length)
      throw new Error('values, distance functions, options and weights arrays should have the same length');
    if (values.some((v) => v.length !== values[0].length))
      throw new Error('all values arrays should have the same length');
  }

  /**
   * Shrinks a sparse matrix by keeping only its smallest distances.
   *
   * Distances are histogrammed into 200 equal bins over [0, 1); bins are accumulated from the
   * smallest distance upwards until at least `maxNum` entries are covered, and every entry below
   * the upper edge of the last accumulated bin is kept. Because whole bins are taken, the result
   * may contain more than `maxNum` entries. Distances outside [0, 1) fall outside the histogram.
   *
   * @param orig - sparse matrix `{ i, j, distance }` with parallel arrays.
   * @param maxNum - target number of entries to keep.
   * @returns new sparse matrix `{ i: Uint32Array, j: Uint32Array, distance: Float32Array }`.
   */
  static pruneSparseMatrix(orig, maxNum = 1000000) {
    const mult = 200;
    const binRanges = new Uint32Array(mult);
    const distances = orig.distance;
    const len = distances.length;
    for (let i = 0; i < len; i++) {
      const r = Math.floor(distances[i] * mult);
      binRanges[r]++; // out-of-range bins are silently ignored by the typed array
    }
    // Accumulate bins until the requested number of entries is reached.
    let acum = 0;
    let maxIndex = 0;
    for (let i = 0; i < mult; i++) {
      acum += binRanges[i];
      maxIndex = i;
      if (acum >= maxNum)
        break;
    }
    const resIs = new Uint32Array(acum);
    const resJs = new Uint32Array(acum);
    const resDs = new Float32Array(acum);
    const is = orig.i;
    const js = orig.j;
    const maxDistance = (maxIndex + 1) / mult; // upper edge of the last accumulated bin
    let ind = 0;
    for (let i = 0; i < len; i++) {
      if (distances[i] < maxDistance) {
        resIs[ind] = is[i];
        resJs[ind] = js[i];
        resDs[ind] = distances[i];
        ind++;
      }
    }
    return { i: resIs, j: resJs, distance: resDs };
  }

  /**
   * Computes a sparse matrix over several columns using the sparse-matrix workers.
   *
   * For inputs with more than 20000 rows a minimal threshold is estimated first and used when it
   * is larger than the requested one. NOTE: the effective threshold is written into every object
   * of `opts` (key `threshold`), i.e. the caller's option objects are mutated.
   *
   * @param values - one array of values per column; all columns are expected to have the same length.
   * @param fnNames - distance function name per column.
   * @param threshold - requested threshold forwarded to the workers.
   * @param opts - per-column option objects.
   * @param weights - per-column weights.
   * @param aggregationMethod - how per-column distances are combined.
   * @returns `{ i: Int32Array, j: Int32Array, distance: Float32Array }` concatenated in worker order.
   */
  async calcMultiColumn(values, fnNames, threshold, opts = [{}], weights = [1], aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    const matSize = values[0].length * (values[0].length - 1) / 2;
    const chunkSize = Math.floor(matSize / this._workerCount);
    const minThreshold = values[0].length > 20000 ?
      await this.getMinimalThreshold(values, fnNames, opts, weights, aggregationMethod) : 0;
    if (threshold < minThreshold) {
      console.log(`using threshold ${minThreshold}`);
      threshold = minThreshold;
    }
    opts.forEach((_, i) => opts[i]['threshold'] = threshold);
    const workers = Array.from({ length: this._workerCount }, () => new Worker(new URL(/* worker import */ __webpack_require__.p + __webpack_require__.u("node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-worker_js"), __webpack_require__.b)));
    const promises = workers.map((worker, idx) => {
      const startIdx = idx * chunkSize;
      const endIdx = idx === this._workerCount - 1 ? matSize : (idx + 1) * chunkSize;
      if (endIdx <= startIdx) {
        // Nothing to compute for this chunk: release the worker instead of running it on an empty range.
        worker.terminate();
        return Promise.resolve({ i: new Int32Array(0), j: new Int32Array(0), distance: new Float32Array(0), idx });
      }
      return SparseMatrixService._runWorker(worker, { values, startIdx, endIdx, threshold, fnNames, opts, weights, aggregationMethod });
    });
    const results = await SparseMatrixService._awaitWorkers(workers, promises);
    const fullSize = results.reduce((acc, val) => acc + val.i.length, 0);
    const i = new Int32Array(fullSize);
    const j = new Int32Array(fullSize);
    const distance = new Float32Array(fullSize);
    let offset = 0;
    for (const res of results) {
      i.set(res.i, offset);
      j.set(res.j, offset);
      distance.set(res.distance, offset);
      offset += res.i.length;
    }
    return { i, j, distance };
  }

  /** Single-column convenience wrapper around {@link calcMultiColumn} with weight 1. */
  async calc(values, fnName, threshold, opts = {}) {
    return await this.calcMultiColumn([values], [fnName], threshold, [opts], [1]);
  }

  /** Single-column convenience wrapper around {@link multiColumnKNN} with weight 1. */
  async getKNN(values, fnName, nNeighbours = 15, opts = {}) {
    return await this.multiColumnKNN([values], [fnName], nNeighbours, [opts], [1]);
  }

  /** Single-column convenience wrapper around {@link multiColumnThresholdKnn} with weight 1. */
  async getThresholdKNN(values, fnName, threshold = 0.8, opts = {}) {
    return await this.multiColumnThresholdKnn([values], [fnName], threshold, [opts], [1]);
  }

  /**
   * Collects, for every row, the neighbours reported by the knn-threshold workers.
   *
   * @returns `{ knnDistances, knnIndexes }`: one array per row, holding the merged entries of all workers.
   * @throws Error when the per-column arrays have inconsistent lengths.
   */
  async multiColumnThresholdKnn(values, fnNames, threshold = 0.8, opts, weights, aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    SparseMatrixService._assertConsistentInputs(values, fnNames, opts, weights);
    const rowCount = values[0].length;
    const matSize = rowCount * (rowCount - 1) / 2;
    const chunkSize = Math.floor(matSize / this._workerCount);
    const workers = Array.from({ length: this._workerCount }, () => new Worker(new URL(/* worker import */ __webpack_require__.p + __webpack_require__.u("vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-threshold-worker_js"), __webpack_require__.b)));
    const promises = workers.map((worker, idx) => {
      const startIdx = idx * chunkSize;
      const endIdx = idx === this._workerCount - 1 ? matSize : (idx + 1) * chunkSize;
      if (endIdx <= startIdx) {
        worker.terminate();
        return Promise.resolve({ knnDistances: new Array(0), knnIndexes: new Array(0) });
      }
      return SparseMatrixService._runWorker(worker, { values, startIdx, endIdx, fnNames, opts, threshold, weights, aggregationMethod });
    });
    const results = await SparseMatrixService._awaitWorkers(workers, promises);
    // First pass: total number of neighbours per row, so the output arrays can be pre-sized.
    const knnSizes = new Int32Array(rowCount);
    for (const res of results) {
      for (let i = 0; i < rowCount; ++i)
        knnSizes[i] += res.knnIndexes[i]?.length ?? 0;
    }
    const knnRes = {
      knnDistances: new Array(rowCount).fill(null).map((_, i) => new Array(knnSizes[i])),
      knnIndexes: new Array(rowCount).fill(null).map((_, i) => new Array(knnSizes[i]))
    };
    // Second pass: fill each row from the back, using knnSizes[i] as the remaining-slot counter.
    for (const res of results) {
      for (let i = 0; i < rowCount; ++i) {
        for (let j = 0; j < (res.knnDistances[i]?.length ?? 0); ++j) {
          knnRes.knnDistances[i][knnSizes[i] - 1] = res.knnDistances[i][j];
          knnRes.knnIndexes[i][knnSizes[i] - 1] = res.knnIndexes[i][j];
          knnSizes[i] -= 1;
        }
      }
    }
    return knnRes;
  }

  /**
   * Finds the nearest neighbours of the single row `targetIdx`.
   * Rows are split into contiguous slices, one per worker; the per-worker candidates are merged
   * with `insertSmaller` into arrays of length `nNeighbours` pre-filled with 99999 / -1.
   *
   * @returns `{ knnDistances, knnIndexes }` arrays of length `nNeighbours`.
   * @throws Error when the per-column arrays have inconsistent lengths.
   */
  async multiColumnSingleValueKNN(values, targetIdx, fnNames, nNeighbours = 15, opts, weights, aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    SparseMatrixService._assertConsistentInputs(values, fnNames, opts, weights);
    const workers = Array.from({ length: this._workerCount }, () => new Worker(new URL(/* worker import */ __webpack_require__.p + __webpack_require__.u("node_modules_datagrok-libraries_ml_src_distance-matrix_single-value-knn-worker_js"), __webpack_require__.b)));
    const fullSize = values[0].length;
    const target = values.map((v) => v[targetIdx]);
    const chunkSize = Math.ceil(fullSize / this._workerCount);
    const promises = workers.map((worker, idx) => {
      const startIdx = idx * chunkSize;
      const endIdx = idx === this._workerCount - 1 ? fullSize : (idx + 1) * chunkSize;
      if (endIdx <= startIdx) {
        worker.terminate();
        return Promise.resolve({ knnDistances: new Array(0), knnIndexes: new Array(0) });
      }
      return SparseMatrixService._runWorker(worker, { values: values.map((v) => v.slice(startIdx, endIdx)), target, fnNames, opts, nNeighbours, weights, aggregationMethod, startIdx });
    });
    const results = await SparseMatrixService._awaitWorkers(workers, promises);
    const singleValueKnn = { knnDistances: new Array(nNeighbours).fill(99999), knnIndexes: new Array(nNeighbours).fill(-1) };
    for (const res of results) {
      for (let j = 0; j < (res.knnDistances?.length ?? 0); ++j)
        (0,_utils__WEBPACK_IMPORTED_MODULE_1__.insertSmaller)(singleValueKnn.knnDistances, singleValueKnn.knnIndexes, res.knnDistances[j], res.knnIndexes[j]);
    }
    return singleValueKnn;
  }

  /**
   * Computes, for every row, its `nNeighbours` nearest neighbours over several columns.
   * Per-worker candidates are merged with `insertSmaller` into per-row arrays pre-filled with
   * 99999 (distance) and -1 (index).
   *
   * @returns `{ knnDistances, knnIndexes }`: one array of length `nNeighbours` per row.
   * @throws Error when the per-column arrays have inconsistent lengths.
   */
  async multiColumnKNN(values, fnNames, nNeighbours = 15, opts, weights, aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    SparseMatrixService._assertConsistentInputs(values, fnNames, opts, weights);
    const rowCount = values[0].length;
    const matSize = rowCount * (rowCount - 1) / 2;
    const chunkSize = Math.floor(matSize / this._workerCount);
    const workers = Array.from({ length: this._workerCount }, () => new Worker(new URL(/* worker import */ __webpack_require__.p + __webpack_require__.u("vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_knn-worker_js"), __webpack_require__.b)));
    const promises = workers.map((worker, idx) => {
      const startIdx = idx * chunkSize;
      const endIdx = idx === this._workerCount - 1 ? matSize : (idx + 1) * chunkSize;
      if (endIdx <= startIdx) {
        worker.terminate();
        return Promise.resolve({ knnDistances: new Array(0), knnIndexes: new Array(0) });
      }
      return SparseMatrixService._runWorker(worker, { values, startIdx, endIdx, fnNames, opts, nNeighbours, weights, aggregationMethod });
    });
    const results = await SparseMatrixService._awaitWorkers(workers, promises);
    const knnRes = {
      knnDistances: new Array(rowCount).fill(null).map(() => new Array(nNeighbours).fill(99999)),
      knnIndexes: new Array(rowCount).fill(null).map(() => new Array(nNeighbours).fill(-1))
    };
    for (const res of results) {
      for (let i = 0; i < rowCount; ++i) {
        for (let j = 0; j < (res.knnDistances[i]?.length ?? 0); ++j)
          (0,_utils__WEBPACK_IMPORTED_MODULE_1__.insertSmaller)(knnRes.knnDistances[i], knnRes.knnIndexes[i], res.knnDistances[i][j], res.knnIndexes[i][j]);
      }
    }
    return knnRes;
  }

  /**
   * Asks the threshold workers for a sample of pairwise distances and returns it sorted ascending.
   * The total sample size is capped at 1,000,000 and divided evenly between the workers.
   * On any failure all workers are terminated, the error is logged and a one-element array
   * containing 0.5 is returned instead of throwing.
   *
   * @returns sorted `Float32Array` of sampled distances.
   */
  async getSampleDistances(values, fnNames, opts = [], weights, aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    const thresholdWorkers = Array.from({ length: this._workerCount }, () => new Worker(new URL(/* worker import */ __webpack_require__.p + __webpack_require__.u("vendors-node_modules_datagrok-libraries_ml_src_distance-matrix_sparse-matrix-threshold-worker_js"), __webpack_require__.b)));
    try {
      const matSize = values[0].length * (values[0].length - 1) / 2;
      const chunkSize = Math.floor(matSize / this._workerCount);
      const maxSampleSize = 1000000;
      const sampleSize = Math.max(Math.min(matSize / 1000, maxSampleSize), Math.min(matSize, maxSampleSize));
      const testSetSizePerWorker = Math.floor(sampleSize / this._workerCount);
      const tPromises = thresholdWorkers.map((worker, idx) => {
        const startIdx = idx * chunkSize;
        const endIdx = idx === this._workerCount - 1 ? matSize : (idx + 1) * chunkSize;
        return SparseMatrixService._runWorker(worker, {
          values: values, startIdx, endIdx, sampleLength: testSetSizePerWorker,
          fnNames, opts, weights, aggregationMethod
        });
      });
      const results = await Promise.all(tPromises);
      const fullSize = results.reduce((acc, val) => acc + val.distance.length, 0);
      const distance = new Float32Array(fullSize);
      let offset = 0;
      for (const res of results) {
        distance.set(res.distance, offset);
        offset += res.distance.length;
      }
      distance.sort(); // typed-array sort is numeric
      return distance;
    }
    catch (e) {
      thresholdWorkers.forEach((w) => w.terminate());
      console.error(e);
      return new Float32Array(1).fill(0.5);
    }
  }

  /**
   * Estimates the smallest threshold for which the sparse matrix stays within
   * 70,000,000 entries (three 4-byte arrays, i.e. about 840 MB, below the 1 GB limit).
   *
   * The estimate takes the sorted distance sample, picks the distance at the fraction
   * `maxEntries / totalPairs` and returns `1 - distance`. Returns 0 when all pairs already fit,
   * and 0.5 when the sample is empty or an error occurs.
   *
   * @param values - one array of values per column; the row count is taken from `values[0]`.
   * @returns the minimal threshold.
   */
  async getMinimalThreshold(values, fnNames, opts = [], weights, aggregationMethod = _types__WEBPACK_IMPORTED_MODULE_0__.DistanceAggregationMethods.EUCLIDEAN) {
    const maxSparseMatrixSize = 70000000;
    try {
      // The number of pairs depends on the number of rows (values[0].length), not on the number of columns.
      const rowCount = values[0].length;
      const matSize = rowCount * (rowCount - 1) / 2;
      if (matSize <= maxSparseMatrixSize)
        return 0; // every pair fits, no restriction needed
      const distance = await this.getSampleDistances(values, fnNames, opts, weights, aggregationMethod);
      if (distance.length === 0)
        return 0.5;
      const fractionIndex = Math.min(Math.floor(maxSparseMatrixSize / matSize * distance.length), distance.length - 1);
      return 1 - distance[fractionIndex];
    }
    catch (e) {
      console.error(e);
      return 0.5;
    }
  }

  /**
   * Synchronously computes the sparse matrix of a single column on the calling thread.
   *
   * Every unordered pair (mi, mj) with mi < mj is visited exactly once. A pair where either value
   * is nil gets distance 1. The pair is kept when `1 - distance >= threshold`.
   *
   * @param values - column values.
   * @param fnName - not used by this method; kept for interface compatibility.
   * @param distanceFn - function `(a, b) => distance`.
   * @param threshold - minimal similarity (`1 - distance`) for a pair to be kept.
   * @returns `{ i: Int32Array, j: Int32Array, distance: Float32Array }`.
   */
  static calcSync(values, fnName, distanceFn, threshold) {
    const i = [];
    const j = [];
    const distances = [];
    const size = values.length;
    for (let mi = 0; mi < size - 1; mi++) {
      // Start at mi + 1: a row is never compared with itself.
      for (let mj = mi + 1; mj < size; mj++) {
        const value = !(0,_utils__WEBPACK_IMPORTED_MODULE_1__.isNil)(values[mi]) && !(0,_utils__WEBPACK_IMPORTED_MODULE_1__.isNil)(values[mj]) ?
          distanceFn(values[mi], values[mj]) : 1;
        const similarity = 1 - value;
        if (similarity >= threshold) {
          i.push(mi);
          j.push(mj);
          distances.push(value);
        }
      }
    }
    return { i: new Int32Array(i), j: new Int32Array(j), distance: new Float32Array(distances) };
  }
}
1520
- //# sourceMappingURL=data:application/json;base64,
1521
-
1522
- /***/ })
1523
-
1524
- }]);
1525
- //# sourceMappingURL=vendors-node_modules_datagrok-libraries_math_src_webGPU_sparse-matrix_webGPU-sparse-matrix_js-07693f.js.map