@datagrok/peptides 1.17.0 → 1.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +17 -6
- package/CHANGELOG.md +4 -0
- package/dist/214.js +2 -0
- package/dist/436.js +2 -2
- package/dist/802.js +2 -0
- package/dist/package-test.js +2 -3
- package/dist/package.js +2 -3
- package/package.json +14 -14
- package/src/demo/fasta.ts +8 -2
- package/src/model.ts +783 -532
- package/src/package-test.ts +1 -3
- package/src/package.ts +15 -28
- package/src/tests/benchmarks.ts +31 -11
- package/src/tests/core.ts +11 -6
- package/src/tests/misc.ts +6 -6
- package/src/tests/model.ts +79 -44
- package/src/tests/table-view.ts +48 -38
- package/src/tests/utils.ts +0 -76
- package/src/tests/viewers.ts +30 -12
- package/src/tests/widgets.ts +30 -11
- package/src/utils/algorithms.ts +115 -38
- package/src/utils/cell-renderer.ts +181 -72
- package/src/utils/constants.ts +33 -7
- package/src/utils/misc.ts +244 -10
- package/src/utils/parallel-mutation-cliffs.ts +18 -15
- package/src/utils/statistics.ts +70 -15
- package/src/utils/tooltips.ts +42 -17
- package/src/utils/types.ts +29 -26
- package/src/utils/worker-creator.ts +5 -0
- package/src/viewers/logo-summary.ts +591 -130
- package/src/viewers/sar-viewer.ts +893 -239
- package/src/widgets/distribution.ts +305 -64
- package/src/widgets/manual-alignment.ts +18 -11
- package/src/widgets/mutation-cliffs.ts +44 -18
- package/src/widgets/peptides.ts +86 -91
- package/src/widgets/selection.ts +56 -22
- package/src/widgets/settings.ts +94 -44
- package/src/workers/mutation-cliffs-worker.ts +3 -16
- package/dist/209.js +0 -2
- package/dist/361.js +0 -2
- package/dist/381.js +0 -2
- package/dist/770.js +0 -2
- package/dist/831.js +0 -2
- package/dist/868.js +0 -2
- package/dist/931.js +0 -3
- package/dist/931.js.LICENSE.txt +0 -51
- package/dist/932.js +0 -2
- package/dist/package-test.js.LICENSE.txt +0 -51
- package/dist/package.js.LICENSE.txt +0 -51
- package/src/tests/peptide-space-test.ts +0 -48
- package/src/tests/test-data.ts +0 -649
- package/src/utils/molecular-measure.ts +0 -174
- package/src/utils/peptide-similarity-space.ts +0 -216
- package/src/viewers/peptide-space-viewer.ts +0 -150
- package/src/workers/dimensionality-reducer.ts +0 -25
package/src/utils/algorithms.ts
CHANGED
|
@@ -2,59 +2,104 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as C from './constants';
|
|
3
3
|
import * as type from './types';
|
|
4
4
|
import {ParallelMutationCliffs} from './parallel-mutation-cliffs';
|
|
5
|
-
import {CLUSTER_TYPE} from '../
|
|
5
|
+
import {CLUSTER_TYPE} from '../viewers/logo-summary';
|
|
6
6
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
|
-
import {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
import {
|
|
8
|
+
ClusterStats,
|
|
9
|
+
ClusterTypeStats,
|
|
10
|
+
getStats,
|
|
11
|
+
MonomerPositionStats,
|
|
12
|
+
PositionStats,
|
|
13
|
+
StatsItem,
|
|
14
|
+
SummaryStats,
|
|
15
|
+
} from './statistics';
|
|
16
|
+
|
|
17
|
+
/**
 * Options accepted by `findMutations`. Every field is optional.
 */
export type MutationCliffsOptions = {
|
|
18
|
+
/** `findMutations` falls back to 1 when this is not set. */
maxMutations?: number,
|
|
19
|
+
/** `findMutations` falls back to 0 when this is not set. */
minActivityDelta?: number,
|
|
20
|
+
/** NOTE(review): presumably a raw column used to restrict the search to one target - confirm against the worker. */
targetCol?: type.RawColumn | null,
|
|
21
|
+
/** NOTE(review): presumably the value of `targetCol` to restrict the search to - confirm against the worker. */
currentTarget?: string | null
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
 * Finds mutation cliffs in the set of sequences.
 *
 * Missing options are defaulted (`minActivityDelta` to 0, `maxMutations` to 1) on a copy, so the caller's
 * options object is left untouched. The calculation is delegated to a `ParallelMutationCliffs` service, which
 * is always terminated afterwards, even when the calculation fails.
 * @param activityArray - Activity column raw data.
 * @param monomerInfoArray - Split sequence raw columns.
 * @param options - Options for the mutation cliffs algorithm.
 * @return - Mutation cliffs map.
 * @throws Error if `monomerInfoArray` is empty.
 */
export async function findMutations(activityArray: type.RawData, monomerInfoArray: type.RawColumn[],
  options: MutationCliffsOptions = {}): Promise<type.MutationCliffs> {
  const nCols = monomerInfoArray.length;
  if (nCols === 0) {
    throw new Error(`PepAlgorithmError: Couldn't find any column of semType '${C.SEM_TYPES.MONOMER}'`);
  }

  // Apply defaults on a shallow copy instead of writing into the caller's object.
  const resolvedOptions: MutationCliffsOptions = {
    ...options,
    minActivityDelta: options.minActivityDelta ?? 0,
    maxMutations: options.maxMutations ?? 1,
  };

  const mutationCliffsService = new ParallelMutationCliffs();
  try {
    return await mutationCliffsService.calc(activityArray, monomerInfoArray, resolvedOptions);
  } finally {
    // Release the service on both the success and the failure path.
    mutationCliffsService.terminate();
  }
}
|
|
22
46
|
|
|
23
|
-
|
|
24
|
-
|
|
47
|
+
/**
|
|
48
|
+
* Calculates statistics for each monomer position.
|
|
49
|
+
* @param activityCol - Activity column.
|
|
50
|
+
* @param filter - Dataframe filter to consider.
|
|
51
|
+
* @param positionColumns - Position columns containing monomers.
|
|
52
|
+
* @param [options] - Options for the algorithm.
|
|
53
|
+
* @param [options.isFiltered] - Whether the dataframe is filtered.
|
|
54
|
+
* @param [options.columns] - Columns to consider when calculating statistics.
|
|
55
|
+
* @return - Statistics for each monomer position.
|
|
56
|
+
*/
|
|
57
|
+
export function calculateMonomerPositionStatistics(activityCol: DG.Column<number>, filter: DG.BitSet,
|
|
58
|
+
positionColumns: DG.Column<string>[], options: {
|
|
59
|
+
isFiltered?: boolean,
|
|
60
|
+
columns?: string[]
|
|
61
|
+
} = {}): MonomerPositionStats {
|
|
25
62
|
options.isFiltered ??= false;
|
|
26
|
-
const monomerPositionObject = {general: {}} as MonomerPositionStats & {general: SummaryStats};
|
|
27
|
-
const activityCol = df.getCol(C.COLUMNS_NAMES.ACTIVITY);
|
|
63
|
+
const monomerPositionObject = {general: {}} as MonomerPositionStats & { general: SummaryStats };
|
|
28
64
|
let activityColData: Float64Array = activityCol.getRawData() as Float64Array;
|
|
29
|
-
let sourceDfLen =
|
|
65
|
+
let sourceDfLen = activityCol.length;
|
|
30
66
|
|
|
31
67
|
if (options.isFiltered) {
|
|
32
|
-
sourceDfLen =
|
|
68
|
+
sourceDfLen = filter.trueCount;
|
|
33
69
|
const tempActivityData = new Float64Array(sourceDfLen);
|
|
34
|
-
const selectedIndexes =
|
|
35
|
-
for (let i = 0; i < sourceDfLen; ++i)
|
|
70
|
+
const selectedIndexes = filter.getSelectedIndexes();
|
|
71
|
+
for (let i = 0; i < sourceDfLen; ++i) {
|
|
36
72
|
tempActivityData[i] = activityColData[selectedIndexes[i]];
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
|
|
37
76
|
activityColData = tempActivityData;
|
|
38
|
-
positionColumns = DG.DataFrame.fromColumns(positionColumns).clone(
|
|
77
|
+
positionColumns = DG.DataFrame.fromColumns(positionColumns).clone(filter).columns.toList();
|
|
39
78
|
}
|
|
40
79
|
options.columns ??= positionColumns.map((col) => col.name);
|
|
41
80
|
|
|
42
81
|
for (const posCol of positionColumns) {
|
|
43
|
-
if (!options.columns.includes(posCol.name))
|
|
82
|
+
if (!options.columns.includes(posCol.name)) {
|
|
44
83
|
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
|
|
45
87
|
const posColData = posCol.getRawData();
|
|
46
88
|
const posColCateogries = posCol.categories;
|
|
47
|
-
const currentPositionObject = {general: {}} as PositionStats & {general: SummaryStats};
|
|
89
|
+
const currentPositionObject = {general: {}} as PositionStats & { general: SummaryStats };
|
|
48
90
|
|
|
49
91
|
for (let categoryIndex = 0; categoryIndex < posColCateogries.length; ++categoryIndex) {
|
|
50
92
|
const monomer = posColCateogries[categoryIndex];
|
|
51
|
-
if (monomer === '')
|
|
93
|
+
if (monomer === '') {
|
|
52
94
|
continue;
|
|
95
|
+
}
|
|
96
|
+
|
|
53
97
|
|
|
54
98
|
const boolArray: boolean[] = new Array(sourceDfLen).fill(false);
|
|
55
99
|
for (let i = 0; i < sourceDfLen; ++i) {
|
|
56
|
-
if (posColData[i] === categoryIndex)
|
|
100
|
+
if (posColData[i] === categoryIndex) {
|
|
57
101
|
boolArray[i] = true;
|
|
102
|
+
}
|
|
58
103
|
}
|
|
59
104
|
const bitArray = BitArray.fromValues(boolArray);
|
|
60
105
|
const stats = bitArray.allFalse || bitArray.allTrue ?
|
|
@@ -69,69 +114,102 @@ export function calculateMonomerPositionStatistics(df: DG.DataFrame, positionCol
|
|
|
69
114
|
return monomerPositionObject;
|
|
70
115
|
}
|
|
71
116
|
|
|
72
|
-
|
|
73
|
-
|
|
117
|
+
/**
 * Widens the running minimum/maximum summary statistics stored in `genObj` with one more observation.
 *
 * The observation is either the statistics of a single monomer position (`stats`) or an already aggregated
 * summary (`summaryStats`); values from `stats` are preferred when both are given. Fields of `genObj` that are
 * not set yet are initialized from the observation. P-value bounds are only touched when the observation
 * carries a numeric p-value, so a `null` p-value or a summary without p-value bounds leaves them unchanged.
 * @param genObj - Object to store the summary statistics to; it is modified in place.
 * @param stats - Statistics for a single monomer position.
 * @param summaryStats - Summary statistics for all monomer positions.
 * @throws Error if both `stats` and `summaryStats` are `null`.
 */
export function getSummaryStats(genObj: SummaryStats, stats: StatsItem | null = null,
  summaryStats: SummaryStats | null = null): void {
  if (stats === null && summaryStats === null) {
    throw new Error(`MonomerPositionStatsError: either stats or summaryStats must be present`);
  }

  const possibleMaxCount = stats?.count ?? summaryStats!.maxCount;
  genObj.maxCount ??= possibleMaxCount;
  if (genObj.maxCount < possibleMaxCount) {
    genObj.maxCount = possibleMaxCount;
  }

  const possibleMinCount = stats?.count ?? summaryStats!.minCount;
  genObj.minCount ??= possibleMinCount;
  if (genObj.minCount > possibleMinCount) {
    genObj.minCount = possibleMinCount;
  }

  const possibleMaxMeanDifference = stats?.meanDifference ?? summaryStats!.maxMeanDifference;
  genObj.maxMeanDifference ??= possibleMaxMeanDifference;
  if (genObj.maxMeanDifference < possibleMaxMeanDifference) {
    genObj.maxMeanDifference = possibleMaxMeanDifference;
  }

  const possibleMinMeanDifference = stats?.meanDifference ?? summaryStats!.minMeanDifference;
  genObj.minMeanDifference ??= possibleMinMeanDifference;
  if (genObj.minMeanDifference > possibleMinMeanDifference) {
    genObj.minMeanDifference = possibleMinMeanDifference;
  }

  // The p-value may be absent (null on `stats`, or never set on `summaryStats`). Each bound is resolved with the
  // summary fallback first and then checked, so that bounds from `summaryStats` are merged as well.
  const possibleMaxPValue = stats?.pValue ?? summaryStats?.maxPValue ?? NaN;
  if (!isNaN(possibleMaxPValue)) {
    genObj.maxPValue ??= possibleMaxPValue;
    if (genObj.maxPValue < possibleMaxPValue) {
      genObj.maxPValue = possibleMaxPValue;
    }
  }

  const possibleMinPValue = stats?.pValue ?? summaryStats?.minPValue ?? NaN;
  if (!isNaN(possibleMinPValue)) {
    genObj.minPValue ??= possibleMinPValue;
    if (genObj.minPValue > possibleMinPValue) {
      genObj.minPValue = possibleMinPValue;
    }
  }

  const possibleMaxRatio = stats?.ratio ?? summaryStats!.maxRatio;
  genObj.maxRatio ??= possibleMaxRatio;
  if (genObj.maxRatio < possibleMaxRatio) {
    genObj.maxRatio = possibleMaxRatio;
  }

  const possibleMinRatio = stats?.ratio ?? summaryStats!.minRatio;
  genObj.minRatio ??= possibleMinRatio;
  if (genObj.minRatio > possibleMinRatio) {
    genObj.minRatio = possibleMinRatio;
  }
}
|
|
118
187
|
|
|
188
|
+
/**
|
|
189
|
+
* Calculates statistics for each cluster type.
|
|
190
|
+
* @param df - Dataframe containing the clusters column.
|
|
191
|
+
* @param clustersColumnName - Name of the original clusters column.
|
|
192
|
+
* @param customClusters - Array of custom clusters columns names.
|
|
193
|
+
* @param activityCol - Activity column.
|
|
194
|
+
* @return - Statistics for each cluster type.
|
|
195
|
+
*/
|
|
119
196
|
export function calculateClusterStatistics(df: DG.DataFrame, clustersColumnName: string,
|
|
120
|
-
customClusters: DG.Column<boolean>[]): ClusterTypeStats {
|
|
197
|
+
customClusters: DG.Column<boolean>[], activityCol: DG.Column<number>): ClusterTypeStats {
|
|
121
198
|
const rowCount = df.rowCount;
|
|
122
199
|
const origClustCol = df.getCol(clustersColumnName);
|
|
123
200
|
const origClustColData = origClustCol.getRawData();
|
|
124
201
|
const origClustColCat = origClustCol.categories;
|
|
125
202
|
const origClustMasks: BitArray[] = Array.from({length: origClustColCat.length},
|
|
126
203
|
() => new BitArray(rowCount, false));
|
|
127
|
-
for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx)
|
|
204
|
+
for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
|
|
128
205
|
origClustMasks[origClustColData[rowIdx]].setTrue(rowIdx);
|
|
206
|
+
}
|
|
207
|
+
|
|
129
208
|
|
|
130
209
|
const customClustMasks = customClusters.map(
|
|
131
210
|
(v) => BitArray.fromUint32Array(rowCount, v.getRawData() as Uint32Array));
|
|
132
211
|
const customClustColNamesList = customClusters.map((v) => v.name);
|
|
133
212
|
|
|
134
|
-
const activityCol = df.getCol(C.COLUMNS_NAMES.ACTIVITY);
|
|
135
213
|
const activityColData = activityCol.getRawData() as Float64Array;
|
|
136
214
|
|
|
137
215
|
const origClustStats: ClusterStats = {};
|
|
@@ -143,10 +221,9 @@ export function calculateClusterStatistics(df: DG.DataFrame, clustersColumnName:
|
|
|
143
221
|
const resultStats = clustType === CLUSTER_TYPE.ORIGINAL ? origClustStats : customClustStats;
|
|
144
222
|
for (let maskIdx = 0; maskIdx < masks.length; ++maskIdx) {
|
|
145
223
|
const mask = masks[maskIdx];
|
|
146
|
-
|
|
224
|
+
resultStats[clustNames[maskIdx]] = mask.allTrue || mask.allFalse ?
|
|
147
225
|
{count: mask.length, meanDifference: 0, ratio: 1.0, pValue: null, mask: mask, mean: activityCol.stats.avg} :
|
|
148
226
|
getStats(activityColData, mask);
|
|
149
|
-
resultStats[clustNames[maskIdx]] = stats;
|
|
150
227
|
}
|
|
151
228
|
}
|
|
152
229
|
|