@datagrok/bio 2.22.11 → 2.22.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/dockerfiles/container.json +2 -2
- package/package.json +3 -3
- package/src/package-api.ts +4 -0
- package/src/package.g.ts +36 -23
- package/src/package.ts +19 -11
- package/src/tests/renderers-test.ts +1 -1
- package/src/tests/viewers.ts +11 -16
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +48 -1
- package/src/utils/save-as-fasta.ts +25 -22
- package/test-console-output-1.log +1174 -3181
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.22.11",
|
|
8
|
+
"version": "2.22.12",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.4",
|
|
51
51
|
"@datagrok-libraries/tutorials": "^1.6.1",
|
|
52
|
-
"@datagrok-libraries/utils": "^4.6.
|
|
52
|
+
"@datagrok-libraries/utils": "^4.6.5",
|
|
53
53
|
"@webgpu/types": "^0.1.40",
|
|
54
54
|
"ajv": "^8.12.0",
|
|
55
55
|
"ajv-errors": "^3.0.0",
|
|
@@ -73,7 +73,7 @@
|
|
|
73
73
|
"@types/wu": "^2.1.44",
|
|
74
74
|
"@typescript-eslint/eslint-plugin": "^8.8.1",
|
|
75
75
|
"@typescript-eslint/parser": "^8.8.1",
|
|
76
|
-
"datagrok-tools": "^4.14.
|
|
76
|
+
"datagrok-tools": "^4.14.57",
|
|
77
77
|
"eslint": "^8.57.1",
|
|
78
78
|
"eslint-config-google": "^0.14.0",
|
|
79
79
|
"eslint-plugin-rxjs": "^5.0.3",
|
package/src/package-api.ts
CHANGED
|
@@ -40,6 +40,10 @@ export namespace funcs {
|
|
|
40
40
|
return await grok.functions.call('Bio:SequenceTooltip', { col });
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
export async function standardiseMonomerLibrary(library: string ): Promise<string> {
|
|
44
|
+
return await grok.functions.call('Bio:StandardiseMonomerLibrary', { library });
|
|
45
|
+
}
|
|
46
|
+
|
|
43
47
|
export async function getBioLib(): Promise<any> {
|
|
44
48
|
return await grok.functions.call('Bio:GetBioLib', {});
|
|
45
49
|
}
|
package/src/package.g.ts
CHANGED
|
@@ -25,6 +25,15 @@ export async function standardiseMonomerLibrary(library: string) : Promise<strin
|
|
|
25
25
|
return await PackageFunctions.standardiseMonomerLibrary(library);
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
//description: Matches molecules in a column with monomers from the selected library(s)
|
|
29
|
+
//input: dataframe table
|
|
30
|
+
//input: column molecules { semType: Molecule }
|
|
31
|
+
//input: string polymerType = 'PEPTIDE' { choices: ["PEPTIDE","RNA","CHEM"]; caption: Polymer Type }
|
|
32
|
+
//top-menu: Bio | Manage | Match with Monomer Library...
|
|
33
|
+
export async function matchWithMonomerLibrary(table: DG.DataFrame, molecules: DG.Column, polymerType: any) : Promise<void> {
|
|
34
|
+
await PackageFunctions.matchWithMonomerLibrary(table, molecules, polymerType);
|
|
35
|
+
}
|
|
36
|
+
|
|
28
37
|
//output: object monomerLib
|
|
29
38
|
export function getBioLib() : any {
|
|
30
39
|
return PackageFunctions.getBioLib();
|
|
@@ -126,8 +135,8 @@ export function macromoleculeDifferenceCellRenderer() : any {
|
|
|
126
135
|
return PackageFunctions.macromoleculeDifferenceCellRenderer();
|
|
127
136
|
}
|
|
128
137
|
|
|
129
|
-
//input: string alignType { choices: [
|
|
130
|
-
//input: string alignTable { choices: [
|
|
138
|
+
//input: string alignType { choices: ["Local alignment","Global alignment"] }
|
|
139
|
+
//input: string alignTable { choices: ["AUTO","NUCLEOTIDES","BLOSUM45","BLOSUM50","BLOSUM62","BLOSUM80","BLOSUM90","PAM30","PAM70","PAM250","SCHNEIDER","TRANS"] }
|
|
131
140
|
//input: double gap
|
|
132
141
|
//input: string seq1
|
|
133
142
|
//input: string seq2
|
|
@@ -182,9 +191,9 @@ export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column,
|
|
|
182
191
|
//input: dataframe table { description: Input data table }
|
|
183
192
|
//input: string molecules { semType: Macromolecule; description: Input data table }
|
|
184
193
|
//input: column activities
|
|
185
|
-
//input: double similarity
|
|
186
|
-
//input: string methodName { choices: [
|
|
187
|
-
//input: string similarityMetric { choices: [
|
|
194
|
+
//input: double similarity = 80 { description: Similarity cutoff }
|
|
195
|
+
//input: string methodName { choices: ["UMAP","t-SNE"] }
|
|
196
|
+
//input: string similarityMetric { choices: ["Hamming","Levenshtein","Monomer chemical distance"] }
|
|
188
197
|
//input: func preprocessingFunction
|
|
189
198
|
//input: object options { optional: true }
|
|
190
199
|
//input: bool demo { optional: true }
|
|
@@ -198,9 +207,9 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<a
|
|
|
198
207
|
//tags: dim-red-preprocessing-function
|
|
199
208
|
//input: column col { semType: Macromolecule }
|
|
200
209
|
//input: string metric
|
|
201
|
-
//input: double gapOpen
|
|
202
|
-
//input: double gapExtend
|
|
203
|
-
//input: string fingerprintType { caption: Fingerprint type;
|
|
210
|
+
//input: double gapOpen = 1 { caption: Gap open penalty; optional: true }
|
|
211
|
+
//input: double gapExtend = 0.6 { caption: Gap extension penalty; optional: true }
|
|
212
|
+
//input: string fingerprintType = 'Morgan' { caption: Fingerprint type; choices: ["Morgan","RDKit","Pattern","AtomPair","MACCS","TopologicalTorsion"]; optional: true }
|
|
204
213
|
//output: object result
|
|
205
214
|
//meta.supportedSemTypes: Macromolecule
|
|
206
215
|
//meta.supportedTypes: string
|
|
@@ -225,12 +234,12 @@ export async function helmPreprocessingFunction(col: DG.Column<any>, _metric: an
|
|
|
225
234
|
//description: Creates 2D sequence space with projected sequences by pairwise distance
|
|
226
235
|
//input: dataframe table
|
|
227
236
|
//input: column molecules { semType: Macromolecule }
|
|
228
|
-
//input: string methodName { choices: [
|
|
229
|
-
//input: string similarityMetric { choices: [
|
|
230
|
-
//input: bool plotEmbeddings
|
|
237
|
+
//input: string methodName { choices: ["UMAP","t-SNE"] }
|
|
238
|
+
//input: string similarityMetric { choices: ["Hamming","Levenshtein","Monomer chemical distance"] }
|
|
239
|
+
//input: bool plotEmbeddings = true
|
|
231
240
|
//input: func preprocessingFunction { optional: true }
|
|
232
241
|
//input: object options { optional: true }
|
|
233
|
-
//input: bool clusterEmbeddings { optional: true
|
|
242
|
+
//input: bool clusterEmbeddings = true { optional: true }
|
|
234
243
|
//input: bool isDemo { optional: true }
|
|
235
244
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
236
245
|
//editor: Bio:SequenceSpaceEditor
|
|
@@ -242,8 +251,8 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
242
251
|
//description: Converts sequences to molblocks
|
|
243
252
|
//input: dataframe table { description: Input data table }
|
|
244
253
|
//input: column seqCol { semType: Macromolecule; caption: Sequence }
|
|
245
|
-
//input: bool nonlinear
|
|
246
|
-
//input: bool highlight
|
|
254
|
+
//input: bool nonlinear = false { caption: Non-linear; description: Slower mode for cycling/branching HELM structures }
|
|
255
|
+
//input: bool highlight = false { caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
|
|
247
256
|
//top-menu: Bio | Transform | To Atomic Level...
|
|
248
257
|
export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean) : Promise<void> {
|
|
249
258
|
await PackageFunctions.toAtomicLevel(table, seqCol, nonlinear, highlight);
|
|
@@ -338,7 +347,7 @@ export function monomerCellRenderer() : any {
|
|
|
338
347
|
return PackageFunctions.monomerCellRenderer();
|
|
339
348
|
}
|
|
340
349
|
|
|
341
|
-
//input: string path { choices: [
|
|
350
|
+
//input: string path { choices: ["Demo:Files/","System:AppData/"] }
|
|
342
351
|
//output: dataframe result
|
|
343
352
|
export async function testDetectMacromolecule(path: string) : Promise<any> {
|
|
344
353
|
return await PackageFunctions.testDetectMacromolecule(path);
|
|
@@ -408,7 +417,7 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column) : void {
|
|
|
408
417
|
PackageFunctions.SubsequenceSearchTopMenu(macromolecules);
|
|
409
418
|
}
|
|
410
419
|
|
|
411
|
-
//name: Identity
|
|
420
|
+
//name: Identity
|
|
412
421
|
//description: Adds a column with fraction of matching monomers
|
|
413
422
|
//input: dataframe table { description: Table containing Macromolecule column }
|
|
414
423
|
//input: column macromolecule { semType: Macromolecule; description: Sequences to score }
|
|
@@ -419,7 +428,7 @@ export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule
|
|
|
419
428
|
return await PackageFunctions.sequenceIdentityScoring(table, macromolecule, reference);
|
|
420
429
|
}
|
|
421
430
|
|
|
422
|
-
//name: Similarity
|
|
431
|
+
//name: Similarity
|
|
423
432
|
//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
|
|
424
433
|
//input: dataframe table { description: Table containing Macromolecule column }
|
|
425
434
|
//input: column macromolecule { semType: Macromolecule; description: Sequences to score }
|
|
@@ -486,30 +495,37 @@ export function bioSubstructureFilterTest() : any {
|
|
|
486
495
|
return PackageFunctions.bioSubstructureFilterTest();
|
|
487
496
|
}
|
|
488
497
|
|
|
498
|
+
//name: webLogoLargeApp
|
|
489
499
|
export async function webLogoLargeApp() : Promise<void> {
|
|
490
500
|
await PackageFunctions.webLogoLargeApp();
|
|
491
501
|
}
|
|
492
502
|
|
|
503
|
+
//name: webLogoAggApp
|
|
493
504
|
export async function webLogoAggApp() : Promise<void> {
|
|
494
505
|
await PackageFunctions.webLogoAggApp();
|
|
495
506
|
}
|
|
496
507
|
|
|
508
|
+
//name: getRegionApp
|
|
497
509
|
export async function getRegionApp() : Promise<void> {
|
|
498
510
|
await PackageFunctions.getRegionApp();
|
|
499
511
|
}
|
|
500
512
|
|
|
513
|
+
//name: getRegionHelmApp
|
|
501
514
|
export async function getRegionHelmApp() : Promise<void> {
|
|
502
515
|
await PackageFunctions.getRegionHelmApp();
|
|
503
516
|
}
|
|
504
517
|
|
|
518
|
+
//name: longSeqTableSeparator
|
|
505
519
|
export function longSeqTableSeparator() : void {
|
|
506
520
|
PackageFunctions.longSeqTableSeparator();
|
|
507
521
|
}
|
|
508
522
|
|
|
523
|
+
//name: longSeqTableFasta
|
|
509
524
|
export function longSeqTableFasta() : void {
|
|
510
525
|
PackageFunctions.longSeqTableFasta();
|
|
511
526
|
}
|
|
512
527
|
|
|
528
|
+
//name: longSeqTableHelm
|
|
513
529
|
export function longSeqTableHelm() : void {
|
|
514
530
|
PackageFunctions.longSeqTableHelm();
|
|
515
531
|
}
|
|
@@ -523,15 +539,14 @@ export function addCopyMenu(cell: any, menu: any) : void {
|
|
|
523
539
|
//description: Sequence similarity tracking and evaluation dataset diversity
|
|
524
540
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
525
541
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
526
|
-
//meta.demoSkip: GROK-14320
|
|
527
542
|
export async function demoBioSimilarityDiversity() : Promise<void> {
|
|
528
543
|
await PackageFunctions.demoBioSimilarityDiversity();
|
|
529
544
|
}
|
|
530
545
|
|
|
531
546
|
//description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
|
|
547
|
+
//meta.isDemoDashboard: true
|
|
532
548
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
533
549
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
534
|
-
//meta.demoSkip: GROK-14320
|
|
535
550
|
export async function demoBioSequenceSpace() : Promise<void> {
|
|
536
551
|
await PackageFunctions.demoBioSequenceSpace();
|
|
537
552
|
}
|
|
@@ -539,7 +554,6 @@ export async function demoBioSequenceSpace() : Promise<void> {
|
|
|
539
554
|
//description: Activity Cliffs analysis on Macromolecules data
|
|
540
555
|
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
541
556
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
|
|
542
|
-
//meta.demoSkip: GROK-14320
|
|
543
557
|
export async function demoBioActivityCliffs() : Promise<void> {
|
|
544
558
|
await PackageFunctions.demoBioActivityCliffs();
|
|
545
559
|
}
|
|
@@ -547,7 +561,6 @@ export async function demoBioActivityCliffs() : Promise<void> {
|
|
|
547
561
|
//description: Atomic level structure of Macromolecules
|
|
548
562
|
//meta.demoPath: Bioinformatics | Atomic Level
|
|
549
563
|
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
|
|
550
|
-
//meta.demoSkip: GROK-14320
|
|
551
564
|
export async function demoBioAtomicLevel() : Promise<void> {
|
|
552
565
|
await PackageFunctions.demoBioAtomicLevel();
|
|
553
566
|
}
|
|
@@ -578,7 +591,7 @@ export async function seqIdentity(seq: string, ref: string) : Promise<any> {
|
|
|
578
591
|
|
|
579
592
|
//input: file file
|
|
580
593
|
//input: string colName
|
|
581
|
-
//input: double probeCount
|
|
594
|
+
//input: double probeCount = 100
|
|
582
595
|
export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: string, probeCount: number) : Promise<void> {
|
|
583
596
|
await PackageFunctions.detectMacromoleculeProbe(file, colName, probeCount);
|
|
584
597
|
}
|
|
@@ -590,7 +603,7 @@ export async function getSeqHelper() : Promise<any> {
|
|
|
590
603
|
|
|
591
604
|
//input: dataframe df
|
|
592
605
|
//input: column helmCol
|
|
593
|
-
//input: bool chiralityEngine
|
|
606
|
+
//input: bool chiralityEngine = true
|
|
594
607
|
//output: column result
|
|
595
608
|
export async function getMolFromHelm(df: DG.DataFrame, helmCol: DG.Column<any>, chiralityEngine: boolean) : Promise<any> {
|
|
596
609
|
return await PackageFunctions.getMolFromHelm(df, helmCol, chiralityEngine);
|
package/src/package.ts
CHANGED
|
@@ -70,12 +70,13 @@ import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
|
70
70
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
71
71
|
import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
|
|
72
72
|
import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
|
|
73
|
-
import {MonomerManager, standardizeMonomerLibrary} from './utils/monomer-lib/monomer-manager/monomer-manager';
|
|
73
|
+
import {matchMoleculesWithMonomers, MonomerManager, standardizeMonomerLibrary} from './utils/monomer-lib/monomer-manager/monomer-manager';
|
|
74
74
|
import {calculateScoresWithEmptyValues} from './utils/calculate-scores';
|
|
75
75
|
import {SeqHelper} from './utils/seq-helper/seq-helper';
|
|
76
76
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
77
77
|
import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atomic-level-widget';
|
|
78
78
|
import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
|
|
79
|
+
import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
79
80
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
80
81
|
export * from './package.g';
|
|
81
82
|
|
|
@@ -143,6 +144,14 @@ export class PackageFunctions {
|
|
|
143
144
|
return await standardizeMonomerLibrary(library);
|
|
144
145
|
}
|
|
145
146
|
|
|
147
|
+
@grok.decorators.func({'top-menu': 'Bio | Manage | Match with Monomer Library...', description: 'Matches molecules in a column with monomers from the selected library(s)',})
|
|
148
|
+
static async matchWithMonomerLibrary(table: DG.DataFrame,
|
|
149
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Molecule'}})molecules: DG.Column,
|
|
150
|
+
@grok.decorators.param({type: 'string', options: {choices: ['PEPTIDE', 'RNA', 'CHEM'], initialValue: 'PEPTIDE', caption: 'Polymer Type'}})polymerType: PolymerType = 'PEPTIDE') {
|
|
151
|
+
const matchDF = await matchMoleculesWithMonomers(table, molecules.name, _package.monomerLib, polymerType);
|
|
152
|
+
grok.shell.addTableView(matchDF);
|
|
153
|
+
}
|
|
154
|
+
|
|
146
155
|
// Keep for backward compatibility
|
|
147
156
|
@grok.decorators.func({outputs: [{type: 'object', name: 'monomerLib'}]})
|
|
148
157
|
static getBioLib(): IMonomerLib {
|
|
@@ -939,14 +948,14 @@ export class PackageFunctions {
|
|
|
939
948
|
}
|
|
940
949
|
|
|
941
950
|
@grok.decorators.func({
|
|
942
|
-
name: 'Identity
|
|
951
|
+
name: 'Identity',
|
|
943
952
|
description: 'Adds a column with fraction of matching monomers',
|
|
944
953
|
'top-menu': 'Bio | Calculate | Identity...',
|
|
945
954
|
})
|
|
946
955
|
static async sequenceIdentityScoring(
|
|
947
|
-
@grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
|
|
956
|
+
@grok.decorators.param({options: {description: 'Table containing Macromolecule column'}}) table: DG.DataFrame,
|
|
948
957
|
@grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
|
|
949
|
-
@grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
|
|
958
|
+
@grok.decorators.param({options: {description: 'Sequence,matching column format'}}) reference: string
|
|
950
959
|
): Promise<DG.Column<number>> {
|
|
951
960
|
const seqHelper = _package.seqHelper;
|
|
952
961
|
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.IDENTITY, seqHelper);
|
|
@@ -954,14 +963,14 @@ export class PackageFunctions {
|
|
|
954
963
|
}
|
|
955
964
|
|
|
956
965
|
@grok.decorators.func({
|
|
957
|
-
name: 'Similarity
|
|
966
|
+
name: 'Similarity',
|
|
958
967
|
description: 'Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities',
|
|
959
968
|
'top-menu': 'Bio | Calculate | Similarity...',
|
|
960
969
|
})
|
|
961
970
|
static async sequenceSimilarityScoring(
|
|
962
|
-
@grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
|
|
971
|
+
@grok.decorators.param({options: {description: 'Table containing Macromolecule column'}}) table: DG.DataFrame,
|
|
963
972
|
@grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
|
|
964
|
-
@grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
|
|
973
|
+
@grok.decorators.param({options: {description: 'Sequence,matching column format'}}) reference: string
|
|
965
974
|
): Promise<DG.Column<number>> {
|
|
966
975
|
const seqHelper = _package.seqHelper;
|
|
967
976
|
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.SIMILARITY, seqHelper);
|
|
@@ -1137,7 +1146,6 @@ export class PackageFunctions {
|
|
|
1137
1146
|
description: 'Sequence similarity tracking and evaluation dataset diversity',
|
|
1138
1147
|
demoPath: 'Bioinformatics | Similarity, Diversity',
|
|
1139
1148
|
path: '/apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity',
|
|
1140
|
-
demoSkip: 'GROK-14320'
|
|
1141
1149
|
})
|
|
1142
1150
|
static async demoBioSimilarityDiversity(): Promise<void> {
|
|
1143
1151
|
await demoBioSimDiv();
|
|
@@ -1147,7 +1155,9 @@ export class PackageFunctions {
|
|
|
1147
1155
|
description: 'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results',
|
|
1148
1156
|
demoPath: 'Bioinformatics | Sequence Space',
|
|
1149
1157
|
path: '/apps/Tutorials/Demo/Bioinformatics/Sequence%20Space',
|
|
1150
|
-
|
|
1158
|
+
meta: {
|
|
1159
|
+
isDemoDashboard: 'true'
|
|
1160
|
+
}
|
|
1151
1161
|
})
|
|
1152
1162
|
static async demoBioSequenceSpace(): Promise<void> {
|
|
1153
1163
|
await demoSeqSpace();
|
|
@@ -1157,7 +1167,6 @@ export class PackageFunctions {
|
|
|
1157
1167
|
description: 'Activity Cliffs analysis on Macromolecules data',
|
|
1158
1168
|
demoPath: 'Bioinformatics | Activity Cliffs',
|
|
1159
1169
|
path: '/apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs',
|
|
1160
|
-
demoSkip: 'GROK-14320'
|
|
1161
1170
|
})
|
|
1162
1171
|
static async demoBioActivityCliffs(): Promise<void> {
|
|
1163
1172
|
await demoActivityCliffsCyclic();
|
|
@@ -1167,7 +1176,6 @@ export class PackageFunctions {
|
|
|
1167
1176
|
description: 'Atomic level structure of Macromolecules',
|
|
1168
1177
|
demoPath: 'Bioinformatics | Atomic Level',
|
|
1169
1178
|
path: '/apps/Tutorials/Demo/Bioinformatics/Atomic%20Level',
|
|
1170
|
-
demoSkip: 'GROK-14320'
|
|
1171
1179
|
})
|
|
1172
1180
|
static async demoBioAtomicLevel(): Promise<void> {
|
|
1173
1181
|
await demoToAtomicLevel();
|
|
@@ -65,7 +65,7 @@ category('renderers', () => {
|
|
|
65
65
|
|
|
66
66
|
test('scatterPlotTooltip', async () => {
|
|
67
67
|
await _testScatterPlotTooltip();
|
|
68
|
-
}
|
|
68
|
+
});
|
|
69
69
|
|
|
70
70
|
async function _rendererMacromoleculeFasta() {
|
|
71
71
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
|
package/src/tests/viewers.ts
CHANGED
|
@@ -6,19 +6,14 @@ import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
category('viewers', () => {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
'VdRegions': 'GROK-13162',
|
|
21
|
-
}[v],
|
|
22
|
-
});
|
|
23
|
-
}
|
|
24
|
-
});
|
|
9
|
+
// category('viewers', () => {
|
|
10
|
+
// const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
|
|
11
|
+
// for (const v of viewers) {
|
|
12
|
+
// test(v, async () => {
|
|
13
|
+
// const df = await readDataframe('samples/FASTA_DNA.csv');
|
|
14
|
+
// await df.meta.detectSemanticTypes();
|
|
15
|
+
// await grok.data.detectSemanticTypes(df);
|
|
16
|
+
// await testViewer(v, df, {detectSemanticTypes: true});
|
|
17
|
+
// });
|
|
18
|
+
// }
|
|
19
|
+
// });
|
|
@@ -57,6 +57,53 @@ export async function standardiseMonomers(monomers: Monomer[]) {
|
|
|
57
57
|
return fixedMonomers;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
/// matches molecules in the dataframe with monomers in the library by canonical smiles
|
|
61
|
+
export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
|
|
62
|
+
const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
|
|
63
|
+
if (!converterFunc)
|
|
64
|
+
throw new Error('Function convertMoleculeNotation not found, please install Chem package');
|
|
65
|
+
// first: standardize monomers
|
|
66
|
+
const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
|
|
67
|
+
const fixedMonomers = await standardiseMonomers(monomers);
|
|
68
|
+
const cappedSmilse = fixedMonomers.map((m, i) => ({sym: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source})).filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
|
|
69
|
+
// canonicalize all monomer smiles
|
|
70
|
+
const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilse.map((m) => m.smiles!));
|
|
71
|
+
monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
|
|
72
|
+
const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
|
|
73
|
+
if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
|
|
74
|
+
throw new Error('Error canonicalizing monomer smiles');
|
|
75
|
+
canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedSmilse[i].smiles = s);
|
|
76
|
+
|
|
77
|
+
const molecules = molDf.col(molColName)!;
|
|
78
|
+
const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
|
|
79
|
+
if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
|
|
80
|
+
throw new Error('Error canonicalizing molecules');
|
|
81
|
+
|
|
82
|
+
const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
|
|
83
|
+
|
|
84
|
+
const resultDf = molDf.clone();
|
|
85
|
+
const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
|
|
86
|
+
matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
|
|
87
|
+
const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
|
|
88
|
+
matchingMonomerSymbolCol.semType = 'Monomer';
|
|
89
|
+
const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
|
|
90
|
+
resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);
|
|
91
|
+
|
|
92
|
+
for (let i = 0; i < canonicalizedMolecules.length; i++) {
|
|
93
|
+
const mol = canonicalizedMolecules[i];
|
|
94
|
+
if (!mol) continue;
|
|
95
|
+
for (let j = 0; j < cappedSmilse.length; j++) {
|
|
96
|
+
if (cappedSmilse[j].smiles === mol) {
|
|
97
|
+
matchingMonomerSmilesCol.set(i, cappedSmilse[j].original!, false);
|
|
98
|
+
matchingMonomerSymbolCol.set(i, cappedSmilse[j].sym, false);
|
|
99
|
+
sourceLibCol.set(i, cappedSmilse[j].source ?? '', false);
|
|
100
|
+
break;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
return resultDf;
|
|
105
|
+
}
|
|
106
|
+
|
|
60
107
|
/** Standardizes the monomer library
|
|
61
108
|
* warning: throws error if the library is not valid or has invalid monomers
|
|
62
109
|
*/
|
|
@@ -127,7 +174,7 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
|
|
|
127
174
|
monomers[i].id,
|
|
128
175
|
JSON.stringify(monomers[i].meta ?? {}),
|
|
129
176
|
monomers[i].lib?.source ?? '',
|
|
130
|
-
]);
|
|
177
|
+
], false);
|
|
131
178
|
// something is wrong with setting dates, so setting it manually for now
|
|
132
179
|
try {
|
|
133
180
|
if (date)
|
|
@@ -15,23 +15,21 @@ const FASTA_LINE_WIDTH = 60;
|
|
|
15
15
|
export function saveAsFastaUI(): void {
|
|
16
16
|
// Use grid for column order adjusted by user
|
|
17
17
|
const grid: DG.Grid = grok.shell.tv.grid;
|
|
18
|
+
const dataFrame: DG.DataFrame = grid.dataFrame;
|
|
18
19
|
|
|
19
|
-
const idGColList: DG.
|
|
20
|
-
.map((colI: number) =>
|
|
21
|
-
.filter((
|
|
22
|
-
const defaultIdGCol: DG.
|
|
23
|
-
.find((
|
|
24
|
-
const idDefaultValue = defaultIdGCol ? [defaultIdGCol
|
|
20
|
+
const idGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)
|
|
21
|
+
.map((colI: number) => dataFrame.columns.byIndex(colI)!)
|
|
22
|
+
.filter((col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE).toArray();
|
|
23
|
+
const defaultIdGCol: DG.Column | undefined = idGColList
|
|
24
|
+
.find((col: DG.Column) => col.name.toLowerCase().indexOf('id') !== -1);
|
|
25
|
+
const idDefaultValue = defaultIdGCol ? [defaultIdGCol] : [];
|
|
25
26
|
|
|
26
|
-
const idGColListInput = ui.input.
|
|
27
|
-
|
|
28
|
-
items: idGColList.map((gcol: DG.GridColumn) => gcol.name)
|
|
29
|
-
});
|
|
27
|
+
const idGColListInput = ui.input.columns('Seq id columns', {table: dataFrame, value: idDefaultValue,
|
|
28
|
+
filter: (col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE});
|
|
30
29
|
|
|
31
|
-
const seqGColList: DG.
|
|
32
|
-
.map((colI: number) =>
|
|
33
|
-
.filter((
|
|
34
|
-
const col: DG.Column | null = gc.column;
|
|
30
|
+
const seqGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)/* range rom 0 to grid.columns.length */
|
|
31
|
+
.map((colI: number) => dataFrame.columns.byIndex(colI)!)
|
|
32
|
+
.filter((col: DG.Column) => {
|
|
35
33
|
if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
36
34
|
const sh = _package.seqHelper.getSeqHandler(col);
|
|
37
35
|
return sh.isFasta();
|
|
@@ -39,10 +37,17 @@ export function saveAsFastaUI(): void {
|
|
|
39
37
|
return false;
|
|
40
38
|
}).toArray();
|
|
41
39
|
|
|
42
|
-
const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0]
|
|
43
|
-
const seqColInput = ui.input.
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0] : null;
|
|
41
|
+
const seqColInput = ui.input.column('Seq column', {
|
|
42
|
+
table: dataFrame,
|
|
43
|
+
value: seqDefaultValue!,
|
|
44
|
+
filter: (col) => {
|
|
45
|
+
if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
46
|
+
const sh = _package.seqHelper.getSeqHandler(col);
|
|
47
|
+
return sh.isFasta();
|
|
48
|
+
}
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
46
51
|
});
|
|
47
52
|
|
|
48
53
|
const lineWidthInput = ui.input.int('FASTA line width', {value: FASTA_LINE_WIDTH});
|
|
@@ -54,10 +59,8 @@ export function saveAsFastaUI(): void {
|
|
|
54
59
|
lineWidthInput,
|
|
55
60
|
]))
|
|
56
61
|
.onOK(() => {
|
|
57
|
-
const valueIdColList: DG.Column[] = idGColListInput.value
|
|
58
|
-
|
|
59
|
-
const valueSeqCol: DG.Column | null = seqColInput.value ?
|
|
60
|
-
grid.columns.byName(seqColInput.value as string)!.column : null;
|
|
62
|
+
const valueIdColList: DG.Column[] = idGColListInput.value ?? [];
|
|
63
|
+
const valueSeqCol: DG.Column | null = seqColInput.value ?? null;
|
|
61
64
|
const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
|
|
62
65
|
|
|
63
66
|
if (!valueSeqCol)
|