@datagrok/bio 2.22.11 → 2.22.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "cpu": 1,
3
3
  "memory": 1024,
4
- "on_demand": true,
4
+ "on_demand": true,
5
5
  "shutdown_timeout": 30
6
- }
6
+ }
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.22.11",
8
+ "version": "2.22.12",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -49,7 +49,7 @@
49
49
  "@datagrok-libraries/math": "^1.2.4",
50
50
  "@datagrok-libraries/ml": "^6.10.4",
51
51
  "@datagrok-libraries/tutorials": "^1.6.1",
52
- "@datagrok-libraries/utils": "^4.6.3",
52
+ "@datagrok-libraries/utils": "^4.6.5",
53
53
  "@webgpu/types": "^0.1.40",
54
54
  "ajv": "^8.12.0",
55
55
  "ajv-errors": "^3.0.0",
@@ -73,7 +73,7 @@
73
73
  "@types/wu": "^2.1.44",
74
74
  "@typescript-eslint/eslint-plugin": "^8.8.1",
75
75
  "@typescript-eslint/parser": "^8.8.1",
76
- "datagrok-tools": "^4.14.48",
76
+ "datagrok-tools": "^4.14.57",
77
77
  "eslint": "^8.57.1",
78
78
  "eslint-config-google": "^0.14.0",
79
79
  "eslint-plugin-rxjs": "^5.0.3",
@@ -40,6 +40,10 @@ export namespace funcs {
40
40
  return await grok.functions.call('Bio:SequenceTooltip', { col });
41
41
  }
42
42
 
43
+ export async function standardiseMonomerLibrary(library: string ): Promise<string> {
44
+ return await grok.functions.call('Bio:StandardiseMonomerLibrary', { library });
45
+ }
46
+
43
47
  export async function getBioLib(): Promise<any> {
44
48
  return await grok.functions.call('Bio:GetBioLib', {});
45
49
  }
package/src/package.g.ts CHANGED
@@ -25,6 +25,15 @@ export async function standardiseMonomerLibrary(library: string) : Promise<strin
25
25
  return await PackageFunctions.standardiseMonomerLibrary(library);
26
26
  }
27
27
 
28
+ //description: Matches molecules in a column with monomers from the selected library(s)
29
+ //input: dataframe table
30
+ //input: column molecules { semType: Molecule }
31
+ //input: string polymerType = 'PEPTIDE' { choices: ["PEPTIDE","RNA","CHEM"]; caption: Polymer Type }
32
+ //top-menu: Bio | Manage | Match with Monomer Library...
33
+ export async function matchWithMonomerLibrary(table: DG.DataFrame, molecules: DG.Column, polymerType: any) : Promise<void> {
34
+ await PackageFunctions.matchWithMonomerLibrary(table, molecules, polymerType);
35
+ }
36
+
28
37
  //output: object monomerLib
29
38
  export function getBioLib() : any {
30
39
  return PackageFunctions.getBioLib();
@@ -126,8 +135,8 @@ export function macromoleculeDifferenceCellRenderer() : any {
126
135
  return PackageFunctions.macromoleculeDifferenceCellRenderer();
127
136
  }
128
137
 
129
- //input: string alignType { choices: ['Local alignment','Global alignment'] }
130
- //input: string alignTable { choices: ['AUTO','NUCLEOTIDES','BLOSUM45','BLOSUM50','BLOSUM62','BLOSUM80','BLOSUM90','PAM30','PAM70','PAM250','SCHNEIDER','TRANS'] }
138
+ //input: string alignType { choices: ["Local alignment","Global alignment"] }
139
+ //input: string alignTable { choices: ["AUTO","NUCLEOTIDES","BLOSUM45","BLOSUM50","BLOSUM62","BLOSUM80","BLOSUM90","PAM30","PAM70","PAM250","SCHNEIDER","TRANS"] }
131
140
  //input: double gap
132
141
  //input: string seq1
133
142
  //input: string seq2
@@ -182,9 +191,9 @@ export async function getRegionTopMenu(table: DG.DataFrame, sequence: DG.Column,
182
191
  //input: dataframe table { description: Input data table }
183
192
  //input: string molecules { semType: Macromolecule; description: Input data table }
184
193
  //input: column activities
185
- //input: double similarity { default: 80; description: Similarity cutoff }
186
- //input: string methodName { choices: ['UMAP','t-SNE'] }
187
- //input: string similarityMetric { choices: ['Hamming','Levenshtein','Monomer chemical distance'] }
194
+ //input: double similarity = 80 { description: Similarity cutoff }
195
+ //input: string methodName { choices: ["UMAP","t-SNE"] }
196
+ //input: string similarityMetric { choices: ["Hamming","Levenshtein","Monomer chemical distance"] }
188
197
  //input: func preprocessingFunction
189
198
  //input: object options { optional: true }
190
199
  //input: bool demo { optional: true }
@@ -198,9 +207,9 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<a
198
207
  //tags: dim-red-preprocessing-function
199
208
  //input: column col { semType: Macromolecule }
200
209
  //input: string metric
201
- //input: double gapOpen { default: 1; caption: Gap open penalty; optional: true }
202
- //input: double gapExtend { default: 0.6; caption: Gap extension penalty; optional: true }
203
- //input: string fingerprintType { caption: Fingerprint type; default: Morgan; choices: ['Morgan','RDKit','Pattern','AtomPair','MACCS','TopologicalTorsion']; optional: true }
210
+ //input: double gapOpen = 1 { caption: Gap open penalty; optional: true }
211
+ //input: double gapExtend = 0.6 { caption: Gap extension penalty; optional: true }
212
+ //input: string fingerprintType = 'Morgan' { caption: Fingerprint type; choices: ["Morgan","RDKit","Pattern","AtomPair","MACCS","TopologicalTorsion"]; optional: true }
204
213
  //output: object result
205
214
  //meta.supportedSemTypes: Macromolecule
206
215
  //meta.supportedTypes: string
@@ -225,12 +234,12 @@ export async function helmPreprocessingFunction(col: DG.Column<any>, _metric: an
225
234
  //description: Creates 2D sequence space with projected sequences by pairwise distance
226
235
  //input: dataframe table
227
236
  //input: column molecules { semType: Macromolecule }
228
- //input: string methodName { choices: ['UMAP','t-SNE'] }
229
- //input: string similarityMetric { choices: ['Hamming','Levenshtein','Monomer chemical distance'] }
230
- //input: bool plotEmbeddings { default: true }
237
+ //input: string methodName { choices: ["UMAP","t-SNE"] }
238
+ //input: string similarityMetric { choices: ["Hamming","Levenshtein","Monomer chemical distance"] }
239
+ //input: bool plotEmbeddings = true
231
240
  //input: func preprocessingFunction { optional: true }
232
241
  //input: object options { optional: true }
233
- //input: bool clusterEmbeddings { optional: true; default: true }
242
+ //input: bool clusterEmbeddings = true { optional: true }
234
243
  //input: bool isDemo { optional: true }
235
244
  //top-menu: Bio | Analyze | Sequence Space...
236
245
  //editor: Bio:SequenceSpaceEditor
@@ -242,8 +251,8 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
242
251
  //description: Converts sequences to molblocks
243
252
  //input: dataframe table { description: Input data table }
244
253
  //input: column seqCol { semType: Macromolecule; caption: Sequence }
245
- //input: bool nonlinear { default: false; caption: Non-linear; description: Slower mode for cycling/branching HELM structures }
246
- //input: bool highlight { default: false; caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
254
+ //input: bool nonlinear = false { caption: Non-linear; description: Slower mode for cycling/branching HELM structures }
255
+ //input: bool highlight = false { caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
247
256
  //top-menu: Bio | Transform | To Atomic Level...
248
257
  export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean) : Promise<void> {
249
258
  await PackageFunctions.toAtomicLevel(table, seqCol, nonlinear, highlight);
@@ -338,7 +347,7 @@ export function monomerCellRenderer() : any {
338
347
  return PackageFunctions.monomerCellRenderer();
339
348
  }
340
349
 
341
- //input: string path { choices: ['Demo:Files/','System:AppData/'] }
350
+ //input: string path { choices: ["Demo:Files/","System:AppData/"] }
342
351
  //output: dataframe result
343
352
  export async function testDetectMacromolecule(path: string) : Promise<any> {
344
353
  return await PackageFunctions.testDetectMacromolecule(path);
@@ -408,7 +417,7 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column) : void {
408
417
  PackageFunctions.SubsequenceSearchTopMenu(macromolecules);
409
418
  }
410
419
 
411
- //name: Identity Scoring
420
+ //name: Identity
412
421
  //description: Adds a column with fraction of matching monomers
413
422
  //input: dataframe table { description: Table containing Macromolecule column }
414
423
  //input: column macromolecule { semType: Macromolecule; description: Sequences to score }
@@ -419,7 +428,7 @@ export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule
419
428
  return await PackageFunctions.sequenceIdentityScoring(table, macromolecule, reference);
420
429
  }
421
430
 
422
- //name: Similarity Scoring
431
+ //name: Similarity
423
432
  //description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
424
433
  //input: dataframe table { description: Table containing Macromolecule column }
425
434
  //input: column macromolecule { semType: Macromolecule; description: Sequences to score }
@@ -486,30 +495,37 @@ export function bioSubstructureFilterTest() : any {
486
495
  return PackageFunctions.bioSubstructureFilterTest();
487
496
  }
488
497
 
498
+ //name: webLogoLargeApp
489
499
  export async function webLogoLargeApp() : Promise<void> {
490
500
  await PackageFunctions.webLogoLargeApp();
491
501
  }
492
502
 
503
+ //name: webLogoAggApp
493
504
  export async function webLogoAggApp() : Promise<void> {
494
505
  await PackageFunctions.webLogoAggApp();
495
506
  }
496
507
 
508
+ //name: getRegionApp
497
509
  export async function getRegionApp() : Promise<void> {
498
510
  await PackageFunctions.getRegionApp();
499
511
  }
500
512
 
513
+ //name: getRegionHelmApp
501
514
  export async function getRegionHelmApp() : Promise<void> {
502
515
  await PackageFunctions.getRegionHelmApp();
503
516
  }
504
517
 
518
+ //name: longSeqTableSeparator
505
519
  export function longSeqTableSeparator() : void {
506
520
  PackageFunctions.longSeqTableSeparator();
507
521
  }
508
522
 
523
+ //name: longSeqTableFasta
509
524
  export function longSeqTableFasta() : void {
510
525
  PackageFunctions.longSeqTableFasta();
511
526
  }
512
527
 
528
+ //name: longSeqTableHelm
513
529
  export function longSeqTableHelm() : void {
514
530
  PackageFunctions.longSeqTableHelm();
515
531
  }
@@ -523,15 +539,14 @@ export function addCopyMenu(cell: any, menu: any) : void {
523
539
  //description: Sequence similarity tracking and evaluation dataset diversity
524
540
  //meta.demoPath: Bioinformatics | Similarity, Diversity
525
541
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
526
- //meta.demoSkip: GROK-14320
527
542
  export async function demoBioSimilarityDiversity() : Promise<void> {
528
543
  await PackageFunctions.demoBioSimilarityDiversity();
529
544
  }
530
545
 
531
546
  //description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
547
+ //meta.isDemoDashboard: true
532
548
  //meta.demoPath: Bioinformatics | Sequence Space
533
549
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
534
- //meta.demoSkip: GROK-14320
535
550
  export async function demoBioSequenceSpace() : Promise<void> {
536
551
  await PackageFunctions.demoBioSequenceSpace();
537
552
  }
@@ -539,7 +554,6 @@ export async function demoBioSequenceSpace() : Promise<void> {
539
554
  //description: Activity Cliffs analysis on Macromolecules data
540
555
  //meta.demoPath: Bioinformatics | Activity Cliffs
541
556
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
542
- //meta.demoSkip: GROK-14320
543
557
  export async function demoBioActivityCliffs() : Promise<void> {
544
558
  await PackageFunctions.demoBioActivityCliffs();
545
559
  }
@@ -547,7 +561,6 @@ export async function demoBioActivityCliffs() : Promise<void> {
547
561
  //description: Atomic level structure of Macromolecules
548
562
  //meta.demoPath: Bioinformatics | Atomic Level
549
563
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
550
- //meta.demoSkip: GROK-14320
551
564
  export async function demoBioAtomicLevel() : Promise<void> {
552
565
  await PackageFunctions.demoBioAtomicLevel();
553
566
  }
@@ -578,7 +591,7 @@ export async function seqIdentity(seq: string, ref: string) : Promise<any> {
578
591
 
579
592
  //input: file file
580
593
  //input: string colName
581
- //input: double probeCount { default: 100 }
594
+ //input: double probeCount = 100
582
595
  export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: string, probeCount: number) : Promise<void> {
583
596
  await PackageFunctions.detectMacromoleculeProbe(file, colName, probeCount);
584
597
  }
@@ -590,7 +603,7 @@ export async function getSeqHelper() : Promise<any> {
590
603
 
591
604
  //input: dataframe df
592
605
  //input: column helmCol
593
- //input: bool chiralityEngine { default: true }
606
+ //input: bool chiralityEngine = true
594
607
  //output: column result
595
608
  export async function getMolFromHelm(df: DG.DataFrame, helmCol: DG.Column<any>, chiralityEngine: boolean) : Promise<any> {
596
609
  return await PackageFunctions.getMolFromHelm(df, helmCol, chiralityEngine);
package/src/package.ts CHANGED
@@ -70,12 +70,13 @@ import {GetRegionFuncEditor} from './utils/get-region-func-editor';
70
70
  import {sequenceToMolfile} from './utils/sequence-to-mol';
71
71
  import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
72
72
  import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
73
- import {MonomerManager, standardizeMonomerLibrary} from './utils/monomer-lib/monomer-manager/monomer-manager';
73
+ import {matchMoleculesWithMonomers, MonomerManager, standardizeMonomerLibrary} from './utils/monomer-lib/monomer-manager/monomer-manager';
74
74
  import {calculateScoresWithEmptyValues} from './utils/calculate-scores';
75
75
  import {SeqHelper} from './utils/seq-helper/seq-helper';
76
76
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
77
77
  import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atomic-level-widget';
78
78
  import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
79
+ import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
79
80
  export const _package = new BioPackage(/*{debug: true}/**/);
80
81
  export * from './package.g';
81
82
 
@@ -143,6 +144,14 @@ export class PackageFunctions {
143
144
  return await standardizeMonomerLibrary(library);
144
145
  }
145
146
 
147
+ @grok.decorators.func({'top-menu': 'Bio | Manage | Match with Monomer Library...', description: 'Matches molecules in a column with monomers from the selected library(s)',})
148
+ static async matchWithMonomerLibrary(table: DG.DataFrame,
149
+ @grok.decorators.param({type: 'column', options: {semType: 'Molecule'}})molecules: DG.Column,
150
+ @grok.decorators.param({type: 'string', options: {choices: ['PEPTIDE', 'RNA', 'CHEM'], initialValue: 'PEPTIDE', caption: 'Polymer Type'}})polymerType: PolymerType = 'PEPTIDE') {
151
+ const matchDF = await matchMoleculesWithMonomers(table, molecules.name, _package.monomerLib, polymerType);
152
+ grok.shell.addTableView(matchDF);
153
+ }
154
+
146
155
  // Keep for backward compatibility
147
156
  @grok.decorators.func({outputs: [{type: 'object', name: 'monomerLib'}]})
148
157
  static getBioLib(): IMonomerLib {
@@ -939,14 +948,14 @@ export class PackageFunctions {
939
948
  }
940
949
 
941
950
  @grok.decorators.func({
942
- name: 'Identity Scoring',
951
+ name: 'Identity',
943
952
  description: 'Adds a column with fraction of matching monomers',
944
953
  'top-menu': 'Bio | Calculate | Identity...',
945
954
  })
946
955
  static async sequenceIdentityScoring(
947
- @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
956
+ @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}}) table: DG.DataFrame,
948
957
  @grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
949
- @grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
958
+ @grok.decorators.param({options: {description: 'Sequence,matching column format'}}) reference: string
950
959
  ): Promise<DG.Column<number>> {
951
960
  const seqHelper = _package.seqHelper;
952
961
  const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.IDENTITY, seqHelper);
@@ -954,14 +963,14 @@ export class PackageFunctions {
954
963
  }
955
964
 
956
965
  @grok.decorators.func({
957
- name: 'Similarity Scoring',
966
+ name: 'Similarity',
958
967
  description: 'Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities',
959
968
  'top-menu': 'Bio | Calculate | Similarity...',
960
969
  })
961
970
  static async sequenceSimilarityScoring(
962
- @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
971
+ @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}}) table: DG.DataFrame,
963
972
  @grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
964
- @grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
973
+ @grok.decorators.param({options: {description: 'Sequence,matching column format'}}) reference: string
965
974
  ): Promise<DG.Column<number>> {
966
975
  const seqHelper = _package.seqHelper;
967
976
  const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.SIMILARITY, seqHelper);
@@ -1137,7 +1146,6 @@ export class PackageFunctions {
1137
1146
  description: 'Sequence similarity tracking and evaluation dataset diversity',
1138
1147
  demoPath: 'Bioinformatics | Similarity, Diversity',
1139
1148
  path: '/apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity',
1140
- demoSkip: 'GROK-14320'
1141
1149
  })
1142
1150
  static async demoBioSimilarityDiversity(): Promise<void> {
1143
1151
  await demoBioSimDiv();
@@ -1147,7 +1155,9 @@ export class PackageFunctions {
1147
1155
  description: 'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results',
1148
1156
  demoPath: 'Bioinformatics | Sequence Space',
1149
1157
  path: '/apps/Tutorials/Demo/Bioinformatics/Sequence%20Space',
1150
- demoSkip: 'GROK-14320'
1158
+ meta: {
1159
+ isDemoDashboard: 'true'
1160
+ }
1151
1161
  })
1152
1162
  static async demoBioSequenceSpace(): Promise<void> {
1153
1163
  await demoSeqSpace();
@@ -1157,7 +1167,6 @@ export class PackageFunctions {
1157
1167
  description: 'Activity Cliffs analysis on Macromolecules data',
1158
1168
  demoPath: 'Bioinformatics | Activity Cliffs',
1159
1169
  path: '/apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs',
1160
- demoSkip: 'GROK-14320'
1161
1170
  })
1162
1171
  static async demoBioActivityCliffs(): Promise<void> {
1163
1172
  await demoActivityCliffsCyclic();
@@ -1167,7 +1176,6 @@ export class PackageFunctions {
1167
1176
  description: 'Atomic level structure of Macromolecules',
1168
1177
  demoPath: 'Bioinformatics | Atomic Level',
1169
1178
  path: '/apps/Tutorials/Demo/Bioinformatics/Atomic%20Level',
1170
- demoSkip: 'GROK-14320'
1171
1179
  })
1172
1180
  static async demoBioAtomicLevel(): Promise<void> {
1173
1181
  await demoToAtomicLevel();
@@ -65,7 +65,7 @@ category('renderers', () => {
65
65
 
66
66
  test('scatterPlotTooltip', async () => {
67
67
  await _testScatterPlotTooltip();
68
- }, {skipReason: 'GROK-17450'});
68
+ });
69
69
 
70
70
  async function _rendererMacromoleculeFasta() {
71
71
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');
@@ -6,19 +6,14 @@ import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
6
6
  import {readDataframe} from './utils';
7
7
 
8
8
 
9
- category('viewers', () => {
10
- const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
11
- for (const v of viewers) {
12
- test(v, async () => {
13
- const df = await readDataframe('samples/FASTA_DNA.csv');
14
- await testViewer(v, df, {detectSemanticTypes: true});
15
- }, {
16
- skipReason: {
17
- 'Sequence Similarity Search': 'GROK-13162',
18
- 'Sequence Diversity Search': 'GROK-13162',
19
- 'WebLogo': 'GROK-13162',
20
- 'VdRegions': 'GROK-13162',
21
- }[v],
22
- });
23
- }
24
- });
9
+ // category('viewers', () => {
10
+ // const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
11
+ // for (const v of viewers) {
12
+ // test(v, async () => {
13
+ // const df = await readDataframe('samples/FASTA_DNA.csv');
14
+ // await df.meta.detectSemanticTypes();
15
+ // await grok.data.detectSemanticTypes(df);
16
+ // await testViewer(v, df, {detectSemanticTypes: true});
17
+ // });
18
+ // }
19
+ // });
@@ -57,6 +57,53 @@ export async function standardiseMonomers(monomers: Monomer[]) {
57
57
  return fixedMonomers;
58
58
  }
59
59
 
60
+ /// matches molecules in the dataframe with monomers in the library by canonical smiles
61
+ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
62
+ const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
63
+ if (!converterFunc)
64
+ throw new Error('Function convertMoleculeNotation not found, please install Chem package');
65
+ // first: stamdardize monomers
66
+ const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
67
+ const fixedMonomers = await standardiseMonomers(monomers);
68
+ const cappedSmilse = fixedMonomers.map((m, i) => ({sym: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source})).filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
69
+ // canonicalize all monomer smiles
70
+ const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilse.map((m) => m.smiles!));
71
+ monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
72
+ const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
73
+ if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
74
+ throw new Error('Error canonicalizing monomer smiles');
75
+ canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedSmilse[i].smiles = s);
76
+
77
+ const molecules = molDf.col(molColName)!;
78
+ const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
79
+ if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
80
+ throw new Error('Error canonicalizing molecules');
81
+
82
+ const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
83
+
84
+ const resultDf = molDf.clone();
85
+ const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
86
+ matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
87
+ const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
88
+ matchingMonomerSymbolCol.semType = 'Monomer';
89
+ const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
90
+ resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);
91
+
92
+ for (let i = 0; i < canonicalizedMolecules.length; i++) {
93
+ const mol = canonicalizedMolecules[i];
94
+ if (!mol) continue;
95
+ for (let j = 0; j < cappedSmilse.length; j++) {
96
+ if (cappedSmilse[j].smiles === mol) {
97
+ matchingMonomerSmilesCol.set(i, cappedSmilse[j].original!, false);
98
+ matchingMonomerSymbolCol.set(i, cappedSmilse[j].sym, false);
99
+ sourceLibCol.set(i, cappedSmilse[j].source ?? '', false);
100
+ break;
101
+ }
102
+ }
103
+ }
104
+ return resultDf;
105
+ }
106
+
60
107
  /** Standardizes the monomer library
61
108
  * warning: throws error if the library is not valid or has invalid monomers
62
109
  */
@@ -127,7 +174,7 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
127
174
  monomers[i].id,
128
175
  JSON.stringify(monomers[i].meta ?? {}),
129
176
  monomers[i].lib?.source ?? '',
130
- ]);
177
+ ], false);
131
178
  // something is wrong with setting dates, so setting it manually for now
132
179
  try {
133
180
  if (date)
@@ -15,23 +15,21 @@ const FASTA_LINE_WIDTH = 60;
15
15
  export function saveAsFastaUI(): void {
16
16
  // Use grid for column order adjusted by user
17
17
  const grid: DG.Grid = grok.shell.tv.grid;
18
+ const dataFrame: DG.DataFrame = grid.dataFrame;
18
19
 
19
- const idGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)
20
- .map((colI: number) => grid.columns.byIndex(colI)!)
21
- .filter((gcol: DG.GridColumn) => gcol.column ? gcol.column.semType !== DG.SEMTYPE.MACROMOLECULE : false).toArray();
22
- const defaultIdGCol: DG.GridColumn | undefined = idGColList
23
- .find((gcol: DG.GridColumn) => gcol.name.toLowerCase().indexOf('id') !== -1);
24
- const idDefaultValue = defaultIdGCol ? [defaultIdGCol.name] : [];
20
+ const idGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)
21
+ .map((colI: number) => dataFrame.columns.byIndex(colI)!)
22
+ .filter((col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE).toArray();
23
+ const defaultIdGCol: DG.Column | undefined = idGColList
24
+ .find((col: DG.Column) => col.name.toLowerCase().indexOf('id') !== -1);
25
+ const idDefaultValue = defaultIdGCol ? [defaultIdGCol] : [];
25
26
 
26
- const idGColListInput = ui.input.multiChoice('Seq id columns', {
27
- value: idDefaultValue,
28
- items: idGColList.map((gcol: DG.GridColumn) => gcol.name)
29
- });
27
+ const idGColListInput = ui.input.columns('Seq id columns', {table: dataFrame, value: idDefaultValue,
28
+ filter: (col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE});
30
29
 
31
- const seqGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)/* range rom 0 to grid.columns.length */
32
- .map((colI: number) => grid.columns.byIndex(colI)!)
33
- .filter((gc: DG.GridColumn) => {
34
- const col: DG.Column | null = gc.column;
30
+ const seqGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)/* range rom 0 to grid.columns.length */
31
+ .map((colI: number) => dataFrame.columns.byIndex(colI)!)
32
+ .filter((col: DG.Column) => {
35
33
  if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
36
34
  const sh = _package.seqHelper.getSeqHandler(col);
37
35
  return sh.isFasta();
@@ -39,10 +37,17 @@ export function saveAsFastaUI(): void {
39
37
  return false;
40
38
  }).toArray();
41
39
 
42
- const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0].name : [];
43
- const seqColInput = ui.input.choice('Seq column', {
44
- value: seqDefaultValue,
45
- items: seqGColList.map((gCol: DG.GridColumn) => gCol.name)
40
+ const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0] : null;
41
+ const seqColInput = ui.input.column('Seq column', {
42
+ table: dataFrame,
43
+ value: seqDefaultValue!,
44
+ filter: (col) => {
45
+ if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
46
+ const sh = _package.seqHelper.getSeqHandler(col);
47
+ return sh.isFasta();
48
+ }
49
+ return false;
50
+ }
46
51
  });
47
52
 
48
53
  const lineWidthInput = ui.input.int('FASTA line width', {value: FASTA_LINE_WIDTH});
@@ -54,10 +59,8 @@ export function saveAsFastaUI(): void {
54
59
  lineWidthInput,
55
60
  ]))
56
61
  .onOK(() => {
57
- const valueIdColList: DG.Column[] = idGColListInput.value ?
58
- idGColListInput.value.map((colName: string) => grid.columns.byName(colName)!.column!) : [];
59
- const valueSeqCol: DG.Column | null = seqColInput.value ?
60
- grid.columns.byName(seqColInput.value as string)!.column : null;
62
+ const valueIdColList: DG.Column[] = idGColListInput.value ?? [];
63
+ const valueSeqCol: DG.Column | null = seqColInput.value ?? null;
61
64
  const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
62
65
 
63
66
  if (!valueSeqCol)