@datagrok/bio 2.22.10 → 2.22.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,53 @@ export async function standardiseMonomers(monomers: Monomer[]) {
57
57
  return fixedMonomers;
58
58
  }
59
59
 
60
/**
 * Matches molecules of a dataframe column with monomers of a library by canonical SMILES.
 *
 * Monomers of the requested polymer type are standardised, capped with `capSmiles`, and
 * canonicalized with the Chem package function `convertMoleculeNotation`. The molecules
 * are canonicalized with the same function and compared to the monomers by exact string
 * equality. When several monomers share a canonical SMILES, the first one (in the order
 * returned by the library) wins.
 *
 * @param molDf - source dataframe; it is cloned, the original is not modified
 * @param molColName - name of the column in `molDf` holding the molecules
 * @param monomerLib - monomer library to match against
 * @param polymerType - polymer type of the monomers to consider, `'PEPTIDE'` by default
 * @returns a clone of `molDf` with three added string columns (matched monomer symbol,
 *   matched monomer smiles, matched monomer source); rows without a match are left unset
 * @throws Error if `convertMoleculeNotation` is not found, if `molColName` is not a column
 *   of `molDf`, or if canonicalization returns a column of unexpected length
 */
export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
  const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
  if (!converterFunc)
    throw new Error('Function convertMoleculeNotation not found, please install Chem package');

  // Fail fast with a clear message instead of handing a null column to the converter.
  const molecules = molDf.col(molColName);
  if (!molecules)
    throw new Error(`Column '${molColName}' not found in the dataframe`);

  // first: standardize monomers
  const monomers = monomerLib.getMonomerSymbolsByType(polymerType)
    .map((s) => monomerLib.getMonomer(polymerType, s)!)
    .filter((m) => m && (m.smiles || m.molfile));
  const fixedMonomers = await standardiseMonomers(monomers);
  // Keep only monomers whose capped smiles is non-empty and has no '[*:' r-group label left.
  const cappedSmiles = fixedMonomers
    .map((m, i) => ({
      sym: m.symbol,
      smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []),
      original: m.smiles,
      source: monomers[i]?.lib?.source,
    }))
    .filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));

  // canonicalize all monomer smiles
  const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmiles.map((m) => m.smiles!));
  monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
  const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
  if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
    throw new Error('Error canonicalizing monomer smiles');

  // Index monomers by canonical smiles. Only the first monomer per smiles is stored,
  // which reproduces the "first match wins" result of a linear scan.
  const monomerByCanonicalSmiles = new Map<string, typeof cappedSmiles[number]>();
  canonicalizedMonomersSmilesCol.toList().forEach((canonical, i) => {
    if (canonical && !monomerByCanonicalSmiles.has(canonical))
      monomerByCanonicalSmiles.set(canonical, cappedSmiles[i]);
  });

  const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
  if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
    throw new Error('Error canonicalizing molecules');

  const canonicalizedMolecules = canonicalizedMoleculesCol.toList();

  const resultDf = molDf.clone();
  const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
  matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
  const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
  matchingMonomerSymbolCol.semType = 'Monomer';
  const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
  resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);

  for (let i = 0; i < canonicalizedMolecules.length; i++) {
    const mol = canonicalizedMolecules[i];
    if (!mol) continue; // molecule could not be canonicalized or the cell is empty
    const match = monomerByCanonicalSmiles.get(mol);
    if (!match) continue;
    // The third argument (false) is kept from the original per-cell set calls.
    matchingMonomerSmilesCol.set(i, match.original!, false);
    matchingMonomerSymbolCol.set(i, match.sym, false);
    sourceLibCol.set(i, match.source ?? '', false);
  }
  return resultDf;
}
106
+
60
107
  /** Standardizes the monomer library
61
108
  * warning: throws error if the library is not valid or has invalid monomers
62
109
  */
@@ -127,7 +174,7 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
127
174
  monomers[i].id,
128
175
  JSON.stringify(monomers[i].meta ?? {}),
129
176
  monomers[i].lib?.source ?? '',
130
- ]);
177
+ ], false);
131
178
  // something is wrong with setting dates, so setting it manually for now
132
179
  try {
133
180
  if (date)
@@ -15,23 +15,21 @@ const FASTA_LINE_WIDTH = 60;
15
15
  export function saveAsFastaUI(): void {
16
16
  // Use grid for column order adjusted by user
17
17
  const grid: DG.Grid = grok.shell.tv.grid;
18
+ const dataFrame: DG.DataFrame = grid.dataFrame;
18
19
 
19
- const idGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)
20
- .map((colI: number) => grid.columns.byIndex(colI)!)
21
- .filter((gcol: DG.GridColumn) => gcol.column ? gcol.column.semType !== DG.SEMTYPE.MACROMOLECULE : false).toArray();
22
- const defaultIdGCol: DG.GridColumn | undefined = idGColList
23
- .find((gcol: DG.GridColumn) => gcol.name.toLowerCase().indexOf('id') !== -1);
24
- const idDefaultValue = defaultIdGCol ? [defaultIdGCol.name] : [];
20
+ const idGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)
21
+ .map((colI: number) => dataFrame.columns.byIndex(colI)!)
22
+ .filter((col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE).toArray();
23
+ const defaultIdGCol: DG.Column | undefined = idGColList
24
+ .find((col: DG.Column) => col.name.toLowerCase().indexOf('id') !== -1);
25
+ const idDefaultValue = defaultIdGCol ? [defaultIdGCol] : [];
25
26
 
26
- const idGColListInput = ui.input.multiChoice('Seq id columns', {
27
- value: idDefaultValue,
28
- items: idGColList.map((gcol: DG.GridColumn) => gcol.name)
29
- });
27
+ const idGColListInput = ui.input.columns('Seq id columns', {table: dataFrame, value: idDefaultValue,
28
+ filter: (col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE});
30
29
 
31
- const seqGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)/* range rom 0 to grid.columns.length */
32
- .map((colI: number) => grid.columns.byIndex(colI)!)
33
- .filter((gc: DG.GridColumn) => {
34
- const col: DG.Column | null = gc.column;
30
+ const seqGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)/* range rom 0 to grid.columns.length */
31
+ .map((colI: number) => dataFrame.columns.byIndex(colI)!)
32
+ .filter((col: DG.Column) => {
35
33
  if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
36
34
  const sh = _package.seqHelper.getSeqHandler(col);
37
35
  return sh.isFasta();
@@ -39,10 +37,17 @@ export function saveAsFastaUI(): void {
39
37
  return false;
40
38
  }).toArray();
41
39
 
42
- const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0].name : [];
43
- const seqColInput = ui.input.choice('Seq column', {
44
- value: seqDefaultValue,
45
- items: seqGColList.map((gCol: DG.GridColumn) => gCol.name)
40
+ const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0] : null;
41
+ const seqColInput = ui.input.column('Seq column', {
42
+ table: dataFrame,
43
+ value: seqDefaultValue!,
44
+ filter: (col) => {
45
+ if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
46
+ const sh = _package.seqHelper.getSeqHandler(col);
47
+ return sh.isFasta();
48
+ }
49
+ return false;
50
+ }
46
51
  });
47
52
 
48
53
  const lineWidthInput = ui.input.int('FASTA line width', {value: FASTA_LINE_WIDTH});
@@ -54,10 +59,8 @@ export function saveAsFastaUI(): void {
54
59
  lineWidthInput,
55
60
  ]))
56
61
  .onOK(() => {
57
- const valueIdColList: DG.Column[] = idGColListInput.value ?
58
- idGColListInput.value.map((colName: string) => grid.columns.byName(colName)!.column!) : [];
59
- const valueSeqCol: DG.Column | null = seqColInput.value ?
60
- grid.columns.byName(seqColInput.value as string)!.column : null;
62
+ const valueIdColList: DG.Column[] = idGColListInput.value ?? [];
63
+ const valueSeqCol: DG.Column | null = seqColInput.value ?? null;
61
64
  const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
62
65
 
63
66
  if (!valueSeqCol)