@datagrok/bio 2.25.12 → 2.25.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.25.12",
8
+ "version": "2.25.14",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -4,7 +4,6 @@ import * as DG from 'datagrok-api/dg';
4
4
  import {_package, PackageFunctions} from '../package';
5
5
  import {handleError} from './utils';
6
6
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
7
- import {delay} from '@datagrok-libraries/test/src/test';
8
7
  import {adjustGridcolAfterRender} from '../utils/ui-utils';
9
8
 
10
9
  export async function demoToAtomicLevel(): Promise<void> {
@@ -63,7 +62,7 @@ export async function demoBio03UI(): Promise<void> {
63
62
  dlg = ui.dialog()
64
63
  .add(sketcher)
65
64
  .show();
66
- await delay(3000);
65
+ await DG.delay(3000);
67
66
  dlg.close();
68
67
  }, {
69
68
  description: 'Display atomic level structure within a sketcher.',
@@ -2,7 +2,6 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {delay} from '@datagrok-libraries/test/src/test';
6
5
  import {TAGS as bioTAGS, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
6
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
8
7
 
@@ -154,7 +153,7 @@ export async function helmSubstructureSearch(
154
153
  export async function invalidateMols(col: DG.Column<string>, seqHelper: ISeqHelper, pattern: boolean): Promise<void> {
155
154
  const progressBar = DG.TaskBarProgressIndicator.create(`Invalidating molfiles for ${col.name}`);
156
155
  try {
157
- await delay(10);
156
+ await DG.delay(10);
158
157
  const monomersDict = new Map();
159
158
  const monomericMolsCol = await getMonomericMols(col, seqHelper, pattern, monomersDict);
160
159
  col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
@@ -4,7 +4,6 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
 
6
6
  import {_package} from '../package';
7
- import {delay} from '@datagrok-libraries/test/src/test';
8
7
 
9
8
  type IDetectorReport = { categoriesSample: any[], rejectReason: string };
10
9
 
@@ -31,7 +30,7 @@ export async function detectMacromoleculeProbeDo(
31
30
  if ((progress - progressLast) >= 0.1) {
32
31
  progressLast = progress;
33
32
  pi.update(100 * progress, `detectMacromolecule probe ${failCount}/${i}/${probeCount} ...`);
34
- await delay(0);
33
+ await DG.delay(0);
35
34
  }
36
35
  }
37
36
  if (failCount > 0)
@@ -4,7 +4,6 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {delay} from '@datagrok-libraries/test/src/test';
8
7
  import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
9
8
  import {DEFAULT_FILES_LIB_PROVIDER_NAME, findProviderWithLibraryName, IMonomerLib, IMonomerSet} from '@datagrok-libraries/bio/src/types/monomer-library';
10
9
  import {
@@ -91,7 +90,7 @@ export class MonomerLibManager implements IMonomerLibHelper {
91
90
  return true;
92
91
  })(),
93
92
  (async () => {
94
- await delay(timeout);
93
+ await DG.delay(timeout);
95
94
  return false;
96
95
  })(),
97
96
  ]).then((res) => {
@@ -0,0 +1,347 @@
1
+ /* eslint-disable max-lines-per-function */
2
+ /* eslint-disable max-len */
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
7
+ import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
8
+
9
+ import {STANDRARD_R_GROUPS} from './const';
10
+ import {standardiseMonomers, capSmiles, getCorrectedSmiles} from './monomer-manager';
11
+
12
+ /** Represents a single monomer match result — one monomer that matched a molecule */
13
+ interface MonomerMatch {
14
+ /** monomer symbol from the library */
15
+ symbol: string;
16
+ /** canonical (possibly capped) SMILES used for the match */
17
+ smiles: string;
18
+ /** original SMILES from the monomer definition */
19
+ original: string;
20
+ /** library source name */
21
+ source: string;
22
+ }
23
+
24
+ /** Maps keyed by canonical SMILES, where each key can map to multiple monomers */
25
+ type MonomerSmilesMap = {[smiles: string]: MonomerMatch[]};
26
+
27
+ const MATCH_SEPARATOR = ' | ';
28
+
29
+ /**
30
+ * Builds lookup maps from standardized monomers:
31
+ * - uncappedMap: maps raw canonical monomer SMILES -> MonomerMatch[]
32
+ * - cappedMap: maps capped (R-groups replaced with cap groups) canonical SMILES -> MonomerMatch[]
33
+ * Both maps store arrays so that duplicate monomers (same structure, different symbols/libs) are preserved.
34
+ */
35
+ async function buildMonomerSmilesMaps(
36
+ fixedMonomers: Monomer[], originalMonomers: Monomer[], converterFunc: DG.Func,
37
+ ): Promise<{cappedMap: MonomerSmilesMap; uncappedMap: MonomerSmilesMap}> {
38
+ // build uncapped map from raw monomer SMILES
39
+ const uncappedMap: MonomerSmilesMap = {};
40
+ for (const m of fixedMonomers) {
41
+ if (!m.smiles) continue;
42
+ const match: MonomerMatch = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source ?? ''};
43
+ if (!uncappedMap[m.smiles]) uncappedMap[m.smiles] = [];
44
+ uncappedMap[m.smiles].push(match);
45
+ }
46
+
47
+ // build capped monomer entries: replace R-groups with cap group atoms
48
+ const cappedEntries = fixedMonomers
49
+ .map((m, i) => ({
50
+ symbol: m.symbol,
51
+ smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []),
52
+ original: m.smiles,
53
+ source: originalMonomers[i]?.lib?.source ?? '',
54
+ }))
55
+ .filter((e) => !!e.smiles && !e.smiles.includes('[*:'));
56
+
57
+ // canonicalize all capped SMILES in bulk
58
+ const cappedSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'CappedSmiles', cappedEntries.map((e) => e.smiles));
59
+ cappedSmilesCol.semType = DG.SEMTYPE.MOLECULE;
60
+ const canonicalCappedCol: DG.Column = await converterFunc.apply({molecule: cappedSmilesCol, targetNotation: DG.chem.Notation.Smiles});
61
+ if (!canonicalCappedCol || canonicalCappedCol.length !== cappedSmilesCol.length)
62
+ throw new Error('Error canonicalizing capped monomer SMILES');
63
+
64
+ // build capped map with canonicalized SMILES as keys
65
+ const cappedMap: MonomerSmilesMap = {};
66
+ const canonicalCappedList = canonicalCappedCol.toList();
67
+ for (let i = 0; i < canonicalCappedList.length; i++) {
68
+ const smi = canonicalCappedList[i];
69
+ if (!smi) continue;
70
+ cappedEntries[i].smiles = smi;
71
+ const match: MonomerMatch = cappedEntries[i];
72
+ if (!cappedMap[smi]) cappedMap[smi] = [];
73
+ cappedMap[smi].push(match);
74
+ }
75
+
76
+ return {cappedMap, uncappedMap};
77
+ }
78
+
79
+ /**
80
+ * Corrects and canonicalizes the input molecule column.
81
+ * Handles both SMILES and molblock inputs.
82
+ * Returns the list of canonical SMILES strings (null for invalid molecules).
83
+ */
84
+ async function canonicalizeMolecules(
85
+ molDf: DG.DataFrame, molColName: string, converterFunc: DG.Func,
86
+ ): Promise<(string | null)[]> {
87
+ const moleculesOriginalCol = molDf.col(molColName)!;
88
+ const correctedList = moleculesOriginalCol.toList().map((s) => {
89
+ if (!s) return s;
90
+ try {
91
+ const isMolBlock = s.includes('\n');
92
+ return getCorrectedSmiles([], isMolBlock ? undefined : s, isMolBlock ? s : undefined);
93
+ } catch (_e) {
94
+ return s;
95
+ }
96
+ });
97
+
98
+ const correctedCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MoleculesCorrected', correctedList);
99
+ correctedCol.semType = DG.SEMTYPE.MOLECULE;
100
+ // dummy df needed for semtype detection by converterFunc
101
+ const _ddf = DG.DataFrame.fromColumns([correctedCol]);
102
+
103
+ const canonicalCol: DG.Column = await converterFunc.apply({molecule: correctedCol, targetNotation: DG.chem.Notation.Smiles});
104
+ if (!canonicalCol || canonicalCol.length !== correctedCol.length)
105
+ throw new Error('Error canonicalizing molecules');
106
+
107
+ return canonicalCol.toList();
108
+ }
109
+
110
+ /**
111
+ * Attempts to match a single canonical molecule SMILES against the lookup maps.
112
+ * Tries in order: capped map -> uncapped map -> cap the molecule with standard R-groups and retry.
113
+ * Returns all matching monomers (can be multiple from different libraries).
114
+ */
115
+ function matchBySmiles(
116
+ canonicalMol: string, cappedMap: MonomerSmilesMap, uncappedMap: MonomerSmilesMap,
117
+ ): MonomerMatch[] {
118
+ // try direct lookup in capped and uncapped maps
119
+ let matches = cappedMap[canonicalMol] ?? uncappedMap[canonicalMol];
120
+ if (matches && matches.length > 0) return matches;
121
+
122
+ // fallback: cap the molecule with standard R-groups and try again
123
+ const cappedMol = capSmiles(canonicalMol, STANDRARD_R_GROUPS);
124
+ if (cappedMol !== canonicalMol) {
125
+ const correctedMol = grok.chem.convert(cappedMol, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
126
+ matches = cappedMap[correctedMol] ?? uncappedMap[correctedMol];
127
+ if (matches && matches.length > 0) return matches;
128
+ }
129
+
130
+ return [];
131
+ }
132
+
133
+ /**
134
+ * Builds a Morgan fingerprint lookup map for all capped monomer SMILES.
135
+ * The map keys are fingerprint binary strings (via DG.BitSet.toBinaryString()),
136
+ * which allows fast exact matching that is tolerant of explicit hydrogen
137
+ * and minor stereochemistry differences.
138
+ */
139
+ async function buildMonomerFingerprintMap(
140
+ cappedMap: MonomerSmilesMap,
141
+ ): Promise<{fpMap: {[fpString: string]: MonomerMatch[]}; cappedSmilesList: string[]}> {
142
+ const cappedSmilesList = Object.keys(cappedMap);
143
+ if (cappedSmilesList.length === 0)
144
+ return {fpMap: {}, cappedSmilesList: []};
145
+
146
+ const monomerCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilesList);
147
+ monomerCol.semType = DG.SEMTYPE.MOLECULE;
148
+
149
+ const fpCol: DG.Column = await grok.functions.call('Chem:getMorganFingerprints', {molColumn: monomerCol});
150
+
151
+ const fpMap: {[fpString: string]: MonomerMatch[]} = {};
152
+ for (let i = 0; i < fpCol.length; i++) {
153
+ const fp: DG.BitSet | null = fpCol.get(i);
154
+ if (!fp) continue;
155
+ const fpStr = fp.toBinaryString();
156
+ // merge monomer matches from the SMILES map into the fingerprint map
157
+ const smilesMatches = cappedMap[cappedSmilesList[i]] ?? [];
158
+ if (!fpMap[fpStr]) fpMap[fpStr] = [];
159
+ fpMap[fpStr].push(...smilesMatches);
160
+ }
161
+
162
+ return {fpMap, cappedSmilesList};
163
+ }
164
+
165
+ /**
166
+ * For molecules that were not matched by exact SMILES, attempts matching via
167
+ * Morgan fingerprints. Computes fingerprints for unmatched molecules and looks
168
+ * them up in the monomer fingerprint map. Also tries capping with standard R-groups.
169
+ */
170
+ async function matchByFingerprint(
171
+ unmatchedIndices: number[],
172
+ canonicalizedMolecules: (string | null)[],
173
+ monomerFpMap: {[fpString: string]: MonomerMatch[]},
174
+ ): Promise<Map<number, MonomerMatch[]>> {
175
+ const results = new Map<number, MonomerMatch[]>();
176
+ if (unmatchedIndices.length === 0 || Object.keys(monomerFpMap).length === 0)
177
+ return results;
178
+
179
+ // collect SMILES for unmatched molecules (uncapped first)
180
+ const uncappedSmiles: string[] = unmatchedIndices.map((idx) => canonicalizedMolecules[idx] ?? '');
181
+
182
+ // also prepare capped versions
183
+ const cappedSmiles: string[] = uncappedSmiles.map((s) => {
184
+ if (!s) return '';
185
+ const capped = capSmiles(s, STANDRARD_R_GROUPS);
186
+ return capped !== s ? grok.chem.convert(capped, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles) : s;
187
+ });
188
+
189
+ // compute fingerprints for both uncapped and capped molecules
190
+ const uncappedCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'UnmatchedMols', uncappedSmiles);
191
+ uncappedCol.semType = DG.SEMTYPE.MOLECULE;
192
+ const cappedCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'UnmatchedMolsCapped', cappedSmiles);
193
+ cappedCol.semType = DG.SEMTYPE.MOLECULE;
194
+
195
+ const [uncappedFpCol, cappedFpCol]: [DG.Column, DG.Column] = await Promise.all([
196
+ grok.functions.call('Chem:getMorganFingerprints', {molColumn: uncappedCol}),
197
+ grok.functions.call('Chem:getMorganFingerprints', {molColumn: cappedCol}),
198
+ ]);
199
+
200
+ for (let i = 0; i < unmatchedIndices.length; i++) {
201
+ const molIdx = unmatchedIndices[i];
202
+
203
+ // try uncapped fingerprint first
204
+ const uncappedFp: DG.BitSet | null = uncappedFpCol.get(i);
205
+ if (uncappedFp) {
206
+ const fpStr = uncappedFp.toBinaryString();
207
+ const matches = monomerFpMap[fpStr];
208
+ if (matches && matches.length > 0) {
209
+ results.set(molIdx, matches);
210
+ continue;
211
+ }
212
+ }
213
+
214
+ // fallback: try capped fingerprint
215
+ const cappedFp: DG.BitSet | null = cappedFpCol.get(i);
216
+ if (cappedFp) {
217
+ const fpStr = cappedFp.toBinaryString();
218
+ const matches = monomerFpMap[fpStr];
219
+ if (matches && matches.length > 0)
220
+ results.set(molIdx, matches);
221
+ }
222
+ }
223
+
224
+ return results;
225
+ }
226
+
227
+ /** Deduplicates matches by symbol, keeping one entry per unique monomer symbol */
228
+ function deduplicateMatches(matches: MonomerMatch[]): MonomerMatch[] {
229
+ const seen = new Set<string>();
230
+ return matches.filter((m) => {
231
+ if (seen.has(m.symbol)) return false;
232
+ seen.add(m.symbol);
233
+ return true;
234
+ });
235
+ }
236
+
237
+ /** Collects all source library names for matched monomers, including known duplicates */
238
+ function collectSources(
239
+ matches: MonomerMatch[], duplicates: {[symbol: string]: Monomer[]},
240
+ ): string {
241
+ const sources = new Set<string>();
242
+ for (const m of matches) {
243
+ // check if monomerLib knows about duplicates for this symbol across libraries
244
+ const dups = duplicates[m.symbol];
245
+ if (dups && dups.length > 0) {
246
+ for (const dup of dups) {
247
+ const s = dup?.lib?.source;
248
+ if (s) sources.add(s);
249
+ }
250
+ } else if (m.source)
251
+ sources.add(m.source);
252
+ }
253
+ return Array.from(sources).join(', ');
254
+ }
255
+
256
+ /**
257
+ * Matches molecules in a dataframe with monomers from a monomer library.
258
+ *
259
+ * Matching pipeline:
260
+ * 1. Standardize monomers and build SMILES lookup maps (capped & uncapped)
261
+ * 2. Canonicalize input molecules
262
+ * 3. Phase 1: exact canonical SMILES matching (capped, uncapped, and fallback-capped molecule)
263
+ * 4. Phase 2: Morgan fingerprint fallback for molecules that didn't match by SMILES
264
+ * 5. Populate result columns (supports multiple matches per molecule via pipe-delimited values)
265
+ *
266
+ * @returns cloned input DataFrame with added match columns
267
+ */
268
+ export async function matchMoleculesWithMonomers(
269
+ molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE',
270
+ ): Promise<DG.DataFrame> {
271
+ const duplicates = monomerLib.duplicateMonomers?.[polymerType] ?? {};
272
+ const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
273
+ if (!converterFunc)
274
+ throw new Error('Function convertMoleculeNotation not found, please install Chem package');
275
+
276
+ // === Step 1: Standardize monomers and build SMILES lookup maps ===
277
+ const monomers = monomerLib.getMonomerSymbolsByType(polymerType)
278
+ .map((s) => monomerLib.getMonomer(polymerType, s)!)
279
+ .filter((m) => m && (m.smiles || m.molfile));
280
+
281
+ const fixedMonomers = await standardiseMonomers(monomers);
282
+ // preserve library reference from original monomers (lost during standardization)
283
+ fixedMonomers.forEach((m, i) => { m.lib = monomers[i].lib; });
284
+
285
+ const {cappedMap, uncappedMap} = await buildMonomerSmilesMaps(fixedMonomers, monomers, converterFunc);
286
+
287
+ // === Step 2: Canonicalize input molecules ===
288
+ const canonicalizedMolecules = await canonicalizeMolecules(molDf, molColName, converterFunc);
289
+
290
+ // === Step 3: Phase 1 — Exact canonical SMILES matching ===
291
+ // matchResults[i] holds all MonomerMatch entries for molecule i (empty array if unmatched)
292
+ const matchResults: MonomerMatch[][] = new Array(canonicalizedMolecules.length).fill(null).map(() => []);
293
+ const unmatchedIndices: number[] = [];
294
+
295
+ for (let i = 0; i < canonicalizedMolecules.length; i++) {
296
+ const mol = canonicalizedMolecules[i];
297
+ if (!mol) continue;
298
+ const smilesMatches = matchBySmiles(mol, cappedMap, uncappedMap);
299
+ if (smilesMatches.length > 0)
300
+ matchResults[i] = smilesMatches;
301
+ else
302
+ unmatchedIndices.push(i);
303
+ }
304
+
305
+ // === Step 4: Phase 2 — Morgan fingerprint fallback for unmatched molecules ===
306
+ if (unmatchedIndices.length > 0) {
307
+ try {
308
+ const {fpMap} = await buildMonomerFingerprintMap(cappedMap);
309
+ const fpMatches = await matchByFingerprint(unmatchedIndices, canonicalizedMolecules, fpMap);
310
+ for (const [idx, matches] of fpMatches)
311
+ matchResults[idx] = matches;
312
+ } catch (e) {
313
+ console.warn('Fingerprint fallback matching failed, continuing with SMILES matches only:', e);
314
+ }
315
+ }
316
+
317
+ // === Step 5: Populate result columns ===
318
+ const resultDf = molDf.clone();
319
+ const symbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
320
+ const smilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
321
+ smilesCol.semType = DG.SEMTYPE.MOLECULE;
322
+ const sourceCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
323
+ const matchCountCol = resultDf.columns.addNewInt(resultDf.columns.getUnusedName('Match count'));
324
+ const matchMethodCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Match method'));
325
+ resultDf.columns.setOrder([molColName, symbolCol.name, smilesCol.name, sourceCol.name, matchCountCol.name, matchMethodCol.name]);
326
+
327
+ for (let i = 0; i < matchResults.length; i++) {
328
+ const matches = matchResults[i];
329
+ if (matches.length === 0) continue;
330
+
331
+ // deduplicate matches by symbol (same monomer can appear from multiple lookup paths)
332
+ const uniqueMatches = deduplicateMatches(matches);
333
+
334
+ // collect all sources, including duplicates from the monomer library
335
+ const allSources = collectSources(uniqueMatches, duplicates);
336
+
337
+ symbolCol.set(i, uniqueMatches.map((m) => m.symbol).join(MATCH_SEPARATOR), false);
338
+ smilesCol.set(i, uniqueMatches[0].original ?? uniqueMatches[0].smiles, false);
339
+ sourceCol.set(i, allSources, false);
340
+ matchCountCol.set(i, uniqueMatches.length, false);
341
+ // fingerprint matches are those from phase 2 (indices that were in unmatchedIndices)
342
+ const method = unmatchedIndices.includes(i) ? 'fingerprint' : 'exact';
343
+ matchMethodCol.set(i, method, false);
344
+ }
345
+
346
+ return resultDf;
347
+ }
@@ -15,11 +15,10 @@ import {PolymerType, MonomerType} from '@datagrok-libraries/bio/src/helm/types';
15
15
  import {MonomerLibManager} from '../lib-manager';
16
16
 
17
17
  import {MONOMER_RENDERER_TAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer';
18
- import {BioTags} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
18
+ import {BioTags, MONOMER_MOTIF_SPLITTER} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
19
19
  //@ts-ignore
20
20
  import '../../../../css/monomer-manager.css';
21
21
  import {Subscription} from 'rxjs';
22
- import {STANDRARD_R_GROUPS} from './const';
23
22
 
24
23
  // columns of monomers dataframe, note that rgroups is hidden and will be displayed as separate columns
25
24
  export enum MONOMER_DF_COLUMN_NAMES {
@@ -60,84 +59,7 @@ export async function standardiseMonomers(monomers: Monomer[]) {
60
59
  return fixedMonomers;
61
60
  }
62
61
 
63
- /// matches molecules in the dataframe with monomers in the library by canonical smiles
64
- export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
65
- const duplicates = monomerLib.duplicateMonomers?.[polymerType] ?? {};
66
- const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
67
- if (!converterFunc)
68
- throw new Error('Function convertMoleculeNotation not found, please install Chem package');
69
- // first: stamdardize monomers
70
- const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
71
- const fixedMonomers = await standardiseMonomers(monomers);
72
- fixedMonomers.forEach((m, i) => {
73
- m.lib = monomers[i].lib;
74
- });
75
- const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
76
- acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
77
- }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
78
- const cappedMonomerSmiles = fixedMonomers.map((m, i) => ({symbol: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source}))
79
- .filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
80
-
81
- // canonicalize all monomer smiles
82
- const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedMonomerSmiles.map((m) => m.smiles!));
83
- monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
84
- const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
85
- if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
86
- throw new Error('Error canonicalizing monomer smiles');
87
- canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedMonomerSmiles[i].smiles = s);
88
- const cappedMonomerSmilesMap = cappedMonomerSmiles.reduce((acc, m) => { acc[m.smiles] = m; return acc; }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
89
-
90
- const moleculesOriginalCol = molDf.col(molColName)!;
91
- const correctedOriginalList = moleculesOriginalCol.toList().map((s) => {
92
- if (!s) return s;
93
- try {
94
- const isMolBlock = s.includes('\n');
95
- return getCorrectedSmiles([], isMolBlock ? undefined : s, isMolBlock ? s : undefined);
96
- } catch (_e) {
97
- return s;
98
- }
99
- });
100
- const moleculesOriginalColCorrected = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MoleculesOriginalCorrected', correctedOriginalList);
101
- // create dummy df
102
- moleculesOriginalColCorrected.semType = DG.SEMTYPE.MOLECULE;
103
- const _ddf = DG.DataFrame.fromColumns([moleculesOriginalColCorrected]);
104
- const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: moleculesOriginalColCorrected, targetNotation: DG.chem.Notation.Smiles});
105
- if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== moleculesOriginalColCorrected.length)
106
- throw new Error('Error canonicalizing molecules');
107
-
108
- const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
109
-
110
- const resultDf = molDf.clone();
111
- const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
112
- matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
113
- const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
114
- matchingMonomerSymbolCol.semType = 'Monomer';
115
- const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
116
- resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);
117
-
118
- for (let i = 0; i < canonicalizedMolecules.length; i++) {
119
- const mol = canonicalizedMolecules[i];
120
- if (!mol) continue;
121
- let match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
122
- if (!match) {
123
- // try capping the molecule and matching again
124
- const cappedMol = capSmiles(mol, STANDRARD_R_GROUPS);
125
- if (cappedMol !== mol) {
126
- const correctedMol = grok.chem.convert(cappedMol, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
127
- match = cappedMonomerSmilesMap[correctedMol] ?? unCappedMonomerSmilesMap[correctedMol];
128
- }
129
- }
130
- if (match) {
131
- const matchSymbol = match.symbol;
132
- const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');
133
- const originalSmiles = match.original ?? match.smiles;
134
- matchingMonomerSmilesCol.set(i, originalSmiles, false);
135
- matchingMonomerSymbolCol.set(i, matchSymbol, false);
136
- sourceLibCol.set(i, sources, false);
137
- }
138
- }
139
- return resultDf;
140
- }
62
+ export {matchMoleculesWithMonomers} from './match-molecules';
141
63
 
142
64
  /** Standardizes the monomer library
143
65
  * warning: throws error if the library is not valid or has invalid monomers
@@ -1217,7 +1139,7 @@ function replaceAllylsInSmiles(smiles: string): string {
1217
1139
  }
1218
1140
 
1219
1141
  /**NB! Can throw error */
1220
- function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: string): string {
1142
+ export function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: string): string {
1221
1143
  if (smiles)
1222
1144
  smiles = replaceAllylsInSmiles(smiles);
1223
1145
  const isSmilesMalformed = !smiles || !grok.chem.checkSmiles(smiles);
@@ -1310,7 +1232,7 @@ export function getCorrectedMolBlock(molBlock: string) {
1310
1232
  }
1311
1233
 
1312
1234
  // reverse of r-group substitution, will substitute rgroups with cap groups
1313
- function capSmiles(smiles: string, rgroups: RGroup[]) {
1235
+ export function capSmiles(smiles: string, rgroups: RGroup[]) {
1314
1236
  let newSmiles = smiles;
1315
1237
  rgroups.forEach((rg, i) => {
1316
1238
  const rgroupNum = rg.label[1] ?? `${i + 1}`; // if label is not in format R#, use index as number
@@ -6,7 +6,6 @@ import * as DG from 'datagrok-api/dg';
6
6
  import * as ui from 'datagrok-api/ui';
7
7
 
8
8
  import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
9
- import {delay} from '@datagrok-libraries/test/src/test';
10
9
  import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
10
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
12
11
 
@@ -82,7 +81,7 @@ export async function multipleSequenceAlignmentUI(
82
81
  table: table, value: seqCol, onValueChanged: async (value: DG.Column<any>) => {
83
82
  if (!value || value.semType !== DG.SEMTYPE.MACROMOLECULE) {
84
83
  okBtn.disabled = true;
85
- await delay(0); // to
84
+ await DG.delay(0); // to
86
85
  colInput.value = prevSeqCol as DG.Column<string>;
87
86
  return;
88
87
  }
@@ -50,9 +50,19 @@ export class SeqHandler implements ISeqHandler {
50
50
  if (col.type !== DG.TYPE.STRING)
51
51
  throw new Error(`Unexpected column type '${col.type}', must be '${DG.TYPE.STRING}'.`);
52
52
  this._column = col;
53
- const units: string | null = this._column.meta.units;
54
- if (!units)
55
- throw new Error('Units are not specified in column');
53
+ let units: string | null = this._column.meta.units;
54
+ if (!units) {
55
+ // it may be from layout that the macromolecule semtype is set but every other tag is missing, so we manually run detectors
56
+ if (!this._column.temp['seqHandlerDetectorRun']) {
57
+ this._column.temp['seqHandlerDetectorRun'] = true;
58
+ const detectorFunc = DG.Func.find({name: 'detectMacromolecule', meta: {role: 'semTypeDetector'}})[0];
59
+ if (detectorFunc)
60
+ detectorFunc.applySync({col: this._column});
61
+ units = this._column.meta.units;
62
+ }
63
+ if (!units)
64
+ throw new Error('Units are not specified in column');
65
+ }
56
66
  this._units = units!;
57
67
 
58
68
  this._notation = this.getNotation();
@@ -182,7 +192,7 @@ export class SeqHandler implements ISeqHandler {
182
192
 
183
193
  let aligned = uh.column.getTag(TAGS.aligned);
184
194
  if (aligned == null) {
185
- aligned = uh.stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
195
+ aligned = uh.stats.sameLength || uh.column.categories.slice(0, 5).filter((a) => !!a).every((a) => a.length > 100) ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
186
196
  uh.column.setTag(TAGS.aligned, aligned);
187
197
  }
188
198
 
@@ -2,7 +2,6 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {delay} from '@datagrok-libraries/test/src/test';
6
5
  import {checkInputColumnUI} from './check-input-column';
7
6
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
8
7
  import * as C from './constants';
@@ -17,7 +16,7 @@ export async function splitToMonomersUI(
17
16
  ): Promise<DG.DataFrame> {
18
17
  // Delay is required for initial function dialog to close before starting invalidating of molfiles.
19
18
  // Otherwise, dialog is freezing
20
- await delay(10);
19
+ await DG.delay(10);
21
20
  if (!checkInputColumnUI(seqCol, 'Sequence space')) return table;
22
21
 
23
22
  const seqHelper = _package.seqHelper;
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {fromEvent, Observable, Subject, Unsubscribable} from 'rxjs';
6
6
 
7
- import {testEvent} from '@datagrok-libraries/test/src/test';
7
+ import {testEvent} from '@datagrok-libraries/utils/src/test';
8
8
  import {
9
9
  IVdRegionsViewer,
10
10
  VdRegion, VdRegionType,
@@ -22,7 +22,7 @@ import {
22
22
  import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
23
23
  import {intToHtmlA} from '@datagrok-libraries/utils/src/color';
24
24
  import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
25
- import {testEvent} from '@datagrok-libraries/test/src/test';
25
+ import {testEvent} from '@datagrok-libraries/utils/src/test';
26
26
  import {PromiseSyncer} from '@datagrok-libraries/bio/src/utils/syncer';
27
27
  import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
28
28
  import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/monomer-library';
@@ -9,7 +9,6 @@ import {App, IHelmWebEditor} from '@datagrok-libraries/bio/src/helm/types';
9
9
  import {getHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
10
10
  import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
11
11
  import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
12
- import {delay} from '@datagrok-libraries/test/src/test';
13
12
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
14
13
 
15
14
  import {updateDivInnerHTML} from '../utils/ui-utils';
@@ -141,7 +140,7 @@ export class HelmBioFilter extends BioFilterBase<BioFilterProps> /* implements I
141
140
  const logPrefix = `${this.viewerToLog()}.substructureSearch( column = <${column.name}> )`;
142
141
  _package.logger.debug(`${logPrefix}, start`);
143
142
  try {
144
- await delay(10);
143
+ await DG.delay(10);
145
144
  const res = await helmSubstructureSearch(this.props.substructure, column, this.seqHelper);
146
145
  return res;
147
146
  } finally {
@@ -163,7 +162,7 @@ export class HelmBioFilter extends BioFilterBase<BioFilterProps> /* implements I
163
162
  // async awaitRendered(timeout: number = 10000): Promise<void> {
164
163
  // const callLog = `awaitRendered( ${timeout} )`;
165
164
  // const logPrefix = `${this.viewerToLog()}.${callLog}`;
166
- // await delay(0);
165
+ // await DG.delay(0);
167
166
  // await testEvent(this.onRendered, () => {
168
167
  // this.logger.debug(`${logPrefix}, ` + '_onRendered event caught');
169
168
  // }, () => {