@datagrok/sequence-translator 1.10.15 → 1.10.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +271 -253
- package/CREDITS.md +236 -0
- package/detectors.js +8 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/oligo-renderer/canvas-renderer.ts +8 -3
- package/src/oligo-renderer/cell-renderer.ts +8 -2
- package/src/oligo-renderer/tooltip.ts +5 -1
- package/src/oligo-renderer/types.ts +2 -1
- package/src/package-api.ts +4 -0
- package/src/package.g.ts +6 -0
- package/src/package.ts +28 -0
- package/src/polytool/pt-chem-enum-dialog.ts +50 -10
- package/src/polytool/pt-chem-enum.ts +76 -7
- package/src/polytool/pt-enumerate-seq-dialog.ts +18 -4
- package/test-console-output-1.log +153 -151
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/sequence-translator",
|
|
3
3
|
"friendlyName": "Sequence Translator",
|
|
4
|
-
"version": "1.10.15",
|
|
4
|
+
"version": "1.10.17",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Davit Rizhinashvili",
|
|
7
7
|
"email": "drizhinashvili@datagrok.ai"
|
|
@@ -22,8 +22,8 @@
|
|
|
22
22
|
}
|
|
23
23
|
],
|
|
24
24
|
"dependencies": {
|
|
25
|
-
"@datagrok-libraries/bio": "^5.65.
|
|
26
|
-
"@datagrok-libraries/chem-meta": "^1.2.
|
|
25
|
+
"@datagrok-libraries/bio": "^5.65.1",
|
|
26
|
+
"@datagrok-libraries/chem-meta": "^1.2.12",
|
|
27
27
|
"@datagrok-libraries/tutorials": "^1.6.1",
|
|
28
28
|
"@datagrok-libraries/utils": "^4.6.5",
|
|
29
29
|
"@types/react": "^18.0.15",
|
package/src/oligo-renderer/canvas-renderer.ts
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
import {
|
|
27
27
|
BASE_COLORS, FALLBACK_COLOR,
|
|
28
|
-
canonicalSugarSymbol,
|
|
28
|
+
canonicalPhosphateSymbol, canonicalSugarSymbol,
|
|
29
29
|
ParsedDuplex, ParsedMonomer, ParsedNucleotide, ParsedStrand,
|
|
30
30
|
resolveConjugate, resolvePhosphate, resolveSugar,
|
|
31
31
|
} from './types';
|
|
@@ -364,9 +364,14 @@ function isModifiedSugar(sugar: string): boolean {
|
|
|
364
364
|
}
|
|
365
365
|
|
|
366
366
|
function drawLinkage(g: CanvasRenderingContext2D, link: LinkagePos): void {
|
|
367
|
+
// Only the canonical phosphate (`p` / aliased `P`) is treated as "no marker".
|
|
368
|
+
// Every other linkage — known PS / PS₂ / MeP, or unknown custom symbol that
|
|
369
|
+
// got a hash-derived color — gets a bar in the inter-chip gap so the user
|
|
370
|
+
// can see and hover it. Color comes from resolvePhosphate which is
|
|
371
|
+
// deterministic per symbol, so two distinct unknown symbols get distinct bars.
|
|
372
|
+
const canonical = canonicalPhosphateSymbol(link.phosphateSymbol);
|
|
373
|
+
if (canonical === 'p') return;
|
|
367
374
|
const ps = resolvePhosphate(link.phosphateSymbol);
|
|
368
|
-
if (ps.meta.short !== 'PS' && ps.meta.short !== 'PS₂' && ps.meta.short !== 'MeP')
|
|
369
|
-
return; // only draw markers for non-canonical linkages
|
|
370
375
|
const barW = Math.max(2.5, link.w * PS_BAR_RATIO);
|
|
371
376
|
const barX = link.x + (link.w - barW) / 2;
|
|
372
377
|
g.fillStyle = ps.color;
|
package/src/oligo-renderer/cell-renderer.ts
CHANGED
|
@@ -98,8 +98,14 @@ export class OligoNucleotideCellRenderer extends DG.GridCellRenderer {
|
|
|
98
98
|
return m;
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
/** Cache key for a cell's layout. Includes the column's `version` so any
|
|
102
|
+
* edit to the column (which bumps version) orphans previous cache entries
|
|
103
|
+
* — preventing onMouseMove from hit-testing a stale layout that was cached
|
|
104
|
+
* before the edit and not yet replaced by a fresh render(). */
|
|
101
105
|
private cellKey(gridCell: DG.GridCell): string {
|
|
102
|
-
const
|
|
103
|
-
|
|
106
|
+
const col = gridCell.tableColumn;
|
|
107
|
+
const colName = col?.name ?? gridCell.gridColumn?.name ?? '?';
|
|
108
|
+
const ver = col?.version ?? 0;
|
|
109
|
+
return `${colName}@${ver}::${gridCell.tableRowIndex ?? -1}`;
|
|
104
110
|
}
|
|
105
111
|
}
|
package/src/oligo-renderer/tooltip.ts
CHANGED
|
@@ -22,6 +22,7 @@ import {
|
|
|
22
22
|
canonicalPhosphateSymbol, canonicalSugarSymbol,
|
|
23
23
|
ParsedNucleotide, resolveConjugate, resolvePhosphate, resolveSugar,
|
|
24
24
|
} from './types';
|
|
25
|
+
import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
25
26
|
|
|
26
27
|
const STRUCT_W = 110;
|
|
27
28
|
const STRUCT_H = 90;
|
|
@@ -117,7 +118,10 @@ function findMonomerMolfile(lib: IMonomerLib, rawSymbol: string, kind: Structure
|
|
|
117
118
|
else if (kind === 'phosphate') candidates.push(canonicalPhosphateSymbol(rawSymbol));
|
|
118
119
|
// Bases (A/C/G/U/T) and conjugates: try the symbol as-is.
|
|
119
120
|
|
|
120
|
-
|
|
121
|
+
// make sure RNA and chem come first :D
|
|
122
|
+
const polymerTypes: PolymerType[] = (lib.getPolymerTypes()
|
|
123
|
+
.filter((pt) => pt === 'RNA' || pt === 'CHEM') as PolymerType[])
|
|
124
|
+
.concat(lib.getPolymerTypes().filter((pt) => pt !== 'RNA' && pt !== 'CHEM'));
|
|
121
125
|
for (const sym of candidates) {
|
|
122
126
|
for (const pt of polymerTypes) {
|
|
123
127
|
const monomer = lib.getMonomer(pt, sym);
|
package/src/oligo-renderer/types.ts
CHANGED
|
@@ -200,7 +200,8 @@ export function resolveConjugate(symbol: string): { color: string; meta: ModMeta
|
|
|
200
200
|
return {color: known.color, meta: known};
|
|
201
201
|
return {
|
|
202
202
|
color: hashColor(symbol),
|
|
203
|
-
meta: {name: symbol, short: symbol.length > 6 ? symbol.slice(0, 6) : symbol,
|
|
203
|
+
meta: {name: symbol, short: symbol.length > 6 ? symbol.slice(0, 6) : symbol,
|
|
204
|
+
color: hashColor(symbol), category: 'conjugate'},
|
|
204
205
|
};
|
|
205
206
|
}
|
|
206
207
|
|
package/src/package-api.ts
CHANGED
|
@@ -133,6 +133,10 @@ export namespace funcs {
|
|
|
133
133
|
return await grok.functions.call('SequenceTranslator:GetPtChemEnumeratorDialog', { cell });
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
+
export async function getPtOligoEnumeratorDialog(cell: any | null): Promise<void> {
|
|
137
|
+
return await grok.functions.call('SequenceTranslator:GetPtOligoEnumeratorDialog', { cell });
|
|
138
|
+
}
|
|
139
|
+
|
|
136
140
|
/**
|
|
137
141
|
Enumerate provided HELM sequence on provided positions with provided monomers and generates new table
|
|
138
142
|
*/
|
package/src/package.g.ts
CHANGED
|
@@ -197,6 +197,12 @@ export async function getPtChemEnumeratorDialog(cell?: any) : Promise<void> {
|
|
|
197
197
|
await PackageFunctions.getPtChemEnumeratorDialog(cell);
|
|
198
198
|
}
|
|
199
199
|
|
|
200
|
+
//name: Polytool Oligo Enumerator dialog
|
|
201
|
+
//input: object cell { nullable: true }
|
|
202
|
+
export async function getPtOligoEnumeratorDialog(cell?: any) : Promise<void> {
|
|
203
|
+
await PackageFunctions.getPtOligoEnumeratorDialog(cell);
|
|
204
|
+
}
|
|
205
|
+
|
|
200
206
|
//name: Enumerate Single HELM Sequence
|
|
201
207
|
//description: Enumerate provided HELM sequence on provided positions with provided monomers and generates new table
|
|
202
208
|
//input: string helmSequence
|
package/src/package.ts
CHANGED
|
@@ -358,6 +358,34 @@ export class PackageFunctions {
|
|
|
358
358
|
return polyToolEnumerateChemUI(cell);
|
|
359
359
|
}
|
|
360
360
|
|
|
361
|
+
|
|
362
|
+
/** Enumerator entry for OligoNucleotide cells.
|
|
363
|
+
*
|
|
364
|
+
* The cell value is HELM (under the hood). The enumerator dialog is built
|
|
365
|
+
* around `Macromolecule` cells, so we wrap the oligo HELM in a temp
|
|
366
|
+
* Macromolecule column and pass that cell in. The `outputAsOligo` flag
|
|
367
|
+
* makes the dialog tag the enumerated result column as OligoNucleotide so
|
|
368
|
+
* the duplex renderer picks it up automatically. */
|
|
369
|
+
@grok.decorators.func({
|
|
370
|
+
name: 'Polytool Oligo Enumerator dialog'
|
|
371
|
+
})
|
|
372
|
+
static async getPtOligoEnumeratorDialog(
|
|
373
|
+
@grok.decorators.param({type: 'object', options: {nullable: true}}) cell?: DG.Cell) {
|
|
374
|
+
if (!cell || cell.value == null)
|
|
375
|
+
return polyToolEnumerateHelmUI(undefined, true);
|
|
376
|
+
|
|
377
|
+
const helm = String(cell.value);
|
|
378
|
+
const tempCol = DG.Column.fromStrings('helm', [helm]);
|
|
379
|
+
tempCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
380
|
+
tempCol.meta.units = 'helm';
|
|
381
|
+
tempCol.setTag('aligned', 'SEQ');
|
|
382
|
+
tempCol.setTag('alphabet', 'RNA');
|
|
383
|
+
tempCol.setTag('cell.renderer', 'helm');
|
|
384
|
+
const tempDf = DG.DataFrame.fromColumns([tempCol]);
|
|
385
|
+
const tempCell = tempDf.cell(0, 'helm');
|
|
386
|
+
return polyToolEnumerateHelmUI(tempCell, true);
|
|
387
|
+
}
|
|
388
|
+
|
|
361
389
|
@grok.decorators.func({
|
|
362
390
|
name: 'Enumerate Single HELM Sequence',
|
|
363
391
|
description: 'Enumerate provided HELM sequence on provided positions with provided monomers and generates new table',
|
package/src/polytool/pt-chem-enum-dialog.ts
CHANGED
|
@@ -45,6 +45,7 @@ interface CardOpts {
|
|
|
45
45
|
subtitle: string;
|
|
46
46
|
error?: string;
|
|
47
47
|
onEdit?: () => void;
|
|
48
|
+
onDuplicate?: () => void;
|
|
48
49
|
onRemove?: () => void;
|
|
49
50
|
}
|
|
50
51
|
|
|
@@ -108,15 +109,22 @@ function buildCard(opts: CardOpts): HTMLElement {
|
|
|
108
109
|
}});
|
|
109
110
|
if (opts.onEdit) {
|
|
110
111
|
const editBtn = ui.icons.edit((e: MouseEvent) => { e.stopPropagation(); opts.onEdit!(); }, 'Edit');
|
|
112
|
+
editBtn.style.color = 'var(--blue-3)';
|
|
111
113
|
actions.appendChild(editBtn);
|
|
112
114
|
}
|
|
115
|
+
if (opts.onDuplicate) {
|
|
116
|
+
const dupBtn = ui.icons.copy((e: MouseEvent) => { e.stopPropagation(); opts.onDuplicate!(); }, 'Duplicate');
|
|
117
|
+
dupBtn.style.color = 'var(--blue-3)';
|
|
118
|
+
actions.appendChild(dupBtn);
|
|
119
|
+
}
|
|
113
120
|
if (opts.onRemove) {
|
|
114
121
|
const delBtn = ui.icons.delete((e: MouseEvent) => { e.stopPropagation(); opts.onRemove!(); }, 'Remove');
|
|
122
|
+
delBtn.style.color = 'var(--red-3)';
|
|
115
123
|
actions.appendChild(delBtn);
|
|
116
124
|
}
|
|
117
125
|
card.addEventListener('mouseenter', () => { actions.style.display = 'flex'; });
|
|
118
126
|
card.addEventListener('mouseleave', () => { actions.style.display = 'none'; });
|
|
119
|
-
if (opts.onEdit || opts.onRemove) card.appendChild(actions);
|
|
127
|
+
if (opts.onEdit || opts.onDuplicate || opts.onRemove) card.appendChild(actions);
|
|
120
128
|
|
|
121
129
|
return card;
|
|
122
130
|
}
|
|
@@ -197,25 +205,38 @@ async function openRGroupSketchDialog(rdkit: RDModule, initialMolfile?: string,
|
|
|
197
205
|
const status = ui.divText('', {style: {fontSize: '11px', padding: '4px 0', minHeight: '18px'}});
|
|
198
206
|
let currentSmiles: string | null = null;
|
|
199
207
|
let detectedRs: number[] = [];
|
|
208
|
+
let isSingleAtom = false;
|
|
200
209
|
let okBtn: HTMLButtonElement | null = null;
|
|
201
210
|
let userTouchedR = false;
|
|
202
211
|
rNumberInput.onChanged.subscribe(() => { userTouchedR = true; updateOk(); });
|
|
203
212
|
|
|
204
213
|
const updateOk = () => {
|
|
205
214
|
const n = rNumberInput.value;
|
|
206
|
-
const ok = currentSmiles != null && n != null && n >= 1 &&
|
|
215
|
+
const ok = currentSmiles != null && n != null && n >= 1 &&
|
|
216
|
+
(detectedRs.length === 1 || isSingleAtom);
|
|
207
217
|
if (okBtn) okBtn.disabled = !ok;
|
|
208
218
|
};
|
|
209
219
|
|
|
210
220
|
const revalidate = async () => {
|
|
211
221
|
currentSmiles = await smilesFromSketcher(sk, rdkit);
|
|
212
222
|
detectedRs = currentSmiles ? extractRNumbers(currentSmiles) : [];
|
|
223
|
+
// Probe single-atom mode only when there's no R-label — keeps the
|
|
224
|
+
// common labeled path free of an extra RDKit parse.
|
|
225
|
+
isSingleAtom = false;
|
|
226
|
+
if (currentSmiles && detectedRs.length === 0) {
|
|
227
|
+
const probe = makeRGroup(currentSmiles, rNumberInput.value ?? 1, '', rdkit);
|
|
228
|
+
isSingleAtom = !!probe.isSingleAtom;
|
|
229
|
+
}
|
|
213
230
|
if (!currentSmiles) {
|
|
214
|
-
status.innerText = 'Draw an R-group with
|
|
231
|
+
status.innerText = 'Draw an R-group with one attachment point, or a single atom (e.g. N, O, Cl).';
|
|
215
232
|
status.style.color = 'var(--grey-4)';
|
|
216
|
-
} else if (detectedRs.length === 0) {
|
|
217
|
-
status.innerText = 'No R-group detected — add [*:N] to mark the attachment point.';
|
|
233
|
+
} else if (detectedRs.length === 0 && !isSingleAtom) {
|
|
234
|
+
status.innerText = 'No R-group detected — add [*:N] to mark the attachment point, or draw a single atom (e.g. N, O, Cl).';
|
|
218
235
|
status.style.color = 'var(--red-3)';
|
|
236
|
+
} else if (detectedRs.length === 0 && isSingleAtom) {
|
|
237
|
+
const targetN = rNumberInput.value;
|
|
238
|
+
status.innerText = `Single atom — will be substituted into [*:${targetN ?? '?'}] in the core.`;
|
|
239
|
+
status.style.color = 'var(--green-2)';
|
|
219
240
|
} else if (detectedRs.length > 1) {
|
|
220
241
|
status.innerText = `R-group must contain exactly one attachment point. Found ${detectedRs.length}: ${detectedRs.map((n) => 'R' + n).join(', ')}.`;
|
|
221
242
|
status.style.color = 'var(--red-3)';
|
|
@@ -266,6 +287,10 @@ async function openImportWizard(
|
|
|
266
287
|
const columnInput = ui.input.choice<string>('Column', {items: [] as string[], nullable: true});
|
|
267
288
|
const rNumberInput = kind === 'rgroups' ?
|
|
268
289
|
ui.input.int('Target R#', {value: 1, min: 1}) : null;
|
|
290
|
+
if (rNumberInput) {
|
|
291
|
+
ui.tooltip.bind(rNumberInput.input,
|
|
292
|
+
'Target R-number for these R-groups. Single-atom rows (no [*:N] label, e.g. just N or O) get substituted into the core\'s R# slot directly.');
|
|
293
|
+
}
|
|
269
294
|
const dedupInput = ui.input.bool('Remove duplicates', {value: defaultDedup});
|
|
270
295
|
ui.tooltip.bind(dedupInput.input,
|
|
271
296
|
kind === 'cores' ?
|
|
@@ -361,10 +386,10 @@ async function openImportWizard(
|
|
|
361
386
|
built.rGroups = rGroups;
|
|
362
387
|
rGroups.forEach((rg, i) => {
|
|
363
388
|
if (rg.error) errs.push(`R-group ${i + 1}: ${rg.error}`);
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
});
|
|
389
|
+
const subtitle = rg.error ? 'invalid' :
|
|
390
|
+
rg.isSingleAtom ? `R${rg.rNumber} · atom` :
|
|
391
|
+
`R${rg.rNumber}${rg.sourceRNumber != null && rg.sourceRNumber !== rg.rNumber ? ` (from R${rg.sourceRNumber})` : ''}`;
|
|
392
|
+
items.push({smi: rg.error ? '' : rg.smiles, err: rg.error, subtitle});
|
|
368
393
|
});
|
|
369
394
|
countText.innerText = `${rGroups.length} R-group${rGroups.length === 1 ? '' : 's'} for R${n}` +
|
|
370
395
|
(dedup && dupCount ? ` (${dupCount} duplicate${dupCount === 1 ? '' : 's'} skipped)` : '') + '.';
|
|
@@ -558,6 +583,10 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
|
|
|
558
583
|
const edited = await openCoreSketchDialog(rdkit, c.smiles);
|
|
559
584
|
if (edited) { state.cores[i] = edited; refresh(); }
|
|
560
585
|
},
|
|
586
|
+
onDuplicate: async () => {
|
|
587
|
+
const dup = await openCoreSketchDialog(rdkit, c.smiles);
|
|
588
|
+
if (dup) { state.cores.push(dup); refresh(); }
|
|
589
|
+
},
|
|
561
590
|
onRemove: () => { state.cores.splice(i, 1); refresh(); },
|
|
562
591
|
});
|
|
563
592
|
};
|
|
@@ -625,9 +654,12 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
|
|
|
625
654
|
const renderer = (i: number): HTMLElement => {
|
|
626
655
|
const rg = list[i];
|
|
627
656
|
const remap = rg.sourceRNumber != null && rg.sourceRNumber !== rg.rNumber ? ` (from R${rg.sourceRNumber})` : '';
|
|
657
|
+
const subtitle = rg.error ? 'invalid' :
|
|
658
|
+
rg.isSingleAtom ? `r group ${i + 1} · R${rg.rNumber} · atom` :
|
|
659
|
+
`r group ${i + 1} · R${rg.rNumber}${remap}`;
|
|
628
660
|
return buildCard({
|
|
629
661
|
smiles: rg.error ? '' : rg.smiles,
|
|
630
|
-
subtitle
|
|
662
|
+
subtitle,
|
|
631
663
|
error: rg.error,
|
|
632
664
|
onEdit: async () => {
|
|
633
665
|
const edited = await openRGroupSketchDialog(rdkit, rg.smiles, rg.rNumber);
|
|
@@ -644,6 +676,14 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
|
|
|
644
676
|
}
|
|
645
677
|
refresh();
|
|
646
678
|
},
|
|
679
|
+
onDuplicate: async () => {
|
|
680
|
+
const dup = await openRGroupSketchDialog(rdkit, rg.smiles, rg.rNumber);
|
|
681
|
+
if (!dup) return;
|
|
682
|
+
const target = state.rGroupsByNum.get(dup.rNumber) ?? [];
|
|
683
|
+
target.push(dup);
|
|
684
|
+
state.rGroupsByNum.set(dup.rNumber, target);
|
|
685
|
+
refresh();
|
|
686
|
+
},
|
|
647
687
|
onRemove: () => {
|
|
648
688
|
list.splice(i, 1);
|
|
649
689
|
if (list.length === 0) state.rGroupsByNum.delete(n);
|
package/src/polytool/pt-chem-enum.ts
CHANGED
|
@@ -42,7 +42,11 @@ export interface ChemEnumCore {
|
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
export interface ChemEnumRGroup {
|
|
45
|
-
/**
|
|
45
|
+
/**
|
|
46
|
+
* Normalized SMILES with its single R-label remapped to the target R number,
|
|
47
|
+
* OR — when {@link isSingleAtom} is set — the canonical single-atom token
|
|
48
|
+
* (e.g. `N`, `O`, `[N+]`) to splice into the core's `[*:N]` slot.
|
|
49
|
+
*/
|
|
46
50
|
smiles: string;
|
|
47
51
|
/** SMILES as supplied (pre-normalization and pre-remap). */
|
|
48
52
|
originalSmiles: string;
|
|
@@ -50,6 +54,12 @@ export interface ChemEnumRGroup {
|
|
|
50
54
|
rNumber: number;
|
|
51
55
|
/** R number as originally written in `originalSmiles` (pre-remap). */
|
|
52
56
|
sourceRNumber?: number;
|
|
57
|
+
/**
|
|
58
|
+
* True when the R-group has zero R-labels and is exactly one heavy atom.
|
|
59
|
+
* Such groups are spliced into the core's `[*:N]` slot via plain string
|
|
60
|
+
* replace instead of ring-closure joining.
|
|
61
|
+
*/
|
|
62
|
+
isSingleAtom?: boolean;
|
|
53
63
|
id: string;
|
|
54
64
|
error?: string;
|
|
55
65
|
}
|
|
@@ -169,9 +179,21 @@ export function makeRGroup(
|
|
|
169
179
|
const rNumbers = extractRNumbers(normalized);
|
|
170
180
|
|
|
171
181
|
if (rNumbers.length === 0) {
|
|
182
|
+
// Single-atom mode: a no-R-label SMILES is acceptable iff RDKit confirms
|
|
183
|
+
// exactly one heavy atom. The atom is then substituted into the core's
|
|
184
|
+
// `[*:N]` slot by `buildJoinedSmiles` instead of ring-closure-joined.
|
|
185
|
+
if (rdkit) {
|
|
186
|
+
const atomSmi = trySingleAtomCanonical(normalized, rdkit);
|
|
187
|
+
if (atomSmi) {
|
|
188
|
+
return {
|
|
189
|
+
smiles: atomSmi, originalSmiles, rNumber: targetRNumber, id,
|
|
190
|
+
isSingleAtom: true,
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
}
|
|
172
194
|
return {
|
|
173
195
|
smiles: normalized, originalSmiles, rNumber: targetRNumber, id,
|
|
174
|
-
error: 'R-group must contain exactly one R label (
|
|
196
|
+
error: 'R-group must contain exactly one R label, or be a single atom (e.g. N, O, Cl)'};
|
|
175
197
|
}
|
|
176
198
|
if (rNumbers.length > 1) {
|
|
177
199
|
return {
|
|
@@ -208,6 +230,27 @@ function tryParse(smi: string, rdkit: RDModule): string | null {
|
|
|
208
230
|
}
|
|
209
231
|
}
|
|
210
232
|
|
|
233
|
+
/**
|
|
234
|
+
* Returns the canonical, H-stripped SMILES iff `smi` parses as exactly one
|
|
235
|
+
* heavy atom — used to detect single-atom R-groups (`N`, `O`, `[N+]`, …) that
|
|
236
|
+
* substitute into the core's `[*:N]` slot directly. Returns null otherwise.
|
|
237
|
+
*/
|
|
238
|
+
function trySingleAtomCanonical(smi: string, rdkit: RDModule): string | null {
|
|
239
|
+
let mol: RDMol | null = null;
|
|
240
|
+
try {
|
|
241
|
+
mol = rdkit.get_mol(smi);
|
|
242
|
+
if (!mol || !mol.is_valid()) return null;
|
|
243
|
+
if (mol.get_num_atoms(true) !== 1) return null;
|
|
244
|
+
mol.remove_hs_in_place();
|
|
245
|
+
const canon = mol.get_smiles();
|
|
246
|
+
return canon && canon.length > 0 ? canon : null;
|
|
247
|
+
} catch {
|
|
248
|
+
return null;
|
|
249
|
+
} finally {
|
|
250
|
+
mol?.delete();
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
211
254
|
// ─── Count + validation ─────────────────────────────────────────────────────
|
|
212
255
|
|
|
213
256
|
/** Results per core: depends on mode and the R-numbers the core uses. */
|
|
@@ -302,17 +345,34 @@ export function buildJoinedSmiles(
|
|
|
302
345
|
): string | null {
|
|
303
346
|
if (rgSmilesByNum.size === 0) return null;
|
|
304
347
|
|
|
305
|
-
|
|
348
|
+
// Single-atom R-groups (no `[*:k]` in their SMILES) are spliced into the
|
|
349
|
+
// core's `[*:k]` slot directly. Labeled R-groups go through the standard
|
|
350
|
+
// ring-closure join. The two paths cooperate when both are present:
|
|
351
|
+
// atoms are substituted first, then the labeled rest is joined.
|
|
352
|
+
const atomReps = new Map<number, string>();
|
|
353
|
+
const labeled = new Map<number, string>();
|
|
354
|
+
for (const [k, s] of rgSmilesByNum) {
|
|
355
|
+
if (extractRNumbers(s).includes(k)) labeled.set(k, s);
|
|
356
|
+
else atomReps.set(k, s);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
let preparedCore = coreSmiles;
|
|
360
|
+
for (const [k, atom] of atomReps)
|
|
361
|
+
preparedCore = substituteRLabelWithAtom(preparedCore, k, atom);
|
|
362
|
+
|
|
363
|
+
if (labeled.size === 0) return preparedCore;
|
|
364
|
+
|
|
365
|
+
const coreFixed = moveStartRLabelToBranch(preparedCore);
|
|
306
366
|
const rgsFixed = new Map<number, string>();
|
|
307
|
-
for (const [k, s] of
|
|
367
|
+
for (const [k, s] of labeled) rgsFixed.set(k, moveStartRLabelToBranch(s));
|
|
308
368
|
|
|
309
369
|
const allPieces = [coreFixed, ...rgsFixed.values()];
|
|
310
|
-
const digits = pickFreeRingDigits(allPieces,
|
|
311
|
-
if (digits.length <
|
|
370
|
+
const digits = pickFreeRingDigits(allPieces, labeled.size);
|
|
371
|
+
if (digits.length < labeled.size) return null;
|
|
312
372
|
|
|
313
373
|
const digitByNum = new Map<number, string>();
|
|
314
374
|
let i = 0;
|
|
315
|
-
for (const k of
|
|
375
|
+
for (const k of labeled.keys()) digitByNum.set(k, formatRingDigit(digits[i++]));
|
|
316
376
|
|
|
317
377
|
let assembledCore = coreFixed;
|
|
318
378
|
for (const [k, d] of digitByNum)
|
|
@@ -325,6 +385,15 @@ export function buildJoinedSmiles(
|
|
|
325
385
|
return [assembledCore, ...assembledRgs].join('.');
|
|
326
386
|
}
|
|
327
387
|
|
|
388
|
+
/**
|
|
389
|
+
* Splices an atom token into every `[*:n]` slot of `smi`. Unlike ring-digit
|
|
390
|
+
* substitution, parens around the label (`(N)` etc.) are valid SMILES, so a
|
|
391
|
+
* plain string replace is enough.
|
|
392
|
+
*/
|
|
393
|
+
export function substituteRLabelWithAtom(smi: string, n: number, atom: string): string {
|
|
394
|
+
return smi.split(`[*:${n}]`).join(atom);
|
|
395
|
+
}
|
|
396
|
+
|
|
328
397
|
/**
|
|
329
398
|
* Joins a core with one R-group per R-number and canonicalizes via RDKit.
|
|
330
399
|
* Per-molecule sync RDKit call — **do not use in bulk**; prefer {@link buildJoinedSmiles}
|
package/src/polytool/pt-enumerate-seq-dialog.ts
CHANGED
|
@@ -33,6 +33,7 @@ import {Chain} from './conversion/pt-chain';
|
|
|
33
33
|
import {polyToolConvert} from './pt-dialog';
|
|
34
34
|
|
|
35
35
|
import {_package, applyNotationProviderForCyclized, PackageFunctions} from '../package';
|
|
36
|
+
import {tagAsOligoNucleotide} from '../oligo-renderer/converters';
|
|
36
37
|
import {buildMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
|
|
37
38
|
import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
38
39
|
|
|
@@ -84,8 +85,12 @@ type PolyToolEnumerateHelmSerialized = {
|
|
|
84
85
|
rules: string[],
|
|
85
86
|
};
|
|
86
87
|
|
|
87
|
-
/** Entry point: creates, sizes, and shows the enumeration dialog.
|
|
88
|
-
|
|
88
|
+
/** Entry point: creates, sizes, and shows the enumeration dialog.
|
|
89
|
+
* @param outputAsOligo If true, the enumerated HELM column in the result df
|
|
90
|
+
* is tagged as `OligoNucleotide` (semType + units=helm + cellRenderer hints)
|
|
91
|
+
* so the duplex cell renderer picks it up. Used when the source cell was an
|
|
92
|
+
* OligoNucleotide column. */
|
|
93
|
+
export async function polyToolEnumerateHelmUI(cell?: DG.Cell, outputAsOligo: boolean = false): Promise<void> {
|
|
89
94
|
await _package.initPromise;
|
|
90
95
|
|
|
91
96
|
// Capture viewport dimensions for dialog sizing
|
|
@@ -122,7 +127,7 @@ export async function polyToolEnumerateHelmUI(cell?: DG.Cell): Promise<void> {
|
|
|
122
127
|
}
|
|
123
128
|
}
|
|
124
129
|
};
|
|
125
|
-
dialog = await getPolyToolEnumerateDialog(cell, resizeInputs);
|
|
130
|
+
dialog = await getPolyToolEnumerateDialog(cell, resizeInputs, outputAsOligo);
|
|
126
131
|
|
|
127
132
|
// On first show, center the dialog at 70% of viewport; on subsequent resizes, just reflow inputs
|
|
128
133
|
let isFirstShow = true;
|
|
@@ -155,7 +160,7 @@ export async function polyToolEnumerateHelmUI(cell?: DG.Cell): Promise<void> {
|
|
|
155
160
|
|
|
156
161
|
/** Builds and configures the enumeration dialog with all inputs, validators, and event handlers. */
|
|
157
162
|
async function getPolyToolEnumerateDialog(
|
|
158
|
-
cell?: DG.Cell, resizeInputs?: () => void
|
|
163
|
+
cell?: DG.Cell, resizeInputs?: () => void, outputAsOligo: boolean = false,
|
|
159
164
|
): Promise<DG.Dialog> {
|
|
160
165
|
const logPrefix = `ST: PT: HelmDialog()`;
|
|
161
166
|
let inputs: PolyToolEnumerateInputs;
|
|
@@ -712,6 +717,15 @@ async function getPolyToolEnumerateDialog(
|
|
|
712
717
|
rules: await ruleInputs.getActive()
|
|
713
718
|
} : false,
|
|
714
719
|
helmHelper);
|
|
720
|
+
|
|
721
|
+
// When the source was an OligoNucleotide cell, tag the enumerated
|
|
722
|
+
// HELM column as OligoNucleotide so the duplex renderer picks it up.
|
|
723
|
+
if (outputAsOligo) {
|
|
724
|
+
const enumCol = enumeratorResDf.col('Enumerated');
|
|
725
|
+
if (enumCol && enumCol.type === DG.COLUMN_TYPE.STRING)
|
|
726
|
+
tagAsOligoNucleotide(enumCol as DG.Column<string>);
|
|
727
|
+
}
|
|
728
|
+
|
|
715
729
|
const appendTarget = inputs.appendToTable.value;
|
|
716
730
|
if (appendTarget) {
|
|
717
731
|
appendTarget.append(enumeratorResDf, true);
|