@datagrok/sequence-translator 1.10.13 → 1.10.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/detectors.js +30 -2
  3. package/dist/455.js +1 -1
  4. package/dist/455.js.map +1 -1
  5. package/dist/package-test.js +1 -1
  6. package/dist/package-test.js.map +1 -1
  7. package/dist/package.js +1 -1
  8. package/dist/package.js.map +1 -1
  9. package/files/samples/sirna-demo.csv +38 -0
  10. package/files/tests/chem_enum_cores.csv +5 -0
  11. package/files/tests/chem_enum_rgroups.csv +5 -0
  12. package/package.json +2 -2
  13. package/src/apps/structure/view/ui.ts +1 -1
  14. package/src/apps/translator/view/ui.ts +1 -1
  15. package/src/oligo-renderer/canvas-renderer.ts +500 -0
  16. package/src/oligo-renderer/cell-renderer.ts +105 -0
  17. package/src/oligo-renderer/converters.ts +77 -0
  18. package/src/oligo-renderer/helm-parser.ts +154 -0
  19. package/src/oligo-renderer/legend-panel.ts +154 -0
  20. package/src/oligo-renderer/structures-panel.ts +96 -0
  21. package/src/oligo-renderer/tooltip.ts +223 -0
  22. package/src/oligo-renderer/types.ts +221 -0
  23. package/src/package-api.ts +43 -1
  24. package/src/package-test.ts +2 -0
  25. package/src/package.g.ts +56 -3
  26. package/src/package.ts +92 -5
  27. package/src/polytool/const.ts +1 -1
  28. package/src/polytool/pt-chem-enum-dialog.ts +940 -0
  29. package/src/polytool/pt-chem-enum.ts +553 -0
  30. package/src/polytool/pt-dialog.ts +4 -125
  31. package/src/polytool/pt-enumerate-seq-dialog.ts +3 -3
  32. package/src/tests/oligo-renderer-tests.ts +299 -0
  33. package/src/tests/polytool-enumerate-chem-tests.ts +408 -0
  34. package/test-console-output-1.log +303 -97
  35. package/test-record-1.mp4 +0 -0
  36. package/src/polytool/pt-enumeration-chem.ts +0 -100
@@ -0,0 +1,408 @@
1
+ /* eslint-disable max-len */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {before, category, expect, expectArray, test} from '@datagrok-libraries/test/src/test';
6
+ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
7
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
8
+
9
+ import {
10
+ assembleMolecule,
11
+ ChemEnumModes,
12
+ countForCore,
13
+ enumerate,
14
+ extractRNumbers,
15
+ makeCore,
16
+ makeRGroup,
17
+ moveStartRLabelToBranch,
18
+ normalizeRLabels,
19
+ pickFreeRingDigits,
20
+ remapSingleRLabel,
21
+ substituteRLabelWithRingDigit,
22
+ validateParams,
23
+ } from '../polytool/pt-chem-enum';
24
+
25
+ import {_package} from '../package-test';
26
+
27
/**
 * Returns the canonical SMILES that RDKit produces for `smi`, so two syntactically
 * different SMILES strings can be compared structurally.
 *
 * Fails the running test through `expect` (with the offending SMILES in the message)
 * when RDKit cannot turn `smi` into a valid molecule.
 *
 * @param smi SMILES string to canonicalize.
 * @param rdkit RDKit module used for parsing.
 * @returns The value of `get_smiles()` for the parsed molecule.
 */
function canon(smi: string, rdkit: RDModule): string {
  const mol = rdkit.get_mol(smi);
  try {
    // NOTE(review): some RDKit builds appear to return null from get_mol for unparseable
    // input instead of an invalid molecule — guard both so the failure names the SMILES
    // rather than surfacing as a TypeError.
    expect(mol != null && mol.is_valid(), true, `RDKit rejected SMILES: ${smi}`);
    return mol.get_smiles();
  } finally {
    // Release the molecule handle; optional chaining avoids a second error when parsing yielded nothing.
    mol?.delete();
  }
}
35
+
36
+ // ─── Regex / normalization / remap — no RDKit required ──────────────────────
37
+
38
category('PolyTool: ChemEnum: R-labels', () => {
  test('extract from every supported form', async () => {
    // Every accepted spelling of R1 must yield the single R-number 1.
    for (const smiles of ['C[1*]', 'C[*:1]', 'C[*1]', 'C[R1]', 'C[R:1]'])
      expectArray(extractRNumbers(smiles), [1]);
  });

  test('extract multi-digit numbers', async () => {
    const cases: [string, number[]][] = [
      ['C1CC(C[*:100])(N[*:1])C([*:101])N1[*:2]', [1, 2, 100, 101]],
      ['C[R100]N[R:101]', [100, 101]],
    ];
    for (const [smiles, expected] of cases)
      expectArray(extractRNumbers(smiles), expected);
  });

  test('normalize all spellings to [*:N]', async () => {
    const cases: [string, string][] = [
      ['C[1*]', 'C[*:1]'],
      ['C[*:1]', 'C[*:1]'],
      ['C[*1]', 'C[*:1]'],
      ['C[R1]', 'C[*:1]'],
      ['C[R:1]', 'C[*:1]'],
      ['C[R100]N[R:101]', 'C[*:100]N[*:101]'],
    ];
    for (const [input, expected] of cases)
      expect(normalizeRLabels(input), expected);
  });

  test('remap the one-and-only R-label to a new slot', async () => {
    const cases: [string, number, string][] = [
      ['C[1*]', 5, 'C[*:5]'],
      ['C[R:3]CC', 1, 'C[*:1]CC'],
      ['[*:7]C', 2, '[*:2]C'],
    ];
    for (const [input, slot, expected] of cases)
      expect(remapSingleRLabel(input, slot), expected);
  });

  test('deduplicate repeated R numbers', async () => {
    const found = extractRNumbers('C[*:1]C[*:1]');
    expectArray(found, [1]);
  });

  test('normalize does not touch regular bracket atoms', async () => {
    // Inputs already in normalized form with ordinary bracket atoms must come back unchanged.
    for (const smiles of ['C[NH3+]C[*:1]', '[13C]C[*:2]'])
      expect(normalizeRLabels(smiles), smiles);
  });
});
76
+
77
+ // ─── SMILES surgery helpers — no RDKit required ─────────────────────────────
78
+
79
category('PolyTool: ChemEnum: SMILES surgery', () => {
  /** Checks `moveStartRLabelToBranch` against [input, expected] pairs, in order. */
  const checkMove = (pairs: [string, string][]): void => {
    for (const [input, expected] of pairs)
      expect(moveStartRLabelToBranch(input), expected);
  };

  /** Checks `substituteRLabelWithRingDigit` against [input, rNumber, ringDigit, expected] rows, in order. */
  const checkSubstitute = (rows: [string, number, string, string][]): void => {
    for (const [input, rNumber, ringDigit, expected] of rows)
      expect(substituteRLabelWithRingDigit(input, rNumber, ringDigit), expected);
  };

  test('moveStartRLabelToBranch: bare atom at start', async () => {
    checkMove([
      ['[*:1]CC', 'C([*:1])C'],
      ['[*:1]Cl', 'Cl([*:1])'],
      ['[*:1][NH3+]C', '[NH3+]([*:1])C'],
    ]);
  });

  test('moveStartRLabelToBranch: with explicit bond', async () => {
    checkMove([
      ['[*:1]=CC', 'C(=[*:1])C'],
      ['[*:1]#C', 'C(#[*:1])'],
    ]);
  });

  test('moveStartRLabelToBranch: leaves interior unchanged', async () => {
    checkMove([
      ['CC[*:1]', 'CC[*:1]'],
      ['CC([*:1])CC', 'CC([*:1])CC'],
    ]);
  });

  test('substituteRLabelWithRingDigit collapses branch form', async () => {
    checkSubstitute([
      ['CC([*:1])CC', 1, '9', 'CC9CC'],
      ['CC([*:2])([*:1])CC', 1, '%50', 'CC([*:2])%50CC'],
    ]);
  });

  test('substituteRLabelWithRingDigit handles trailing form', async () => {
    checkSubstitute([
      ['CC[*:1]', 1, '7', 'CC7'],
      ['CC=[*:2]', 2, '%42', 'CC=%42'],
    ]);
  });

  test('pickFreeRingDigits avoids digits already in use', async () => {
    // The first SMILES already uses ring-closure digit 1, so 1 must not be among the two picks.
    const picked = pickFreeRingDigits(['C1CCCCC1[*:1]', '[*:1]C'], 2);
    expect(picked.includes(1), false);
    expect(picked.length, 2);
  });

  test('pickFreeRingDigits ignores digits inside bracket atoms', async () => {
    // The "1" in `[*:1]` sits inside brackets and must not be treated as ring digit 1.
    const picked = pickFreeRingDigits(['C[*:1]', '[*:1]C'], 1);
    expectArray(picked, [1]);
  });
});
119
+
120
+ // ─── Construction validators — no RDKit required ────────────────────────────
121
+
122
category('PolyTool: ChemEnum: validators', () => {
  test('core without any R-label is rejected', async () => {
    const core = makeCore('CCO', 'row 1');
    const hasError = core.error != null;
    expect(hasError, true);
  });

  test('r-group with zero R-labels is rejected', async () => {
    const group = makeRGroup('CC', 1, 'row 1');
    const hasError = group.error != null;
    expect(hasError, true);
  });

  test('r-group with multiple R-labels is rejected', async () => {
    const group = makeRGroup('[*:1]CC[*:2]', 1, 'row 1');
    const hasError = group.error != null;
    expect(hasError, true);
  });

  test('r-group with wrong R-number is auto-remapped', async () => {
    // Source carries R3 but is requested for slot 1: expect no error and a rewritten label.
    const group = makeRGroup('CC[*:3]', 1, 'row 1');
    const isClean = group.error == null;
    expect(isClean, true);
    expect(group.rNumber, 1);
    expect(group.sourceRNumber, 3);
    expect(group.smiles, 'CC[*:1]');
  });
});
146
+
147
+ // ─── Validation + count prediction — no RDKit required ──────────────────────
148
+
149
category('PolyTool: ChemEnum: count & validate', () => {
  test('zip length mismatch is flagged', async () => {
    const core = makeCore('C[*:1]C[*:2]', 'c1');
    // R1 has two entries while R2 has one — the length mismatch under test.
    const r1 = [makeRGroup('C[*:1]', 1, 'r1-a'), makeRGroup('N[*:1]', 1, 'r1-b')];
    const r2 = [makeRGroup('O[*:2]', 2, 'r2-a')];
    const verdict = validateParams({
      cores: [core],
      rGroups: new Map([[1, r1], [2, r2]]),
      mode: ChemEnumModes.Zip,
    });
    expect(verdict.ok, false);
    const mentionsZip = verdict.errors.some((msg) => msg.includes('Zip'));
    expect(mentionsZip, true);
  });

  test('cartesian count = product across Rs, summed over cores', async () => {
    const r1Only = makeCore('C[*:1]', 'a');
    const r1AndR2 = makeCore('C[*:1]N[*:2]', 'b');
    const r1 = [makeRGroup('C[*:1]', 1, 'a'), makeRGroup('N[*:1]', 1, 'b'), makeRGroup('O[*:1]', 1, 'c')];
    const r2 = [makeRGroup('C[*:2]', 2, 'a'), makeRGroup('N[*:2]', 2, 'b')];
    const verdict = validateParams({
      cores: [r1Only, r1AndR2],
      rGroups: new Map([[1, r1], [2, r2]]),
      mode: ChemEnumModes.Cartesian,
    });
    // First core: 3 (R1 only). Second core: 3 * 2 = 6. Expected total: 9.
    expect(verdict.predictedCount, 9);
    expect(verdict.ok, true);
  });

  test('zip count = N per core, summed', async () => {
    const r1Only = makeCore('C[*:1]', 'a');
    const r1AndR2 = makeCore('C[*:1]N[*:2]', 'b');
    const r1 = [makeRGroup('C[*:1]', 1, 'a'), makeRGroup('N[*:1]', 1, 'b')];
    const r2 = [makeRGroup('O[*:2]', 2, 'a'), makeRGroup('S[*:2]', 2, 'b')];
    const verdict = validateParams({
      cores: [r1Only, r1AndR2],
      rGroups: new Map([[1, r1], [2, r2]]),
      mode: ChemEnumModes.Zip,
    });
    // Both lists have length 2, so each core is expected to contribute 2 → total 4.
    expect(verdict.predictedCount, 4);
  });

  test('uncovered R-number is flagged per core', async () => {
    const core = makeCore('C[*:1]N[*:7]', 'c');
    // Only R1 is supplied; the core also references R7.
    const r1 = [makeRGroup('O[*:1]', 1, 'a')];
    const verdict = validateParams({
      cores: [core],
      rGroups: new Map([[1, r1]]),
      mode: ChemEnumModes.Cartesian,
    });
    expect(verdict.ok, false);
    const mentionsR7 = verdict.errors.some((msg) => msg.includes('R7'));
    expect(mentionsR7, true);
  });

  test('hard cap rejects over-limit enumerations', async () => {
    const core = makeCore('C[*:1]N[*:2]', 'c');
    // 1000 * 1001 = 1,001,000 combinations, just above the 1,000,000 cap passed below.
    const r1 = Array.from({length: 1000}, (_, idx) => makeRGroup('C[*:1]', 1, `r1-${idx}`));
    const r2 = Array.from({length: 1001}, (_, idx) => makeRGroup('O[*:2]', 2, `r2-${idx}`));
    const verdict = validateParams({
      cores: [core],
      rGroups: new Map([[1, r1], [2, r2]]),
      mode: ChemEnumModes.Cartesian,
      maxResults: 1_000_000,
    });
    expect(verdict.overCap, true);
    expect(verdict.ok, false);
  });

  test('countForCore: zip with mismatched per-core lengths → -1', async () => {
    const core = makeCore('C[*:1]N[*:2]', 'c');
    const r1 = [makeRGroup('C[*:1]', 1, 'a'), makeRGroup('N[*:1]', 1, 'b')];
    const r2 = [makeRGroup('O[*:2]', 2, 'a')];
    const outcome = countForCore(core, new Map([[1, r1], [2, r2]]), ChemEnumModes.Zip);
    expect(outcome.count, -1);
  });
});
229
+
230
+ // ─── Assembly + full enumeration — needs RDKit ──────────────────────────────
231
+
232
category('PolyTool: ChemEnum: assembly', () => {
  let rdkit: RDModule;

  before(async () => {
    rdkit = await getRdKitModule();
  });

  /**
   * Assembles `core` with `rGroups`, then checks that the result is non-null and
   * equals the RDKit canonical form of `expected`.
   */
  const expectAssembled = (core: string, rGroups: Map<number, string>, expected: string): void => {
    const assembled = assembleMolecule(core, rGroups, rdkit);
    expect(assembled != null, true);
    expect(assembled, canon(expected, rdkit));
  };

  test('single R-group: joins correctly', async () => {
    expectAssembled('C[*:1]', new Map([[1, 'O[*:1]']]), 'CO');
  });

  test('multi R-group: R1 and R2 both joined', async () => {
    expectAssembled('C[*:1]N[*:2]', new Map([[1, 'O[*:1]'], [2, 'S[*:2]']]), 'OCNS');
  });

  test('R-label at SMILES start is handled', async () => {
    expectAssembled('[*:1]CC', new Map([[1, 'Br[*:1]']]), 'BrCC');
  });

  test('core uses existing ring digit 1 — free digit is picked', async () => {
    // The core already closes a ring with digit 1; attaching R1 must not collide with it.
    expectAssembled('C1CCCCC1[*:1]', new Map([[1, 'O[*:1]']]), 'OC1CCCCC1');
  });

  test('multi-digit R numbers (R100, R101)', async () => {
    const substituents = new Map([[1, 'C[*:1]'], [2, 'C[*:2]'], [100, 'C[*:100]'], [101, 'C[*:101]']]);
    const assembled = assembleMolecule('C1CC(C[*:100])(N[*:1])C([*:101])N1[*:2]', substituents, rdkit);
    expect(assembled != null, true);
    // Sanity check only: the output must parse and carry no leftover R-labels.
    const parsed = rdkit.get_mol(assembled!);
    try {
      expect(parsed.is_valid(), true);
      expect(assembled!.includes('[*:'), false);
    } finally {
      parsed.delete();
    }
  });
});
276
+
277
+ // ─── End-to-end enumerate ────────────────────────────────────────────────────
278
+
279
category('PolyTool: ChemEnum: enumerate', () => {
  let rdkit: RDModule;

  before(async () => {
    rdkit = await getRdKitModule();
  });

  test('cartesian across two cores with different R-sets', async () => {
    const coreA = makeCore('C[*:1]', 'A');
    const coreB = makeCore('C[*:1]N[*:2]', 'B');
    const r1 = [makeRGroup('O[*:1]', 1, 'r1-a'), makeRGroup('S[*:1]', 1, 'r1-b')];
    const r2 = [makeRGroup('C[*:2]', 2, 'r2-a'), makeRGroup('N[*:2]', 2, 'r2-b'), makeRGroup('F[*:2]', 2, 'r2-c')];
    const params = {cores: [coreA, coreB], rGroups: new Map([[1, r1], [2, r2]]), mode: ChemEnumModes.Cartesian};
    const results = enumerate(params, rdkit)!;
    // Core A: |R1| = 2. Core B: |R1| * |R2| = 6. Expected total: 8.
    expect(results.length, 8);
    const fromA = results.filter((row) => row.coreSmiles === 'C[*:1]');
    const fromB = results.filter((row) => row.coreSmiles === 'C[*:1]N[*:2]');
    expect(fromA.length, 2);
    expect(fromB.length, 6);
  });

  test('zip across two cores with shared R-list length', async () => {
    const coreA = makeCore('C[*:1]', 'A');
    const coreB = makeCore('C[*:1]N[*:2]', 'B');
    const r1 = [makeRGroup('O[*:1]', 1, 'r1-a'), makeRGroup('S[*:1]', 1, 'r1-b')];
    const r2 = [makeRGroup('C[*:2]', 2, 'r2-a'), makeRGroup('N[*:2]', 2, 'r2-b')];
    const params = {cores: [coreA, coreB], rGroups: new Map([[1, r1], [2, r2]]), mode: ChemEnumModes.Zip};
    const results = enumerate(params, rdkit)!;
    // Each core is expected to contribute 2 zipped rows → total 4.
    expect(results.length, 4);
    const fromB = results.filter((row) => row.coreSmiles === 'C[*:1]N[*:2]');
    expect(fromB.length, 2);
    // Row i of core B pairs the i-th R1 entry with the i-th R2 entry: O+C, then S+N.
    const expectedPairs: [string, string][] = [['O[*:1]', 'C[*:2]'], ['S[*:1]', 'N[*:2]']];
    expectedPairs.forEach(([r1Smiles, r2Smiles], i) => {
      expect(fromB[i].rGroupSmilesByNum.get(1), r1Smiles);
      expect(fromB[i].rGroupSmilesByNum.get(2), r2Smiles);
    });
  });

  test('invalid params return null from enumerate()', async () => {
    const core = makeCore('C[*:1]N[*:2]', 'c');
    // Only R1 is supplied although the core also references R2.
    const onlyR1 = new Map([[1, [makeRGroup('O[*:1]', 1, 'x')]]]);
    const outcome = enumerate({cores: [core], rGroups: onlyR1, mode: ChemEnumModes.Cartesian}, rdkit);
    expect(outcome, null);
  });
});
329
+
330
+ // ─── File-based smoke test using the two test CSVs ──────────────────────────
331
+
332
category('PolyTool: ChemEnum: CSV fixtures', () => {
  /** Element type produced by `makeRGroup`; avoids `any` in the R-group collections below. */
  type RGroup = ReturnType<typeof makeRGroup>;

  const coresCsv = 'tests/chem_enum_cores.csv';
  const rGroupsCsv = 'tests/chem_enum_rgroups.csv';

  let rdkit: RDModule;

  before(async () => { rdkit = await getRdKitModule(); });

  /** Reads a package file as text and parses it into a DataFrame. */
  async function loadCsv(relPath: string): Promise<DG.DataFrame> {
    const txt = await _package.files.readAsText(relPath);
    return DG.DataFrame.fromCsv(txt);
  }

  /** Returns the named column, or throws a descriptive error instead of failing later on a null column. */
  function requireCol(df: DG.DataFrame, name: string): DG.Column {
    const col = df.col(name);
    if (col == null)
      throw new Error(`Column '${name}' is missing from the CSV fixture`);
    return col;
  }

  /** Builds one core per row of the `Core` column, labelled `Cores[i]`. */
  function readCores(df: DG.DataFrame) {
    const col = requireCol(df, 'Core');
    return Array.from({length: col.length}, (_, i) =>
      makeCore(col.get(i), `Cores[${i}]`, rdkit));
  }

  /** Builds R-groups for slot `n` from column `colName`, skipping null and blank cells. */
  function readRGroups(df: DG.DataFrame, colName: string, n: number): RGroup[] {
    const col = requireCol(df, colName);
    const out: RGroup[] = [];
    for (let i = 0; i < col.length; i++) {
      const v = col.get(i);
      if (v == null || String(v).trim() === '') continue;
      out.push(makeRGroup(String(v), n, `${colName}[${i}]`, rdkit));
    }
    return out;
  }

  test('cores file parses and all rows validate', async () => {
    const cores = readCores(await loadCsv(coresCsv));
    expect(cores.length, 4);
    expect(cores.every((c) => !c.error), true, cores.map((c) => c.error ?? '').filter((e) => e).join('; '));
    // All four reference R1 and R2
    for (const c of cores)
      expect(c.rNumbers.includes(1) && c.rNumbers.includes(2), true, `core ${c.id} R-numbers: ${c.rNumbers}`);
  });

  test('r-groups file: columns R1, R2, R3 populate their slots', async () => {
    const df = await loadCsv(rGroupsCsv);
    const r1 = readRGroups(df, 'R1', 1);
    const r2 = readRGroups(df, 'R2', 2);
    const r3 = readRGroups(df, 'R3', 3);
    expect(r1.length, 4);
    expect(r2.length, 4);
    expect(r3.length, 1);
    for (const list of [r1, r2, r3])
      expect(list.every((rg) => !rg.error), true, list.map((rg) => rg.error ?? '').filter((e) => e).join('; '));
  });

  test('end-to-end cartesian with CSV fixtures produces valid, canonical SMILES', async () => {
    const coresDf = await loadCsv(coresCsv);
    const rgsDf = await loadCsv(rGroupsCsv);

    const cores = readCores(coresDf);

    const rGroups = new Map<number, RGroup[]>();
    for (const colName of ['R1', 'R2', 'R3']) {
      // The slot number is the numeric suffix of the column name (R1 → 1, …).
      const n = parseInt(colName.substring(1), 10);
      rGroups.set(n, readRGroups(rgsDf, colName, n));
    }

    const results = enumerate({cores, rGroups, mode: ChemEnumModes.Cartesian}, rdkit);
    expect(results != null && results.length > 0, true);
    // All outputs must parse and contain no residual R-labels.
    // `results` is non-null here: the expect above throws otherwise.
    for (const r of results!) {
      expect(r.smiles.includes('[*:'), false, `residual R-label in result: ${r.smiles}`);
      const m = rdkit.get_mol(r.smiles);
      try {
        expect(m.is_valid(), true, `invalid SMILES: ${r.smiles}`);
      } finally { m.delete(); }
    }
  });
});