@datagrok/bio 2.27.3 → 2.27.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/package-knowledge.yaml +53 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +24 -11
- package/src/tests/detectors-tests.ts +5 -1
- package/src/tests/splitters-test.ts +8 -4
- package/src/tests/to-atomic-level-tests.ts +144 -0
- package/src/utils/seq-helper/seq-handler.ts +25 -9
- package/test-console-output-1.log +610 -568
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.27.3",
|
|
8
|
+
"version": "2.27.4",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.64.0",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.11",
|
|
@@ -13,6 +13,7 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
13
13
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
14
14
|
import {getClusterMatrixWorker} from '@datagrok-libraries/math';
|
|
15
15
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
16
|
+
import {awaitCheck} from '@datagrok-libraries/test/src/test';
|
|
16
17
|
|
|
17
18
|
const dataFn: string = 'samples/FASTA_PT_activity.csv';
|
|
18
19
|
|
|
@@ -117,25 +118,37 @@ export async function demoActivityCliffsCyclic() {
|
|
|
117
118
|
ui.setUpdateIndicator(tv.root, true);
|
|
118
119
|
try {
|
|
119
120
|
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
120
|
-
|
|
121
|
+
await PackageFunctions.activityCliffs(
|
|
121
122
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
122
123
|
96, DimReductionMethods.UMAP, MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE,
|
|
123
|
-
seqEncodingFunc, {}, true)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
124
|
+
seqEncodingFunc, {}, true);
|
|
125
|
+
|
|
126
|
+
let scatterPlot: DG.Viewer | undefined;
|
|
127
|
+
await awaitCheck(() => {
|
|
128
|
+
for (const v of tv.viewers) {
|
|
129
|
+
if (v.type === DG.VIEWER.SCATTER_PLOT) {
|
|
130
|
+
scatterPlot = v;
|
|
131
|
+
return true;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return false;
|
|
135
|
+
}, '', 10000);
|
|
136
|
+
|
|
137
|
+
let link: HTMLCollectionOf<Element> | undefined;
|
|
138
|
+
await awaitCheck(() => {
|
|
139
|
+
link = scatterPlot!.root.getElementsByClassName('scatter_plot_link');
|
|
140
|
+
return link.length > 0;
|
|
141
|
+
}, '', 5000);
|
|
142
|
+
(link![0] as HTMLElement).click();
|
|
143
|
+
await DG.delay(500);
|
|
144
|
+
|
|
130
145
|
tv.grid.props.rowHeight = 180;
|
|
131
146
|
tv.grid.col('sequence') && (tv.grid.col('sequence')!.width = 300);
|
|
132
147
|
tv.grid.col('structure') && (tv.grid.col('structure')!.width = 300);
|
|
133
148
|
const cliffsGrid = Array.from(tv.viewers).find((v) => v !== tv.grid && v.type === DG.VIEWER.GRID) as DG.Grid;
|
|
134
149
|
if (cliffsGrid) {
|
|
135
150
|
cliffsGrid.props.rowHeight = 40;
|
|
136
|
-
cliffsGrid.col('seq_diff')!.width = 600;
|
|
137
|
-
tv.dockManager.dock(cliffsGrid, DG.DOCK_TYPE.DOWN, null, 'Cliffs', 0.35);
|
|
138
|
-
tv.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.55);
|
|
151
|
+
cliffsGrid.col('seq_diff') && (cliffsGrid.col('seq_diff')!.width = 600);
|
|
139
152
|
}
|
|
140
153
|
} catch (err: any) {
|
|
141
154
|
handleError(err);
|
|
@@ -462,8 +462,12 @@ MWRSWY-CKHPMWRSWY-CKHP`;
|
|
|
462
462
|
// sample_testHelm.csv
|
|
463
463
|
// columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
|
|
464
464
|
test('samplesTestHelmCsv', async () => {
|
|
465
|
+
// Alphabet size of 8 reflects splitterAsHelm's triplet-splitting of HELM
|
|
466
|
+
// RNA monomers (each `sugar(base)phosphate` becomes 3 tokens), which can
|
|
467
|
+
// collapse what used to be N distinct triple-tokens into a smaller union
|
|
468
|
+
// of {sugar(s), bases, phosphate(s)} symbols.
|
|
465
469
|
await _testDf(readSamples(Samples.testHelmCsv), {
|
|
466
|
-
'HELM string': new PosCol(NOTATION.HELM, null, null,
|
|
470
|
+
'HELM string': new PosCol(NOTATION.HELM, null, null, 8, true),
|
|
467
471
|
}, seqHelper);
|
|
468
472
|
});
|
|
469
473
|
|
|
@@ -59,18 +59,22 @@ category('splitters', async () => {
|
|
|
59
59
|
'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me'],
|
|
60
60
|
],
|
|
61
61
|
|
|
62
|
+
// splitterAsHelm triplet-splits HELM RNA monomers `sugar(base)phosphate`
|
|
63
|
+
// into 3 tokens, and `sugar(base)` (terminal-only) into 2. Standalone
|
|
64
|
+
// tokens that don't match either form (e.g. lone `P`, or non-terminal
|
|
65
|
+
// `R(U)` without a phosphate) are kept verbatim.
|
|
62
66
|
testHelm1: [
|
|
63
67
|
'RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$',
|
|
64
|
-
['R
|
|
68
|
+
['R', 'U', 'P', 'R', 'T', 'P', 'R', 'G', 'P', 'R', 'C', 'P', 'R', 'A'],
|
|
65
69
|
],
|
|
66
70
|
|
|
67
71
|
testHelm2: [
|
|
68
72
|
'RNA1{P.R(U)P.R(T)}$$$$',
|
|
69
|
-
['P', 'R
|
|
73
|
+
['P', 'R', 'U', 'P', 'R', 'T'],
|
|
70
74
|
],
|
|
71
75
|
testHelm3: [
|
|
72
|
-
'RNA1{P.R(U).P.R(T)}$$$$',
|
|
73
|
-
['P', 'R(U)', 'P', 'R
|
|
76
|
+
'RNA1{P.R(U).P.R(T)}$$$$', // invalid helm, but oh well,
|
|
77
|
+
['P', 'R(U)', 'P', 'R', 'T'],
|
|
74
78
|
],
|
|
75
79
|
};
|
|
76
80
|
|
|
@@ -356,6 +356,150 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
356
356
|
}
|
|
357
357
|
});
|
|
358
358
|
|
|
359
|
+
/** Tests for the linear HELM-RNA path: must preserve modified sugars,
|
|
360
|
+
* phosphates, and bases per nucleotide. The non-linear (HELM via POM)
|
|
361
|
+
* path is the reference; the linear path is expected to match it on
|
|
362
|
+
* canonical SMILES for these inputs. */
|
|
363
|
+
category('toAtomicLevelHelmRna', async () => {
|
|
364
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
365
|
+
let userLibSettings: UserLibSettings;
|
|
366
|
+
let seqHelper: ISeqHelper;
|
|
367
|
+
let monomerLib: IMonomerLib;
|
|
368
|
+
let rdKitModule: RDModule;
|
|
369
|
+
|
|
370
|
+
before(async () => {
|
|
371
|
+
rdKitModule = await getRdKitModule();
|
|
372
|
+
seqHelper = await getSeqHelper();
|
|
373
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
374
|
+
userLibSettings = await getUserLibSettings();
|
|
375
|
+
await monomerLibHelper.loadMonomerLibForTests();
|
|
376
|
+
monomerLib = monomerLibHelper.getMonomerLib();
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
after(async () => {
|
|
380
|
+
await setUserLibSettings(userLibSettings);
|
|
381
|
+
await monomerLibHelper.loadMonomerLib(true);
|
|
382
|
+
});
|
|
383
|
+
|
|
384
|
+
/** Build a single-row HELM RNA dataframe and run the linear converter,
|
|
385
|
+
* returning the canonical SMILES of the resulting molfile. */
|
|
386
|
+
async function helmRnaLinearToSmiles(srcHelm: string): Promise<string> {
|
|
387
|
+
const srcCsv = `seq\n${srcHelm}`;
|
|
388
|
+
const df = DG.DataFrame.fromCsv(srcCsv);
|
|
389
|
+
await grok.data.detectSemanticTypes(df);
|
|
390
|
+
const seqCol = df.getCol('seq');
|
|
391
|
+
expect(seqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
392
|
+
|
|
393
|
+
const res = await _toAtomicLevel(df, seqCol, monomerLib, seqHelper, rdKitModule);
|
|
394
|
+
if (!res.molCol)
|
|
395
|
+
throw new Error(`_toAtomicLevel returned no molCol for HELM '${srcHelm}'. ` +
|
|
396
|
+
`Warnings: ${(res.warnings ?? []).join(' / ')}`);
|
|
397
|
+
|
|
398
|
+
const molfile: string | null = res.molCol.get(0);
|
|
399
|
+
if (!molfile)
|
|
400
|
+
throw new Error(`_toAtomicLevel produced an empty molfile for HELM '${srcHelm}'`);
|
|
401
|
+
let smiles: string;
|
|
402
|
+
try {
|
|
403
|
+
smiles = grok.chem.convert(molfile, grok.chem.Notation.Unknown, grok.chem.Notation.Smiles);
|
|
404
|
+
} catch (err: any) {
|
|
405
|
+
throw new Error(`SMILES conversion threw for HELM '${srcHelm}': ${err?.message ?? err}\n` +
|
|
406
|
+
`--- MOLFILE START ---\n${molfile}\n--- MOLFILE END ---`);
|
|
407
|
+
}
|
|
408
|
+
// RDKit signals a parse failure by returning the literal string
|
|
409
|
+
// "MALFORMED_INPUT_VALUE" — surface it together with the offending molfile.
|
|
410
|
+
if (smiles === 'MALFORMED_INPUT_VALUE' || /^MALFORMED/.test(smiles)) {
|
|
411
|
+
throw new Error(`RDKit could not parse molfile produced for HELM '${srcHelm}'.\n` +
|
|
412
|
+
`--- MOLFILE START ---\n${molfile}\n--- MOLFILE END ---`);
|
|
413
|
+
}
|
|
414
|
+
return smiles;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
// Unmodified RNA HELM — regression baseline. The linear path must produce
|
|
418
|
+
// a real RNA backbone (sugar + phosphate + base per nucleotide), not just
|
|
419
|
+
// a chain of bases.
|
|
420
|
+
test('rna-canonical', async () => {
|
|
421
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{r(A)p.r(C)p.r(G)p}$$$$`);
|
|
422
|
+
// Should at minimum contain phosphate (P), ribose oxygens, and a purine ring.
|
|
423
|
+
expect(/P/.test(smiles), true, `expected phosphate in SMILES: ${smiles}`);
|
|
424
|
+
// Purine fragment (any ring closure digit): n<d>cnc<d> or N<d>C=N (case insensitive).
|
|
425
|
+
expect(/n\dcnc\d/.test(smiles) || /n\dcnc/i.test(smiles), true,
|
|
426
|
+
`expected purine ring fragment in SMILES: ${smiles}`);
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
// Modified base — 5-methylcytosine. Linear path should preserve the
|
|
430
|
+
// methyl branch on the cytidine of position 0.
|
|
431
|
+
test('rna-modified-base', async () => {
|
|
432
|
+
const smilesPlain = await helmRnaLinearToSmiles(`RNA1{r(C)p.r(A)p}$$$$`);
|
|
433
|
+
const smilesMod = await helmRnaLinearToSmiles(`RNA1{r([m5C])p.r(A)p}$$$$`);
|
|
434
|
+
expect(smilesPlain !== smilesMod, true,
|
|
435
|
+
`m5C must change the SMILES vs. plain C. plain=${smilesPlain} mod=${smilesMod}`);
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// Modified phosphate — phosphorothioate. The linker between positions 0
|
|
439
|
+
// and 1 must change (S replaces a non-bridging O).
|
|
440
|
+
test('rna-modified-phosphate', async () => {
|
|
441
|
+
const smilesPlain = await helmRnaLinearToSmiles(`RNA1{r(A)p.r(C)p}$$$$`);
|
|
442
|
+
const smilesMod = await helmRnaLinearToSmiles(`RNA1{r(A)[Rsp].r(C)p}$$$$`);
|
|
443
|
+
expect(smilesPlain !== smilesMod, true,
|
|
444
|
+
`Rsp phosphorothioate must change the SMILES vs. plain p. plain=${smilesPlain} mod=${smilesMod}`);
|
|
445
|
+
expect(/S/.test(smilesMod), true,
|
|
446
|
+
`expected sulfur in phosphorothioate SMILES: ${smilesMod}`);
|
|
447
|
+
// HELM explicitly wrote 2 phosphates (one Rsp at position 0, one p at
|
|
448
|
+
// position 1); both must appear in the molecule, so two P atoms total.
|
|
449
|
+
const pCountPlain = (smilesPlain.match(/P/g) || []).length;
|
|
450
|
+
const pCountMod = (smilesMod.match(/P/g) || []).length;
|
|
451
|
+
expect(pCountPlain, 2, `expected 2 phosphates in plain: ${smilesPlain}`);
|
|
452
|
+
expect(pCountMod, 2, `expected 2 phosphates in modified: ${smilesMod}`);
|
|
453
|
+
});
|
|
454
|
+
|
|
455
|
+
// Modified sugar — 2'-fluoro ribose. Position 0 sugar gets a fluorine.
|
|
456
|
+
test('rna-modified-sugar', async () => {
|
|
457
|
+
const smilesPlain = await helmRnaLinearToSmiles(`RNA1{r(A)p.r(C)p}$$$$`);
|
|
458
|
+
const smilesMod = await helmRnaLinearToSmiles(`RNA1{[fl2r](A)p.r(C)p}$$$$`);
|
|
459
|
+
expect(smilesPlain !== smilesMod, true,
|
|
460
|
+
`fl2r (2'-F ribose) must change the SMILES vs. plain r. plain=${smilesPlain} mod=${smilesMod}`);
|
|
461
|
+
expect(/F/.test(smilesMod), true,
|
|
462
|
+
`expected fluorine in 2'-F ribose SMILES: ${smilesMod}`);
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
// HELM omits the trailing phosphate (3'-OH terminus on the sugar). The
|
|
466
|
+
// splitter must split the partial `r(C)` into [r, C], assembly must skip
|
|
467
|
+
// the trailing P emit, and counts must agree.
|
|
468
|
+
test('rna-no-trailing-phosphate', async () => {
|
|
469
|
+
const smilesWith = await helmRnaLinearToSmiles(`RNA1{r(A)p.r(C)p}$$$$`);
|
|
470
|
+
const smilesNoTail = await helmRnaLinearToSmiles(`RNA1{r(A)p.r(C)}$$$$`);
|
|
471
|
+
// Both should produce valid molecules with at least one P (the linker
|
|
472
|
+
// between the two nucleotides is always present).
|
|
473
|
+
expect(/P/.test(smilesNoTail), true,
|
|
474
|
+
`expected the inter-nucleotide phosphate to remain: ${smilesNoTail}`);
|
|
475
|
+
// The version WITH trailing phosphate should have exactly one more P
|
|
476
|
+
// atom than the version without.
|
|
477
|
+
const pCountWith = (smilesWith.match(/P/g) || []).length;
|
|
478
|
+
const pCountNoTail = (smilesNoTail.match(/P/g) || []).length;
|
|
479
|
+
expect(pCountWith, pCountNoTail + 1,
|
|
480
|
+
`expected pCountWith - pCountNoTail === 1, got with=${pCountWith}, noTail=${pCountNoTail}. ` +
|
|
481
|
+
`with=${smilesWith}, noTail=${smilesNoTail}`);
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
// Missing trailing phosphate combined with modifications.
|
|
485
|
+
test('rna-no-trailing-phosphate-with-modifications', async () => {
|
|
486
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[fl2r]([m5C])[Rsp].r(A)}$$$$`);
|
|
487
|
+
expect(/F/.test(smiles), true, `expected fluorine: ${smiles}`);
|
|
488
|
+
expect(/S/.test(smiles), true, `expected sulfur: ${smiles}`);
|
|
489
|
+
// Exactly one phosphate (the Rsp linker), no trailing P.
|
|
490
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
491
|
+
expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
// All three modifications combined. End-to-end smoke test.
|
|
495
|
+
test('rna-all-modifications', async () => {
|
|
496
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[fl2r]([m5C])[Rsp].r(A)p}$$$$`);
|
|
497
|
+
expect(/F/.test(smiles), true, `expected fluorine: ${smiles}`);
|
|
498
|
+
expect(/S/.test(smiles), true, `expected sulfur: ${smiles}`);
|
|
499
|
+
expect(/P/.test(smiles), true, `expected phosphorus: ${smiles}`);
|
|
500
|
+
});
|
|
501
|
+
});
|
|
502
|
+
|
|
359
503
|
|
|
360
504
|
function polishMolfile(mol: string): string {
|
|
361
505
|
return mol.replaceAll('\r\n', '\n')
|
|
@@ -11,7 +11,7 @@ import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeq
|
|
|
11
11
|
import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
12
|
import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
13
13
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
14
|
-
import {HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
|
|
14
|
+
import {DEOXYRIBOSE_SYMBOL, HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL, RIBOSE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
|
|
15
15
|
import {GAP_SYMBOL, GapOriginals} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
16
16
|
import {CellRendererBackBase, GridCellRendererTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
17
17
|
import {HelmTypes} from '@datagrok-libraries/bio/src/helm/consts';
|
|
@@ -939,6 +939,11 @@ export class SeqHandler implements ISeqHandler {
|
|
|
939
939
|
|
|
940
940
|
if (cm === GAP_SYMBOL)
|
|
941
941
|
om = GapOriginals[NOTATION.FASTA];
|
|
942
|
+
// For HELM RNA, the splitter triplet-splits each nucleotide into
|
|
943
|
+
// [sugar, base, phosphate]; FASTA conversion keeps only the base, so
|
|
944
|
+
// drop standalone sugar/phosphate tokens.
|
|
945
|
+
else if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
|
|
946
|
+
om = '';
|
|
942
947
|
else if (cm === PHOSPHATE_SYMBOL)
|
|
943
948
|
om = '';
|
|
944
949
|
else if (om.length > 1)
|
|
@@ -978,7 +983,9 @@ export class SeqHandler implements ISeqHandler {
|
|
|
978
983
|
return joinToBiln(srcSS);
|
|
979
984
|
}
|
|
980
985
|
|
|
981
|
-
/** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus
|
|
986
|
+
/** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus,
|
|
987
|
+
* ribose, and deoxyribose tokens (which the underlying splitter emits when triplet-splitting
|
|
988
|
+
* each nucleotide of an RNA chain). */
|
|
982
989
|
private splitterAsHelmNucl(src: string): ISeqSplitted {
|
|
983
990
|
const srcMList: ISeqSplitted = this.splitter(src);
|
|
984
991
|
const tgtMList: (string | null)[] = new Array<string>(srcMList.length);
|
|
@@ -988,7 +995,8 @@ export class SeqHandler implements ISeqHandler {
|
|
|
988
995
|
let om: string | null = srcMList.getOriginal(posIdx);
|
|
989
996
|
if (isDna || isRna) {
|
|
990
997
|
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
991
|
-
om
|
|
998
|
+
if (om === PHOSPHATE_SYMBOL || om === RIBOSE_SYMBOL || om === DEOXYRIBOSE_SYMBOL)
|
|
999
|
+
om = null;
|
|
992
1000
|
}
|
|
993
1001
|
tgtMList[posIdx] = om ? om : null;
|
|
994
1002
|
}
|
|
@@ -1009,18 +1017,26 @@ export class SeqHandler implements ISeqHandler {
|
|
|
1009
1017
|
// -- joiners --
|
|
1010
1018
|
|
|
1011
1019
|
function joinToSeparator(seqS: ISeqSplitted, tgtSeparator: string, isHelm: boolean): string {
|
|
1012
|
-
const resMList: string[] =
|
|
1020
|
+
const resMList: string[] = [];
|
|
1013
1021
|
for (let posIdx: number = 0; posIdx < seqS.length; ++posIdx) {
|
|
1014
1022
|
const cm = seqS.getCanonical(posIdx);
|
|
1015
1023
|
let om = seqS.getOriginal(posIdx);
|
|
1016
1024
|
if (isHelm)
|
|
1017
1025
|
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
1018
1026
|
|
|
1019
|
-
if (cm === GAP_SYMBOL)
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1027
|
+
if (cm === GAP_SYMBOL) {
|
|
1028
|
+
resMList.push(GapOriginals[NOTATION.SEPARATOR]);
|
|
1029
|
+
continue;
|
|
1030
|
+
}
|
|
1031
|
+
// For HELM RNA, the splitter triplet-splits each nucleotide into
|
|
1032
|
+
// [sugar, base, phosphate]; separator conversion keeps only the base, so
|
|
1033
|
+
// skip standalone sugar/phosphate tokens entirely (rather than emitting
|
|
1034
|
+
// an empty cell that would show up as an extra separator in the output).
|
|
1035
|
+
if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
|
|
1036
|
+
continue;
|
|
1037
|
+
if (cm === PHOSPHATE_SYMBOL)
|
|
1038
|
+
continue;
|
|
1039
|
+
resMList.push(om);
|
|
1024
1040
|
}
|
|
1025
1041
|
return resMList.join(tgtSeparator);
|
|
1026
1042
|
}
|