@datagrok/bio 2.9.0 → 2.10.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.9.0",
8
+ "version": "2.10.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.37.0",
37
+ "@datagrok-libraries/bio": "^5.38.0",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
39
  "@datagrok-libraries/ml": "^6.3.39",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
@@ -62,8 +62,8 @@
62
62
  "webpack": "^5.76.3",
63
63
  "webpack-bundle-analyzer": "latest",
64
64
  "webpack-cli": "^4.9.1",
65
- "@datagrok/chem": "1.4.21",
66
- "@datagrok/helm": "2.1.7"
65
+ "@datagrok/chem": "1.7.2",
66
+ "@datagrok/helm": "2.1.16"
67
67
  },
68
68
  "scripts": {
69
69
  "link-api": "npm link datagrok-api",
@@ -6,7 +6,6 @@ import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-
6
6
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
7
7
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
8
  import * as grok from 'datagrok-api/grok';
9
- import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
10
9
  import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
10
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
12
11
 
@@ -57,17 +56,17 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
57
56
  }
58
57
 
59
58
  export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
60
- const nc = new NotationConverter(spaceParams.seqCol);
61
- if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)) {
59
+ const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
60
+ if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
62
61
  let distanceFName = MmDistanceFunctionsNames.LEVENSHTEIN;
63
62
  let seqList = spaceParams.seqCol.toList();
64
- if (nc.isSeparator()) {
65
- const fastaCol = nc.convert(NOTATION.FASTA);
63
+ if (ncUH.isSeparator()) {
64
+ const fastaCol = ncUH.convert(NOTATION.FASTA);
66
65
  seqList = fastaCol.toList();
67
66
  const uh = UnitsHandler.getOrCreate(fastaCol);
68
67
  distanceFName = uh.getDistanceFunctionName();
69
68
  } else {
70
- distanceFName = nc.getDistanceFunctionName();
69
+ distanceFName = ncUH.getDistanceFunctionName();
71
70
  }
72
71
  for (let i = 0; i < seqList.length; i++) {
73
72
  // toList puts empty values in array and it causes downstream errors. replace with null
@@ -0,0 +1,56 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {TAGS, positionSeparator} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
+
8
+ import {_package} from '../package';
9
+
10
+ const csv = `seq,value
11
+ ATCCGTCGT,0.5
12
+ TGTTCGTCA,0.4
13
+ ATGGTCGTA,0.7
14
+ ATCCGTGCA,0.1`;
15
+
16
+ const positionNames = ['1', '1A', '1C', '2', '4', '4A', '4B', '5', '6'].join(positionSeparator);
17
+
18
+ const regions = [
19
+ {name: 'first region', start: '1', end: '2'},
20
+ {name: 'second region', start: '1C', end: '4'},
21
+ {name: 'overlapping second', start: '1C', end: '4A'},
22
+ {name: 'whole sequence', start: '1', end: '6'},
23
+ {name: 'bad start', start: '0', end: '6'},
24
+ {name: 'bad end', start: '1', end: '4C'},
25
+ {name: 'bad start & end', start: '0', end: '4C'},
26
+ ];
27
+
28
+ export class GetRegionApp {
29
+ df: DG.DataFrame;
30
+ view: DG.TableView;
31
+
32
+ constructor(
33
+ private readonly urlParams: URLSearchParams,
34
+ private readonly funcName: string
35
+ ) {}
36
+
37
+ async init(): Promise<void> {
38
+ this.df = DG.DataFrame.fromCsv(csv);
39
+ const seqCol = this.df.getCol('seq');
40
+ seqCol.setTag(TAGS.positionNames, positionNames);
41
+ seqCol.setTag(TAGS.regions, JSON.stringify(regions));
42
+
43
+ await this.buildView();
44
+ }
45
+
46
+ // -- View --
47
+
48
+ async buildView(): Promise<void> {
49
+ this.view = grok.shell.addTableView(this.df);
50
+ this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}`;
51
+
52
+ const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
53
+ .fromType('WebLogo')) as DG.Viewer & IWebLogoViewer;
54
+ this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
55
+ }
56
+ }
@@ -11,15 +11,12 @@ import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
11
11
  import {_package} from '../package';
12
12
 
13
13
  export class WebLogoApp {
14
- private _funcName: string = '';
15
-
16
14
  df: DG.DataFrame;
17
15
  view: DG.TableView;
18
16
 
19
- constructor(private readonly urlParams: URLSearchParams) {}
17
+ constructor(private readonly urlParams: URLSearchParams, private readonly funcName: string) {}
20
18
 
21
- async init(df: DG.DataFrame, funcName: string): Promise<void> {
22
- this._funcName = funcName;
19
+ async init(df: DG.DataFrame): Promise<void> {
23
20
  this.df = df;
24
21
 
25
22
  await this.buildView();
@@ -33,7 +30,7 @@ export class WebLogoApp {
33
30
  .toArray().join('&');
34
31
 
35
32
  this.view = grok.shell.addTableView(this.df);
36
- this.view.path = this.view.basePath = `func/${_package.name}.${this._funcName}?${urlParamsTxt}`;
33
+ this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}?${urlParamsTxt}`;
37
34
 
38
35
  const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
39
36
  for (const [optName, optValue] of this.urlParams.entries()) {
@@ -24,6 +24,7 @@ import './tests/pepsea-tests';
24
24
  import './tests/viewers';
25
25
  import './tests/units-handler-tests';
26
26
  import './tests/units-handler-splitted-tests';
27
+ import './tests/units-handler-get-region';
27
28
  import './tests/to-atomic-level-tests';
28
29
  import './tests/mm-distance-tests';
29
30
  import './tests/activity-cliffs-tests';
@@ -57,4 +57,17 @@ export class BioPackage extends DG.Package {
57
57
  public get properties(): BioPackageProperties { return this._properties; };
58
58
 
59
59
  public set properties(value: BioPackageProperties) { this._properties = value; }
60
+
61
+ private _initialized: boolean = false;
62
+
63
+ public get initialized(): boolean { return this._initialized;}
64
+
65
+ public completeInit(): void { this._initialized = true;}
66
+
67
+ handleErrorUI(err: any) {
68
+ const errMsg = err instanceof Error ? err.message : err.toString();
69
+ const errStack = err instanceof Error ? err.stack : undefined;
70
+ grok.shell.error(errMsg);
71
+ this.logger.error(errMsg, undefined, errStack);
72
+ }
60
73
  }
package/src/package.ts CHANGED
@@ -55,7 +55,6 @@ import {checkInputColumnUI} from './utils/check-input-column';
55
55
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
56
56
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
57
57
  import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
58
- import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
59
58
  import {WebLogoApp} from './apps/web-logo-app';
60
59
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
61
60
  import {splitToMonomersUI} from './utils/split-to-monomers';
@@ -66,10 +65,13 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
66
65
  import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
67
66
  import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
68
67
  import {addCopyMenuUI} from './utils/context-menu';
68
+ import {getRegionDo} from './utils/get-region';
69
+ import {GetRegionApp} from './apps/get-region-app';
70
+ import {GetRegionFuncEditor} from './utils/get-region-func-editor';
69
71
 
70
72
  export const _package = new BioPackage();
71
73
 
72
- // /** Avoid reassinging {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
74
+ // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
73
75
  // let monomerLib: MonomerLib | null = null;
74
76
 
75
77
  //name: getMonomerLibHelper
@@ -104,7 +106,9 @@ export async function initBio() {
104
106
  const bioPkgProps = new BioPackageProperties(pkgProps);
105
107
  _package.properties = bioPkgProps;
106
108
  })(),
107
- ]);
109
+ ]).finally(() => {
110
+ _package.completeInit();
111
+ });
108
112
 
109
113
  const monomerLib = MonomerLibHelper.instance.getBioLib();
110
114
  const monomers: string[] = [];
@@ -150,14 +154,96 @@ export function getBioLib(): IMonomerLib {
150
154
  return MonomerLibHelper.instance.getBioLib();
151
155
  }
152
156
 
157
+ // -- Panels --
158
+
159
+ //name: Get Region
160
+ //description: Creates a new column with sequences of the region between start and end
161
+ //tags: panel
162
+ //input: column seqCol {semType: Macromolecule}
163
+ //output: widget result
164
+ export function getRegionPanel(seqCol: DG.Column<string>): DG.Widget {
165
+ // const host = ui.divV([
166
+ // ui.inputs([
167
+ // ui.stringInput('Region', ''),
168
+ // ]),
169
+ // ui.button('Ok', () => {})
170
+ // ]);
171
+ // return DG.Widget.fromRoot(host);
172
+ const funcName: string = 'getRegionTopMenu';
173
+ const funcList = DG.Func.find({package: _package.name, name: funcName});
174
+ if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
175
+ const func = funcList[0];
176
+ const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
177
+ const funcEditor = new GetRegionFuncEditor(funcCall);
178
+ return funcEditor.widget();
179
+ }
180
+
153
181
  //name: Manage Libraries
154
- //input: column seqColumn {semType: Macromolecule}
182
+ //description:
155
183
  //tags: panel, exclude-actions-panel
184
+ //input: column seqColumn {semType: Macromolecule}
156
185
  //output: widget result
157
186
  export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
158
187
  return getLibraryPanelUI();
159
188
  }
160
189
 
190
+ // -- Func Editors --
191
+
192
+ //name: GetRegionEditor
193
+ //tags: editor
194
+ //input: funccall call
195
+ export function GetRegionEditor(call: DG.FuncCall): void {
196
+ try {
197
+ const funcEditor = new GetRegionFuncEditor(call);
198
+ funcEditor.dialog();
199
+ } catch (err: any) {
200
+ const errMsg = err instanceof Error ? err.message : err.toString();
201
+ const errStack = err instanceof Error ? err.stack : undefined;
202
+ grok.shell.error(`Get region editor error: ${errMsg}`);
203
+ _package.logger.error(errMsg, undefined, errStack);
204
+ }
205
+ }
206
+
207
+ //name: SplitToMonomersEditor
208
+ //tags: editor
209
+ //input: funccall call
210
+ export function SplitToMonomersEditor(call: DG.FuncCall): void {
211
+ const funcEditor = new SplitToMonomersFunctionEditor();
212
+ ui.dialog({title: 'Split to Monomers'})
213
+ .add(funcEditor.paramsUI)
214
+ .onOK(async () => {
215
+ return call.func.prepare(funcEditor.funcParams).call(true);
216
+ })
217
+ .show();
218
+ }
219
+
220
+ //name: SequenceSpaceEditor
221
+ //tags: editor
222
+ //input: funccall call
223
+ export function SequenceSpaceEditor(call: DG.FuncCall) {
224
+ const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
225
+ ui.dialog({title: 'Sequence Space'})
226
+ .add(funcEditor.paramsUI)
227
+ .onOK(async () => {
228
+ return call.func.prepare(funcEditor.funcParams).call(true);
229
+ })
230
+ .show();
231
+ }
232
+
233
+ //name: SeqActivityCliffsEditor
234
+ //tags: editor
235
+ //input: funccall call
236
+ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
237
+ const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
238
+ ui.dialog({title: 'Activity Cliffs'})
239
+ .add(funcEditor.paramsUI)
240
+ .onOK(async () => {
241
+ return call.func.prepare(funcEditor.funcParams).call(true);
242
+ })
243
+ .show();
244
+ }
245
+
246
+
161
247
  // -- Package settings editor --
162
248
 
163
249
  //name: packageSettingsEditor
@@ -182,6 +268,8 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
182
268
  return new MacromoleculeSequenceCellRenderer();
183
269
  }
184
270
 
271
+ // -- Property panels --
272
+
185
273
  //name: Sequence Renderer
186
274
  //input: column molColumn {semType: Macromolecule}
187
275
  //tags: panel
@@ -251,17 +339,39 @@ export function vdRegionsViewer() {
251
339
  return new VdRegionsViewer();
252
340
  }
253
341
 
254
- //name: SeqActivityCliffsEditor
255
- //tags: editor
256
- //input: funccall call
257
- export function SeqActivityCliffsEditor(call: DG.FuncCall) {
258
- const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
259
- ui.dialog({title: 'Activity Cliffs'})
260
- .add(funcEditor.paramsUI)
261
- .onOK(async () => {
262
- return call.func.prepare(funcEditor.funcParams).call(true);
263
- })
264
- .show();
342
+
343
+ // -- Top menu --
344
+
345
+ //name: getRegion
346
+ //description: Gets a new column with sequences of the region between start and end
347
+ //input: column sequence
348
+ //input: string start {optional: true}
349
+ //input: string end {optional: true}
350
+ //input: string name {optional: true} [Name of the column to be created]
351
+ //output: column result
352
+ export function getRegion(
353
+ sequence: DG.Column<string>, start: string | undefined, end: string | undefined, name: string | undefined
354
+ ): DG.Column<string> {
355
+ return getRegionDo(sequence,
356
+ start ?? null, end ?? null, name ?? null);
357
+ }
358
+
359
+ //top-menu: Bio | Convert | Get Region...
360
+ //name: Get Region
361
+ //description: Get sequences for a region specified from a Macromolecule
362
+ //input: dataframe table [Input data table]
363
+ //input: column sequence {semType: Macromolecule} [Sequence column]
364
+ //input: string start {optional: true} [Region start position name]
365
+ //input: string end {optional: true} [Region end position name]
366
+ //input: string name {optional: true} [Region column name]
367
+ //editor: Bio:GetRegionEditor
368
+ export function getRegionTopMenu(
369
+ table: DG.DataFrame, sequence: DG.Column,
370
+ start: string | undefined, end: string | undefined, name: string | undefined
371
+ ): void {
372
+ const regCol = getRegionDo(sequence, start ?? null, end ?? null, name ?? null);
373
+ sequence.dataFrame.columns.add(regCol);
374
+ regCol.setTag(DG.TAGS.CELL_RENDERER, 'sequence');
265
375
  }
266
376
 
267
377
  //top-menu: Bio | Analyze | Activity Cliffs...
@@ -287,14 +397,14 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
287
397
  'separator': macroMolecule.getTag(bioTAGS.separator),
288
398
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
289
399
  };
290
- const nc = new NotationConverter(macroMolecule);
400
+ const ncUH = UnitsHandler.getOrCreate(macroMolecule);
291
401
  let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
292
402
  let seqCol = macroMolecule;
293
- if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)) {
294
- if (nc.isFasta()) {
295
- columnDistanceMetric = nc.getDistanceFunctionName();
403
+ if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
404
+ if (ncUH.isFasta()) {
405
+ columnDistanceMetric = ncUH.getDistanceFunctionName();
296
406
  } else {
297
- seqCol = nc.convert(NOTATION.FASTA);
407
+ seqCol = ncUH.convert(NOTATION.FASTA);
298
408
  const uh = UnitsHandler.getOrCreate(seqCol);
299
409
  columnDistanceMetric = uh.getDistanceFunctionName();
300
410
  tags.units = NOTATION.FASTA;
@@ -345,19 +455,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
345
455
  }
346
456
  }
347
457
 
348
- //name: SequenceSpaceEditor
349
- //tags: editor
350
- //input: funccall call
351
- export function SequenceSpaceEditor(call: DG.FuncCall) {
352
- const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
353
- ui.dialog({title: 'Sequence Space'})
354
- .add(funcEditor.paramsUI)
355
- .onOK(async () => {
356
- return call.func.prepare(funcEditor.funcParams).call(true);
357
- })
358
- .show();
359
- }
360
-
361
458
  //top-menu: Bio | Analyze | Sequence Space...
362
459
  //name: Sequence Space
363
460
  //description: Creates 2D sequence space with projected sequences by pairwise distance
@@ -667,19 +764,6 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
667
764
  return resDf;
668
765
  }
669
766
 
670
- //name: SplitToMonomersEditor
671
- //tags: editor
672
- //input: funccall call
673
- export function SplitToMonomersEditor(call: DG.FuncCall): void {
674
- const funcEditor = new SplitToMonomersFunctionEditor();
675
- ui.dialog({title: 'Split to Monomers'})
676
- .add(funcEditor.paramsUI)
677
- .onOK(async () => {
678
- return call.func.prepare(funcEditor.funcParams).call(true);
679
- })
680
- .show();
681
- }
682
-
683
767
  //top-menu: Bio | Convert | Split to Monomers...
684
768
  //name: Split to Monomers
685
769
  //input: dataframe table
@@ -759,26 +843,30 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
759
843
  grok.shell.tv.grid.scrollToCell(macromolecules, 0);
760
844
  }
761
845
 
762
- //top-menu: Bio | Caclulate | Identity...
846
+ //top-menu: Bio | Calculate | Identity...
763
847
  //name: Identity Scoring
764
848
  //description: Adds a column with fraction of matching monomers
765
849
  //input: dataframe table [Table containing Macromolecule column]
766
850
  //input: column macromolecules {semType: Macromolecule} [Sequences to score]
767
851
  //input: string reference [Sequence, matching column format]
768
852
  //output: column scores
769
- export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
853
+ export async function sequenceIdentityScoring(
854
+ table: DG.DataFrame, macromolecule: DG.Column, reference: string
855
+ ): Promise<DG.Column<number>> {
770
856
  const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
771
857
  return scores;
772
858
  }
773
859
 
774
- //top-menu: Bio | Caclulate | Similarity...
860
+ //top-menu: Bio | Calculate | Similarity...
775
861
  //name: Similarity Scoring
776
862
  //description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
777
863
  //input: dataframe table [Table containing Macromolecule column]
778
864
  //input: column macromolecules {semType: Macromolecule} [Sequences to score]
779
865
  //input: string reference [Sequence, matching column format]
780
866
  //output: column scores
781
- export async function sequenceSimilarityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
867
+ export async function sequenceSimilarityScoring(
868
+ table: DG.DataFrame, macromolecule: DG.Column, reference: string
869
+ ): Promise<DG.Column<number>> {
782
870
  const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
783
871
  return scores;
784
872
  }
@@ -806,10 +894,22 @@ export async function webLogoLargeApp(): Promise<void> {
806
894
  const pi = DG.TaskBarProgressIndicator.create('WebLogo');
807
895
  try {
808
896
  const urlParams = new URLSearchParams(window.location.search);
809
- const app = new WebLogoApp(urlParams);
897
+ const app = new WebLogoApp(urlParams, 'webLogoLargeApp');
810
898
  const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
811
899
  await grok.data.detectSemanticTypes(df);
812
- await app.init(df, 'webLogoLargeApp');
900
+ await app.init(df);
901
+ } finally {
902
+ pi.close();
903
+ }
904
+ }
905
+
906
+ //name: getRegionApp
907
+ export async function getRegionApp(): Promise<void> {
908
+ const pi = DG.TaskBarProgressIndicator.create('getRegion ...');
909
+ try {
910
+ const urlParams = new URLSearchParams(window.location.search);
911
+ const app = new GetRegionApp(urlParams, 'getRegionApp');
912
+ await app.init();
813
913
  } finally {
814
914
  pi.close();
815
915
  }
@@ -5,7 +5,6 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
5
5
 
6
6
  import {ConverterFunc} from './types';
7
7
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
- import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
9
8
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
9
 
11
10
  // import {mmSemType} from '../const';
@@ -134,8 +133,8 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
134
133
  throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
135
134
 
136
135
  return function(srcCol: DG.Column): DG.Column {
137
- const converter = new NotationConverter(srcCol);
138
- const resCol = converter.convert(tgtNotation, tgtSeparator);
136
+ const converterUH = UnitsHandler.getOrCreate(srcCol);
137
+ const resCol = converterUH.convert(tgtNotation, tgtSeparator);
139
138
  expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
140
139
  return resCol;
141
140
  };
@@ -26,13 +26,17 @@ category('Scoring', () => {
26
26
 
27
27
  test('Identity', async () => {
28
28
  const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
29
- for (let i = 0; i < scoresCol.length; i++)
30
- expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01, `Wrong identity score for sequence at position ${i}`);
29
+ for (let i = 0; i < scoresCol.length; i++) {
30
+ expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01,
31
+ `Wrong identity score for sequence at position ${i}`);
32
+ }
31
33
  });
32
34
 
33
35
  test('Similarity', async () => {
34
36
  const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
35
- for (let i = 0; i < scoresCol.length; i++)
36
- expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01, `Wrong similarity score for sequence at position ${i}`);
37
+ for (let i = 0; i < scoresCol.length; i++) {
38
+ expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01,
39
+ `Wrong similarity score for sequence at position ${i}`);
40
+ }
37
41
  });
38
42
  });
@@ -0,0 +1,116 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+
4
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
5
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
6
+ import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
+
8
+ category('UnitsHandler: getRegion', () => {
9
+ const data: {
10
+ [testName: string]: {
11
+ srcCsv: string,
12
+ startIdx: number | null,
13
+ endIdx: number | null,
14
+ tgtCsv: string,
15
+ units: NOTATION,
16
+ alphabet: ALPHABET | null, /* alphabet is not applicable for units 'helm' */
17
+
18
+ positionNames?: { tag: string | null, start: string | null, end: string | null }
19
+ }
20
+ } = {
21
+ 'fastaDna': {
22
+ srcCsv: `seq
23
+ ATTCGT
24
+ ACTGCTC
25
+ ATTCCGTA`,
26
+ startIdx: 2,
27
+ endIdx: 4,
28
+ tgtCsv: `seq
29
+ TCG
30
+ TGC
31
+ TCC`,
32
+ units: NOTATION.FASTA,
33
+ alphabet: ALPHABET.DNA,
34
+
35
+ positionNames: {tag: 'a, b, c, d, e, f, g, h', start: 'c', end: 'e'},
36
+ },
37
+ 'separatorPt': {
38
+ srcCsv: `seq
39
+ M-D-Y-K-E-T-L
40
+ M-I-E-V-F-L-F-G-I
41
+ M-M-`,
42
+ startIdx: 5,
43
+ endIdx: null,
44
+ tgtCsv: `seq
45
+ T-L--
46
+ L-F-G-I
47
+ ---`,
48
+ units: NOTATION.SEPARATOR,
49
+ alphabet: ALPHABET.PT,
50
+
51
+ positionNames: {tag: '1, 1A, 1B, 2, 3, 4, 4A, 4A, 4C', start: '4', end: null},
52
+ },
53
+ 'helm': {
54
+ srcCsv: `seq
55
+ PEPTIDE1{[meI].[hHis].[Aca].N.T.[dE].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].E.N.[D-Orn]}$$$$
56
+ PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Phe_ab-dehydro]}$$$$
57
+ PEPTIDE1{[Lys_Boc].[hHis].[Aca].[Cys_SEt].T}$$$$`,
58
+ startIdx: 3,
59
+ endIdx: 6,
60
+ tgtCsv: `seq
61
+ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
62
+ PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
63
+ PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
64
+ units: NOTATION.HELM,
65
+ alphabet: null,
66
+
67
+ positionNames: {tag: null, start: '4', end: '7'}
68
+ }
69
+ };
70
+
71
+ for (const [testName, testData] of Object.entries(data)) {
72
+ test(`${testName}-idx`, async () => {
73
+ const srcDf = DG.DataFrame.fromCsv(testData.srcCsv);
74
+ const srcSeqCol = srcDf.getCol('seq');
75
+
76
+ const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
77
+ if (semType) srcSeqCol.semType = semType;
78
+
79
+ const srcUh = UnitsHandler.getOrCreate(srcSeqCol);
80
+ const resSeqCol = srcUh.getRegion(testData.startIdx, testData.endIdx, 'regSeq');
81
+
82
+ const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
83
+ const tgtSeqCol = tgtDf.getCol('seq');
84
+
85
+ expect(srcSeqCol.getTag(DG.TAGS.UNITS), testData.units);
86
+ expect(resSeqCol.getTag(DG.TAGS.UNITS), testData.units);
87
+ expect(srcSeqCol.getTag(TAGS.alphabet), testData.alphabet);
88
+ expect(resSeqCol.getTag(TAGS.alphabet), testData.alphabet);
89
+ expectArray(resSeqCol.toList(), tgtSeqCol.toList());
90
+ });
91
+
92
+ if (testData.positionNames) {
93
+ test(`${testName}-positionNames`, async () => {
94
+ const srcDf = DG.DataFrame.fromCsv(testData.srcCsv);
95
+ const srcSeqCol = srcDf.getCol('seq');
96
+ if (testData.positionNames!.tag)
97
+ srcSeqCol.setTag(TAGS.positionNames, testData.positionNames!.tag);
98
+
99
+ const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
100
+ if (semType) srcSeqCol.semType = semType;
101
+
102
+ const resSeqCol = await grok.functions.call('Bio:getRegion',
103
+ {sequence: srcSeqCol, start: testData.positionNames!.start, end: testData.positionNames!.end});
104
+
105
+ const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
106
+ const tgtSeqCol = tgtDf.getCol('seq');
107
+
108
+ expect(srcSeqCol.getTag(DG.TAGS.UNITS), testData.units);
109
+ expect(resSeqCol.getTag(DG.TAGS.UNITS), testData.units);
110
+ expect(srcSeqCol.getTag(TAGS.alphabet), testData.alphabet);
111
+ expect(resSeqCol.getTag(TAGS.alphabet), testData.alphabet);
112
+ expectArray(resSeqCol.toList(), tgtSeqCol.toList());
113
+ });
114
+ }
115
+ }
116
+ });