@datagrok/bio 2.4.16 → 2.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.16",
8
+ "version": "2.4.18",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -43,8 +43,8 @@ export async function getSimilaritiesMatrix(
43
43
  }
44
44
 
45
45
  export async function getChemSimilaritiesMatrix(dim: number, seqCol: DG.Column,
46
- df: DG.DataFrame, colName: string, simArr: DG.Column[])
47
- : Promise<DG.Column[]> {
46
+ df: DG.DataFrame, colName: string, simArr: (DG.Column | null)[])
47
+ : Promise<(DG.Column | null)[]> {
48
48
  if (seqCol.version !== seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
49
49
  await invalidateMols(seqCol, false);
50
50
  const fpDf = DG.DataFrame.create(seqCol.length);
@@ -6,6 +6,7 @@ import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
6
6
  import BitArray from '@datagrok-libraries/utils/src/bit-array';
7
7
  import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
8
8
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
9
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
10
  import * as grok from 'datagrok-api/grok';
10
11
 
11
12
  export interface ISequenceSpaceResult {
@@ -53,6 +54,23 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
53
54
  return result;
54
55
  }
55
56
 
57
+ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
58
+ const uh = new UnitsHandler(spaceParams.seqCol);
59
+ if (uh.isFasta()) {
60
+ const distanceFName = uh.getDistanceFunctionName();
61
+ const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
62
+ spaceParams.seqCol.toList(),
63
+ spaceParams.methodName,
64
+ distanceFName,
65
+ spaceParams.options);
66
+ console.log(sequenceSpaceResult);
67
+ const cols: DG.Column[] = spaceParams.embedAxesNames.map(
68
+ (name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
69
+ return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
70
+ } else {
71
+ return await sequenceSpaceByFingerprints(spaceParams);
72
+ }
73
+ }
56
74
 
57
75
  export function getEmbeddingColsNames(df: DG.DataFrame) {
58
76
  const axes = ['Embed_X', 'Embed_Y'];
@@ -18,7 +18,7 @@ export async function demoBio01UI() {
18
18
  try {
19
19
  const demoScript = new DemoScript('Demo', 'Sequence similarity / diversity search');
20
20
  await demoScript
21
- .step(`Loading DNA notation 'fasta'`, async () => {
21
+ .step(`Load DNA sequences`, async () => {
22
22
  grok.shell.windows.showContextPanel = false;
23
23
  grok.shell.windows.showProperties = false;
24
24
 
@@ -32,7 +32,7 @@ export async function demoBio01UI() {
32
32
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
33
33
  delay: 1200
34
34
  })
35
- .step('Sequence similarity search', async () => {
35
+ .step('Find the most similar sequences to the current one', async () => {
36
36
  const simViewer = await df.plot.fromType('Sequence Similarity Search', {
37
37
  moleculeColumnName: 'sequence',
38
38
  similarColumnLabel: 'Similar to current',
@@ -42,7 +42,7 @@ export async function demoBio01UI() {
42
42
  description: `Add 'Sequence Similarity Search' viewer.`,
43
43
  delay: 1600
44
44
  })
45
- .step('Sequence diversity search', async () => {
45
+ .step('Explore most diverse sequences in a dataset', async () => {
46
46
  const divViewer = await df.plot.fromType('Sequence Diversity Search', {
47
47
  moleculeColumnName: 'sequence',
48
48
  diverseColumnLabel: 'Top diverse sequences of all data'
@@ -52,16 +52,16 @@ export async function demoBio01UI() {
52
52
  description: `Add 'Sequence Deversity Search' viewer.`,
53
53
  delay: 1600
54
54
  })
55
- .step('Set current row 3', async () => {
55
+ .step('Choose another sequence for similarity search', async () => {
56
56
  df.currentRowIdx = 3;
57
57
  }, {
58
58
  description: 'Handling current row changed of data frame showing update of similar sequences.',
59
59
  delay: 1600,
60
60
  })
61
- .step('Set current row 7', async () => {
61
+ .step('One more sequence for similarity search', async () => {
62
62
  df.currentRowIdx = 7;
63
63
  }, {
64
- description: 'Changing current row to another.',
64
+ description: 'Just one more sequence to search similar ones.',
65
65
  delay: 1600,
66
66
  })
67
67
  .start();
@@ -28,7 +28,7 @@ export async function demoBio01aUI() {
28
28
  try {
29
29
  const demoScript = new DemoScript('Demo', 'Exploring sequence space');
30
30
  await demoScript
31
- .step(`Loading DNA notation 'fasta'`, async () => {
31
+ .step(`Load DNA sequences`, async () => {
32
32
  [df, treeHelper, dendrogramSvc] = await Promise.all([
33
33
  _package.files.readCsv(dataFn),
34
34
  getTreeHelper(),
@@ -43,13 +43,13 @@ export async function demoBio01aUI() {
43
43
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
44
44
  delay: 1600,
45
45
  })
46
- .step('Building sequence space', async () => {
46
+ .step('Build sequence space', async () => {
47
47
  spViewer = await demoSequenceSpace(view, df, seqColName, method);
48
48
  }, {
49
49
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
50
50
  delay: 1600
51
51
  })
52
- .step('Hierarchical clustering', async () => {
52
+ .step('Cluster sequences', async () => {
53
53
  const seqCol: DG.Column<string> = df.getCol(seqColName);
54
54
  const seqList = seqCol.toList();
55
55
  const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
@@ -62,7 +62,7 @@ export async function demoBio01aUI() {
62
62
  description: `Perform hierarchical clustering to reveal relationships between sequences.`,
63
63
  delay: 1600,
64
64
  })
65
- .step('Selection', async () => {
65
+ .step('Select a sequence', async () => {
66
66
  df.selection.init((idx: number) => [15].includes(idx));
67
67
  }, {
68
68
  description: `Handling selection of data frame row reflecting on linked viewers.`,
@@ -28,7 +28,7 @@ export async function demoBio01bUI() {
28
28
  try {
29
29
  const demoScript = new DemoScript('Demo', '');
30
30
  await demoScript
31
- .step(`Loading DNA notation \'fasta\'`, async () => {
31
+ .step(`Load DNA sequences`, async () => {
32
32
  grok.shell.windows.showContextPanel = false;
33
33
  grok.shell.windows.showProperties = false;
34
34
 
@@ -48,7 +48,7 @@ export async function demoBio01bUI() {
48
48
  description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
49
49
  delay: 1600,
50
50
  })
51
- .step('Analyze for activity cliffs', async () => {
51
+ .step('Find activity cliffs', async () => {
52
52
  activityCliffsViewer = (await activityCliffs(
53
53
  df, df.getCol('Sequence'), df.getCol('Activity'),
54
54
  80, method)) as DG.ScatterPlotViewer;
@@ -62,7 +62,7 @@ export async function demoBio01bUI() {
62
62
  description: 'Reveal similar sequences with a cliff of activity.',
63
63
  delay: 1600
64
64
  })
65
- .step('Hierarchical clustering', async () => {
65
+ .step('Cluster sequences', async () => {
66
66
  const seqCol: DG.Column<string> = df.getCol('sequence');
67
67
  const seqList = seqCol.toList();
68
68
  const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
@@ -0,0 +1,44 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+
6
+ import {_package, toAtomicLevel} from '../package';
7
+ import $ from 'cash-dom';
8
+ import {handleError} from './utils';
9
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
10
+
11
+ const dataFn: string = 'sample/sample_FASTA.csv';
12
+
13
+ export async function demoBio03UI(): Promise<void> {
14
+ let df: DG.DataFrame;
15
+ let view: DG.TableView;
16
+
17
+ try {
18
+ await new DemoScript(
19
+ 'Atomic Level',
20
+ 'Atomic level structure of Macromolecules'
21
+ )
22
+ .step(`Loading Macromolecules notation 'Helm'`, async () => {
23
+ df = await _package.files.readCsv(dataFn);
24
+ view = grok.shell.addTableView(df);
25
+ for (let colI: number = 0; colI < view.grid.columns.length; colI++) {
26
+ const gCol: DG.GridColumn = view.grid.columns.byIndex(colI)!;
27
+ if (!(['Sequence', 'Activity'].includes(gCol.name))) gCol.visible = false;
28
+ }
29
+ }, {
30
+ description: `Load dataset with macromolecules of 'fasta' notation, 'PT' alphabet (protein, aminoacids).`,
31
+ delay: 1600,
32
+ })
33
+ .step('To atomic level', async () => {
34
+ const seqCol = df.getCol('Sequence');
35
+ await toAtomicLevel(df, seqCol);
36
+ }, {
37
+ description: 'Get atomic level structures of Macromolecules.',
38
+ delay: 1600,
39
+ })
40
+ .start();
41
+ } catch (err: any) {
42
+ handleError(err);
43
+ }
44
+ }
@@ -26,7 +26,7 @@ export async function demoBio05UI(): Promise<void> {
26
26
  try {
27
27
  const demoScript = new DemoScript('Demo', 'MSA and composition analysis on Helm data.');
28
28
  await demoScript
29
- .step(`Loading peptides notation 'HELM'`, async () => {
29
+ .step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
30
30
  view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
31
31
 
32
32
  grok.shell.windows.showContextPanel = false;
@@ -35,7 +35,7 @@ export async function demoBio05UI(): Promise<void> {
35
35
  description: 'Load dataset with macromolecules of \'Helm\' notation.',
36
36
  delay: 1600,
37
37
  })
38
- .step('MSA on non-natural aminoacids with PepSeA', async () => {
38
+ .step('Align paptides with non-natural aminoacids with PepSeA', async () => {
39
39
  helmCol = df.getCol(helmColName);
40
40
  const method: string = pepseaMethods[0];
41
41
  const gapOpen: number = 1.53;
@@ -47,7 +47,7 @@ export async function demoBio05UI(): Promise<void> {
47
47
  description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
48
48
  delay: 1600,
49
49
  })
50
- .step('Building sequence space', async () => {
50
+ .step('Build sequence space', async () => {
51
51
  const method: string = 'UMAP';
52
52
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
53
53
  'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
@@ -56,7 +56,7 @@ export async function demoBio05UI(): Promise<void> {
56
56
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
57
57
  delay: 1600
58
58
  })
59
- .step('Composition analysis on MSA results', async () => {
59
+ .step('Analyse sequence composition', async () => {
60
60
  wlViewer = await df.plot.fromType('WebLogo', {
61
61
  sequenceColumnName: msaHelmColName,
62
62
  maxHeight: 50,
package/src/package.ts CHANGED
@@ -38,7 +38,10 @@ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
38
38
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
39
39
  import {WebLogoViewer} from './viewers/web-logo-viewer';
40
40
  import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
41
- import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
41
+ import {
42
+ LIB_PATH, MonomerLibHelper,
43
+ LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
44
+ } from './utils/monomer-lib';
42
45
  import {getMacromoleculeColumn} from './utils/ui-utils';
43
46
  import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
44
47
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
@@ -46,10 +49,10 @@ import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionE
46
49
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
47
50
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
48
51
  import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
52
+ import {demoBio03UI} from './demo/bio03-atomic-level';
49
53
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
50
54
  import {checkInputColumnUI} from './utils/check-input-column';
51
55
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
52
- import { runKalign } from './utils/multiple-sequence-alignment';
53
56
 
54
57
  export const _package = new DG.Package();
55
58
 
@@ -150,25 +153,25 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
150
153
  //@ts-ignore
151
154
  const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
152
155
  const divInputs: HTMLDivElement = ui.div();
153
- const libFileNameList: string[] = (await grok.dapi.files.list(`${LIB_PATH}`, false, ''))
154
- .map((it) => it.fileName);
156
+ const libFileNameList: string[] = await getLibFileNameList();
155
157
  const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
156
158
  await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
157
159
 
158
160
  let userStoragePromise: Promise<void> = Promise.resolve();
159
161
  for (const libFileName of libFileNameList) {
160
- const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, librariesUserSettingsSet.has(libFileName),
162
+ const settings = await getUserLibSettings();
163
+ const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, !settings.exclude.includes(libFileName),
161
164
  () => {
162
165
  userStoragePromise = userStoragePromise.then(async () => {
163
166
  if (libInput.value == true) {
164
- // Save checked library to user settings 'Libraries'
165
- await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, libFileName, libFileName, true);
166
- await MonomerLibHelper.instance.loadLibraries(); // from libraryPanel()
167
+ // Checked library remove from excluded list
168
+ settings.exclude = settings.exclude.filter((l) => l != libFileName);
167
169
  } else {
168
- // Remove unchecked library from user settings 'Libraries'
169
- await grok.dapi.userDataStorage.remove(LIB_STORAGE_NAME, libFileName, true);
170
- await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
170
+ // Unchecked library add to excluded list
171
+ if (!settings.exclude.includes(libFileName)) settings.exclude.push(libFileName);
171
172
  }
173
+ await setUserLibSetting(settings);
174
+ await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
172
175
  grok.shell.info('Monomer library user settings saved.');
173
176
  });
174
177
  });
@@ -668,7 +671,7 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
668
671
  // demoBio01
669
672
  //name: demoBioSimilarityDiversity
670
673
  //meta.demoPath: Bioinformatics | Similarity, Diversity
671
- //description:
674
+ //description: Sequence similarity tracking and evaluation dataset diversity
672
675
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
673
676
  export async function demoBioSimilarityDiversity(): Promise<void> {
674
677
  await demoBio01UI();
@@ -677,7 +680,7 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
677
680
  // demoBio01a
678
681
  //name:demoBioSequenceSpace
679
682
  //meta.demoPath: Bioinformatics | Sequence Space
680
- //description:
683
+ //description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
681
684
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
682
685
  export async function demoBioSequenceSpace(): Promise<void> {
683
686
  await demoBio01aUI();
@@ -686,16 +689,25 @@ export async function demoBioSequenceSpace(): Promise<void> {
686
689
  // demoBio01b
687
690
  //name: demoBioActivityCliffs
688
691
  //meta.demoPath: Bioinformatics | Activity Cliffs
689
- //description:
692
+ //description: Activity Cliffs analysis on Macromolecules data
690
693
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
691
694
  export async function demoBioActivityCliffs(): Promise<void> {
692
695
  await demoBio01bUI();
693
696
  }
694
697
 
698
+ // demoBio03
699
+ //name: demoBioAtomicLevel
700
+ //meta.demoPath: Bioinformatics | Atomic Level
701
+ //description: Atomic level structure of Macromolecules
702
+ //meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
703
+ export async function demoBioAtomicLevel(): Promise<void> {
704
+ await demoBio03UI();
705
+ }
706
+
695
707
  // demoBio05
696
708
  //name: demoBioHelmMsaSequenceSpace
697
709
  //meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
698
- //description:
710
+ //description: MSA and composition analysis on Helm data
699
711
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
700
712
  export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
701
713
  await demoBio05UI();
@@ -27,8 +27,8 @@ category('monomerLibraries', () => {
27
27
  await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
28
28
  await monomerLibHelper.loadLibraries(true); // test defaultLib
29
29
 
30
- // Currently default monomer lib is empty
30
+ // Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
31
31
  const currentMonomerLib = monomerLibHelper.getBioLib();
32
- expect(currentMonomerLib.getTypes().length, 0);
32
+ expect(currentMonomerLib.getTypes().length > 0, true);
33
33
  });
34
34
  });
@@ -15,6 +15,31 @@ export const LIB_STORAGE_NAME = 'Libraries';
15
15
  export const LIB_PATH = 'System:AppData/Bio/libraries/';
16
16
  export const LIB_DEFAULT: { [fileName: string]: string } = {'HELMCoreLibrary.json': 'HELMCoreLibrary.json'};
17
17
 
18
+ /** Type for user settings of monomer library set to use. */
19
+ export type LibSettings = {
20
+ exclude: string[],
21
+ }
22
+
23
+ export async function getLibFileNameList(): Promise<string[]> {
24
+ const res: string[] = (await grok.dapi.files.list(`${LIB_PATH}`, false, ''))
25
+ .map((it) => it.fileName);
26
+ return res;
27
+ }
28
+
29
+ export async function getUserLibSettings(): Promise<LibSettings> {
30
+ const resStr: string = await grok.dapi.userDataStorage.getValue(LIB_STORAGE_NAME, 'Settings', true);
31
+ const res: LibSettings = resStr ? JSON.parse(resStr) : {exclude: []};
32
+
33
+ // Fix empty object returned in case there is no settings stored for user
34
+ res.exclude = res.exclude instanceof Array ? res.exclude : [];
35
+
36
+ return res;
37
+ }
38
+
39
+ export async function setUserLibSetting(value: LibSettings): Promise<void> {
40
+ await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, 'Settings', JSON.stringify(value), true);
41
+ }
42
+
18
43
  export class MonomerLib implements IMonomerLib {
19
44
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
20
45
  private _onChanged = new Subject<any>();
@@ -104,11 +129,16 @@ export class MonomerLibHelper implements IMonomerLibHelper {
104
129
  */
105
130
  async loadLibraries(reload: boolean = false): Promise<void> {
106
131
  return this.loadLibrariesPromise = this.loadLibrariesPromise.then(async () => {
107
- const userLibrariesSettings: string[] = Object.keys(await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true));
108
- const libs: IMonomerLib[] = await Promise.all(userLibrariesSettings.map((libFileName) => {
109
- //TODO handle whether files are in place
110
- return this.readLibrary(LIB_PATH, libFileName);
111
- }));
132
+ const [libFileNameList, settings]: [string[], LibSettings] = await Promise.all([
133
+ getLibFileNameList(),
134
+ getUserLibSettings()
135
+ ]);
136
+ const libs: IMonomerLib[] = await Promise.all(libFileNameList
137
+ .filter((libFileName) => !settings.exclude.includes(libFileName))
138
+ .map((libFileName) => {
139
+ //TODO handle whether files are in place
140
+ return this.readLibrary(LIB_PATH, libFileName);
141
+ }));
112
142
  this._monomerLib.updateLibs(libs, reload);
113
143
  });
114
144
  }