@datagrok/bio 2.4.17 → 2.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.17",
8
+ "version": "2.4.18",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,9 +14,9 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.29.3",
17
+ "@datagrok-libraries/bio": "^5.29.0",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
- "@datagrok-libraries/ml": "^6.3.22",
19
+ "@datagrok-libraries/ml": "^6.3.16",
20
20
  "@datagrok-libraries/tutorials": "^1.2.1",
21
21
  "@datagrok-libraries/utils": "^2.1.3",
22
22
  "cash-dom": "^8.0.0",
@@ -18,7 +18,7 @@ export async function demoBio01UI() {
18
18
  try {
19
19
  const demoScript = new DemoScript('Demo', 'Sequence similarity / diversity search');
20
20
  await demoScript
21
- .step(`Loading DNA notation 'fasta'`, async () => {
21
+ .step(`Load DNA sequences`, async () => {
22
22
  grok.shell.windows.showContextPanel = false;
23
23
  grok.shell.windows.showProperties = false;
24
24
 
@@ -32,7 +32,7 @@ export async function demoBio01UI() {
32
32
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
33
33
  delay: 1200
34
34
  })
35
- .step('Sequence similarity search', async () => {
35
+ .step('Find the most similar sequences to the current one', async () => {
36
36
  const simViewer = await df.plot.fromType('Sequence Similarity Search', {
37
37
  moleculeColumnName: 'sequence',
38
38
  similarColumnLabel: 'Similar to current',
@@ -42,7 +42,7 @@ export async function demoBio01UI() {
42
42
  description: `Add 'Sequence Similarity Search' viewer.`,
43
43
  delay: 1600
44
44
  })
45
- .step('Sequence diversity search', async () => {
45
+ .step('Explore most diverse sequences in a dataset', async () => {
46
46
  const divViewer = await df.plot.fromType('Sequence Diversity Search', {
47
47
  moleculeColumnName: 'sequence',
48
48
  diverseColumnLabel: 'Top diverse sequences of all data'
@@ -52,16 +52,16 @@ export async function demoBio01UI() {
52
52
  description: `Add 'Sequence Deversity Search' viewer.`,
53
53
  delay: 1600
54
54
  })
55
- .step('Set current row 3', async () => {
55
+ .step('Choose another sequence for similarity search', async () => {
56
56
  df.currentRowIdx = 3;
57
57
  }, {
58
58
  description: 'Handling current row changed of data frame showing update of similar sequences.',
59
59
  delay: 1600,
60
60
  })
61
- .step('Set current row 7', async () => {
61
+ .step('One more sequence for similarity search', async () => {
62
62
  df.currentRowIdx = 7;
63
63
  }, {
64
- description: 'Changing current row to another.',
64
+ description: 'Just one more sequence to search similar ones.',
65
65
  delay: 1600,
66
66
  })
67
67
  .start();
@@ -28,7 +28,7 @@ export async function demoBio01aUI() {
28
28
  try {
29
29
  const demoScript = new DemoScript('Demo', 'Exploring sequence space');
30
30
  await demoScript
31
- .step(`Loading DNA notation 'fasta'`, async () => {
31
+ .step(`Load DNA sequences`, async () => {
32
32
  [df, treeHelper, dendrogramSvc] = await Promise.all([
33
33
  _package.files.readCsv(dataFn),
34
34
  getTreeHelper(),
@@ -43,13 +43,13 @@ export async function demoBio01aUI() {
43
43
  description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
44
44
  delay: 1600,
45
45
  })
46
- .step('Building sequence space', async () => {
46
+ .step('Build sequence space', async () => {
47
47
  spViewer = await demoSequenceSpace(view, df, seqColName, method);
48
48
  }, {
49
49
  description: `Reduce sequence space dimensionality to display on 2D representation.`,
50
50
  delay: 1600
51
51
  })
52
- .step('Hierarchical clustering', async () => {
52
+ .step('Cluster sequences', async () => {
53
53
  const seqCol: DG.Column<string> = df.getCol(seqColName);
54
54
  const seqList = seqCol.toList();
55
55
  const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
@@ -62,7 +62,7 @@ export async function demoBio01aUI() {
62
62
  description: `Perform hierarchical clustering to reveal relationships between sequences.`,
63
63
  delay: 1600,
64
64
  })
65
- .step('Selection', async () => {
65
+ .step('Select a sequence', async () => {
66
66
  df.selection.init((idx: number) => [15].includes(idx));
67
67
  }, {
68
68
  description: `Handling selection of data frame row reflecting on linked viewers.`,
@@ -28,7 +28,7 @@ export async function demoBio01bUI() {
28
28
  try {
29
29
  const demoScript = new DemoScript('Demo', '');
30
30
  await demoScript
31
- .step(`Loading DNA notation \'fasta\'`, async () => {
31
+ .step(`Load DNA sequences`, async () => {
32
32
  grok.shell.windows.showContextPanel = false;
33
33
  grok.shell.windows.showProperties = false;
34
34
 
@@ -48,7 +48,7 @@ export async function demoBio01bUI() {
48
48
  description: 'Load dataset with macromolecules of \'fasta\' notation, \'DNA\' alphabet.',
49
49
  delay: 1600,
50
50
  })
51
- .step('Analyze for activity cliffs', async () => {
51
+ .step('Find activity cliffs', async () => {
52
52
  activityCliffsViewer = (await activityCliffs(
53
53
  df, df.getCol('Sequence'), df.getCol('Activity'),
54
54
  80, method)) as DG.ScatterPlotViewer;
@@ -62,7 +62,7 @@ export async function demoBio01bUI() {
62
62
  description: 'Reveal similar sequences with a cliff of activity.',
63
63
  delay: 1600
64
64
  })
65
- .step('Hierarchical clustering', async () => {
65
+ .step('Cluster sequences', async () => {
66
66
  const seqCol: DG.Column<string> = df.getCol('sequence');
67
67
  const seqList = seqCol.toList();
68
68
  const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
@@ -0,0 +1,44 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+
6
+ import {_package, toAtomicLevel} from '../package';
7
+ import $ from 'cash-dom';
8
+ import {handleError} from './utils';
9
+ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
10
+
11
/** Package-relative path of the sample dataset loaded by the 'Atomic Level' demo. */
const dataFn: string = 'sample/sample_FASTA.csv';

/**
 * Runs the 'Atomic Level' demo script.
 *
 * The script has two steps: it first reads the sample dataset and opens it in a table view
 * (hiding every grid column except 'Sequence' and 'Activity'), then calls {@link toAtomicLevel}
 * for the 'Sequence' column. Any error raised while building or running the script is passed
 * to {@link handleError} instead of being rethrown.
 */
export async function demoBio03UI(): Promise<void> {
  // Shared between the steps: assigned by the first step, read by the second one.
  let df: DG.DataFrame;
  let view: DG.TableView;

  try {
    await new DemoScript(
      'Atomic Level',
      'Atomic level structure of Macromolecules'
    )
      // The title names the notation of the data actually loaded (sample_FASTA.csv),
      // in agreement with the step description below.
      .step(`Load macromolecules in 'fasta' notation`, async () => {
        df = await _package.files.readCsv(dataFn);
        view = grok.shell.addTableView(df);
        // Leave only the columns relevant to the demo visible in the grid.
        for (let colI: number = 0; colI < view.grid.columns.length; colI++) {
          const gCol: DG.GridColumn = view.grid.columns.byIndex(colI)!;
          if (!(['Sequence', 'Activity'].includes(gCol.name))) gCol.visible = false;
        }
      }, {
        description: `Load dataset with macromolecules of 'fasta' notation, 'PT' alphabet (protein, aminoacids).`,
        delay: 1600,
      })
      .step('To atomic level', async () => {
        const seqCol = df.getCol('Sequence');
        await toAtomicLevel(df, seqCol);
      }, {
        description: 'Get atomic level structures of Macromolecules.',
        delay: 1600,
      })
      .start();
  } catch (err: any) {
    handleError(err);
  }
}
@@ -26,7 +26,7 @@ export async function demoBio05UI(): Promise<void> {
26
26
  try {
27
27
  const demoScript = new DemoScript('Demo', 'MSA and composition analysis on Helm data.');
28
28
  await demoScript
29
- .step(`Loading peptides notation 'HELM'`, async () => {
29
+ .step(`Load peptides with non-natural aminoacids in 'HELM' notation`, async () => {
30
30
  view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
31
31
 
32
32
  grok.shell.windows.showContextPanel = false;
@@ -35,7 +35,7 @@ export async function demoBio05UI(): Promise<void> {
35
35
  description: 'Load dataset with macromolecules of \'Helm\' notation.',
36
36
  delay: 1600,
37
37
  })
38
- .step('MSA on non-natural aminoacids with PepSeA', async () => {
38
+ .step('Align paptides with non-natural aminoacids with PepSeA', async () => {
39
39
  helmCol = df.getCol(helmColName);
40
40
  const method: string = pepseaMethods[0];
41
41
  const gapOpen: number = 1.53;
@@ -47,7 +47,7 @@ export async function demoBio05UI(): Promise<void> {
47
47
  description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
48
48
  delay: 1600,
49
49
  })
50
- .step('Building sequence space', async () => {
50
+ .step('Build sequence space', async () => {
51
51
  const method: string = 'UMAP';
52
52
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
53
53
  'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
@@ -56,7 +56,7 @@ export async function demoBio05UI(): Promise<void> {
56
56
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
57
57
  delay: 1600
58
58
  })
59
- .step('Composition analysis on MSA results', async () => {
59
+ .step('Analyse sequence composition', async () => {
60
60
  wlViewer = await df.plot.fromType('WebLogo', {
61
61
  sequenceColumnName: msaHelmColName,
62
62
  maxHeight: 50,
package/src/package.ts CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  } from './utils/cell-renderer';
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {SequenceAlignment} from './seq_align';
12
- import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
12
+ import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
13
13
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
14
  import {
15
15
  createLinesGrid,
@@ -38,7 +38,10 @@ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
38
38
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
39
39
  import {WebLogoViewer} from './viewers/web-logo-viewer';
40
40
  import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
41
- import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
41
+ import {
42
+ LIB_PATH, MonomerLibHelper,
43
+ LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
44
+ } from './utils/monomer-lib';
42
45
  import {getMacromoleculeColumn} from './utils/ui-utils';
43
46
  import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
44
47
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
@@ -46,10 +49,10 @@ import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionE
46
49
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
47
50
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
48
51
  import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
52
+ import {demoBio03UI} from './demo/bio03-atomic-level';
49
53
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
50
54
  import {checkInputColumnUI} from './utils/check-input-column';
51
55
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
52
- import { runKalign } from './utils/multiple-sequence-alignment';
53
56
 
54
57
  export const _package = new DG.Package();
55
58
 
@@ -150,25 +153,25 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
150
153
  //@ts-ignore
151
154
  const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
152
155
  const divInputs: HTMLDivElement = ui.div();
153
- const libFileNameList: string[] = (await grok.dapi.files.list(`${LIB_PATH}`, false, ''))
154
- .map((it) => it.fileName);
156
+ const libFileNameList: string[] = await getLibFileNameList();
155
157
  const librariesUserSettingsSet: Set<string> = new Set<string>(Object.keys(
156
158
  await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true)));
157
159
 
158
160
  let userStoragePromise: Promise<void> = Promise.resolve();
159
161
  for (const libFileName of libFileNameList) {
160
- const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, librariesUserSettingsSet.has(libFileName),
162
+ const settings = await getUserLibSettings();
163
+ const libInput: DG.InputBase<boolean | null> = ui.boolInput(libFileName, !settings.exclude.includes(libFileName),
161
164
  () => {
162
165
  userStoragePromise = userStoragePromise.then(async () => {
163
166
  if (libInput.value == true) {
164
- // Save checked library to user settings 'Libraries'
165
- await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, libFileName, libFileName, true);
166
- await MonomerLibHelper.instance.loadLibraries(); // from libraryPanel()
167
+ // Checked library remove from excluded list
168
+ settings.exclude = settings.exclude.filter((l) => l != libFileName);
167
169
  } else {
168
- // Remove unchecked library from user settings 'Libraries'
169
- await grok.dapi.userDataStorage.remove(LIB_STORAGE_NAME, libFileName, true);
170
- await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
170
+ // Unchecked library add to excluded list
171
+ if (!settings.exclude.includes(libFileName)) settings.exclude.push(libFileName);
171
172
  }
173
+ await setUserLibSetting(settings);
174
+ await MonomerLibHelper.instance.loadLibraries(true); // from libraryPanel()
172
175
  grok.shell.info('Monomer library user settings saved.');
173
176
  });
174
177
  });
@@ -287,23 +290,19 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
287
290
  'separator': macroMolecule.getTag(bioTAGS.separator),
288
291
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
289
292
  };
290
- const uh = new UnitsHandler(macroMolecule);
291
- let columnDistanceMetric = 'Tanimoto';
292
- if (uh.isFasta())
293
- columnDistanceMetric = uh.getDistanceFunctionName();
294
293
  const sp = await getActivityCliffs(
295
294
  df,
296
295
  macroMolecule,
297
296
  null,
298
297
  axesNames,
299
- columnDistanceMetric,
298
+ 'Activity cliffs',
300
299
  activities,
301
300
  similarity,
302
301
  'Tanimoto',
303
302
  methodName,
304
303
  DG.SEMTYPE.MACROMOLECULE,
305
304
  tags,
306
- getSequenceSpace,
305
+ sequenceSpaceByFingerprints,
307
306
  getChemSimilaritiesMatrix,
308
307
  createTooltipElement,
309
308
  createPropPanelElement,
@@ -354,7 +353,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
354
353
  embedAxesNames: embedColsNames,
355
354
  options: options
356
355
  };
357
- const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
356
+ const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
358
357
  const embeddings = sequenceSpaceRes.coordinates;
359
358
  for (const col of embeddings) {
360
359
  const listValues = col.toList();
@@ -672,7 +671,7 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
672
671
  // demoBio01
673
672
  //name: demoBioSimilarityDiversity
674
673
  //meta.demoPath: Bioinformatics | Similarity, Diversity
675
- //description:
674
+ //description: Sequence similarity tracking and evaluation dataset diversity
676
675
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
677
676
  export async function demoBioSimilarityDiversity(): Promise<void> {
678
677
  await demoBio01UI();
@@ -681,7 +680,7 @@ export async function demoBioSimilarityDiversity(): Promise<void> {
681
680
  // demoBio01a
682
681
  //name:demoBioSequenceSpace
683
682
  //meta.demoPath: Bioinformatics | Sequence Space
684
- //description:
683
+ //description: Exploring sequence space of Macromolecules, comparison with hierarchical clustering results
685
684
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
686
685
  export async function demoBioSequenceSpace(): Promise<void> {
687
686
  await demoBio01aUI();
@@ -690,16 +689,25 @@ export async function demoBioSequenceSpace(): Promise<void> {
690
689
  // demoBio01b
691
690
  //name: demoBioActivityCliffs
692
691
  //meta.demoPath: Bioinformatics | Activity Cliffs
693
- //description:
692
+ //description: Activity Cliffs analysis on Macromolecules data
694
693
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
695
694
  export async function demoBioActivityCliffs(): Promise<void> {
696
695
  await demoBio01bUI();
697
696
  }
698
697
 
698
// demoBio03
//name: demoBioAtomicLevel
//meta.demoPath: Bioinformatics | Atomic Level
//description: Atomic level structure of Macromolecules
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Atomic%20Level
export async function demoBioAtomicLevel(): Promise<void> {
  // Thin entry point: the whole demo scenario lives in demoBio03UI().
  const demoRun: Promise<void> = demoBio03UI();
  await demoRun;
}
706
+
699
707
  // demoBio05
700
708
  //name: demoBioHelmMsaSequenceSpace
701
709
  //meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
702
- //description:
710
+ //description: MSA and composition analysis on Helm data
703
711
  //meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
704
712
  export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
705
713
  await demoBio05UI();
@@ -27,8 +27,8 @@ category('monomerLibraries', () => {
27
27
  await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
28
28
  await monomerLibHelper.loadLibraries(true); // test defaultLib
29
29
 
30
- // Currently default monomer lib is empty
30
+ // Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
31
31
  const currentMonomerLib = monomerLibHelper.getBioLib();
32
- expect(currentMonomerLib.getTypes().length, 0);
32
+ expect(currentMonomerLib.getTypes().length > 0, true);
33
33
  });
34
34
  });
@@ -15,6 +15,31 @@ export const LIB_STORAGE_NAME = 'Libraries';
15
15
  export const LIB_PATH = 'System:AppData/Bio/libraries/';
16
16
  export const LIB_DEFAULT: { [fileName: string]: string } = {'HELMCoreLibrary.json': 'HELMCoreLibrary.json'};
17
17
 
18
/**
 * Per-user settings describing which monomer libraries to use.
 * Libraries are selected by exclusion: a library file not listed here is considered enabled.
 */
export type LibSettings = {
  /** File names of the monomer libraries the user has switched off. */
  exclude: string[];
};
+
23
/**
 * Lists the entries found directly under {@link LIB_PATH} (non-recursive, empty search pattern)
 * and returns their file names in the order the listing reports them.
 */
export async function getLibFileNameList(): Promise<string[]> {
  const fileNames: string[] = [];
  for (const fileInfo of await grok.dapi.files.list(LIB_PATH, false, ''))
    fileNames.push(fileInfo.fileName);
  return fileNames;
}
28
+
29
/**
 * Reads the current user's monomer library settings from the user data storage
 * (storage {@link LIB_STORAGE_NAME}, key 'Settings').
 *
 * The stored value is expected to be a JSON-serialized {@link LibSettings}. Anything else
 * (no value, malformed JSON, a non-object JSON value, a missing or non-array `exclude`)
 * degrades to the default of "nothing excluded" instead of throwing, so that a broken
 * stored value cannot prevent libraries from loading.
 *
 * @returns Settings whose `exclude` is always an array of strings.
 */
export async function getUserLibSettings(): Promise<LibSettings> {
  const resStr: string | null | undefined =
    await grok.dapi.userDataStorage.getValue(LIB_STORAGE_NAME, 'Settings', true);

  let parsed: unknown = null;
  if (resStr) {
    try {
      parsed = JSON.parse(resStr);
    } catch (err: unknown) {
      // A corrupted value must not break library loading; fall back to defaults.
      console.warn('Bio: stored monomer library settings are not valid JSON, defaults are used.', err);
    }
  }

  // JSON.parse can legitimately yield null, a primitive or an array; only a plain object is usable.
  const stored: Partial<LibSettings> =
    (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) ?
      parsed as Partial<LibSettings> : {};

  // An empty object comes back when no settings are stored for the user yet.
  const exclude: string[] = Array.isArray(stored.exclude) ?
    stored.exclude.filter((name: unknown): name is string => typeof name === 'string') : [];

  return {...stored, exclude};
}
38
+
39
/**
 * Stores the given monomer library settings for the current user: the value is serialized
 * to JSON and posted to the user data storage {@link LIB_STORAGE_NAME} under the key 'Settings'.
 *
 * @param value Settings to persist.
 */
export async function setUserLibSetting(value: LibSettings): Promise<void> {
  const serialized: string = JSON.stringify(value);
  await grok.dapi.userDataStorage.postValue(LIB_STORAGE_NAME, 'Settings', serialized, true);
}
42
+
18
43
  export class MonomerLib implements IMonomerLib {
19
44
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
20
45
  private _onChanged = new Subject<any>();
@@ -104,11 +129,16 @@ export class MonomerLibHelper implements IMonomerLibHelper {
104
129
  */
105
130
  async loadLibraries(reload: boolean = false): Promise<void> {
106
131
  return this.loadLibrariesPromise = this.loadLibrariesPromise.then(async () => {
107
- const userLibrariesSettings: string[] = Object.keys(await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true));
108
- const libs: IMonomerLib[] = await Promise.all(userLibrariesSettings.map((libFileName) => {
109
- //TODO handle whether files are in place
110
- return this.readLibrary(LIB_PATH, libFileName);
111
- }));
132
+ const [libFileNameList, settings]: [string[], LibSettings] = await Promise.all([
133
+ getLibFileNameList(),
134
+ getUserLibSettings()
135
+ ]);
136
+ const libs: IMonomerLib[] = await Promise.all(libFileNameList
137
+ .filter((libFileName) => !settings.exclude.includes(libFileName))
138
+ .map((libFileName) => {
139
+ //TODO handle whether files are in place
140
+ return this.readLibrary(LIB_PATH, libFileName);
141
+ }));
112
142
  this._monomerLib.updateLibs(libs, reload);
113
143
  });
114
144
  }