@datagrok/bio 2.4.45 → 2.4.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,16 +5,24 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.45",
8
+ "version": "2.4.47",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
12
12
  "url": "https://github.com/datagrok-ai/public.git",
13
13
  "directory": "packages/Bio"
14
14
  },
15
+ "properties": [
16
+ {
17
+ "name": "MaxMonomerLength",
18
+ "propertyType": "int",
19
+ "defaultValue": 3,
20
+ "nullable": false
21
+ }
22
+ ],
15
23
  "dependencies": {
16
24
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.32.3",
25
+ "@datagrok-libraries/bio": "^5.32.5",
18
26
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
27
  "@datagrok-libraries/ml": "^6.3.39",
20
28
  "@datagrok-libraries/tutorials": "^1.3.2",
@@ -16,6 +16,14 @@
16
16
  # input: double fasta_separator = '' [Separator for a FASTA notation]
17
17
  # output: dataframe sequences
18
18
 
19
+ """
20
+ The simplest set of options for running from the command line:
21
+ python sequence_generator.py -c 4 -s 50 > output_file.tsv
22
+ Basic options:
23
+ -c number of clusters
24
+ -s cluster size (number of sequences per cluster)
25
+ """
26
+
19
27
  import random
20
28
  import argparse
21
29
  import sys
@@ -0,0 +1,40 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {Observable, Subject} from 'rxjs';
6
+ import {ObjectPropertyBag} from 'datagrok-api/dg';
7
+
8
+ /** Names of package properties/settings declared in properties section of {@link './package.json'} */
9
+ export const enum BioPackagePropertiesNames {
10
+ MaxMonomerLength = 'MaxMonomerLength',
11
+ }
12
+
13
+
14
+ export class BioPackageProperties extends Map<string, any> {
15
+
16
+ private _onPropertyChanged: Subject<string> = new Subject<string>();
17
+ public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
18
+
19
+ /** Monomer name maximum length displayed in short mode. */
20
+ public get maxMonomerLength(): number {
21
+ return super.get(BioPackagePropertiesNames.MaxMonomerLength) as unknown as number;
22
+ }
23
+
24
+ public set maxMonomerLength(value: number) {
25
+ super.set(BioPackagePropertiesNames.MaxMonomerLength, value as unknown as object);
26
+ this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
27
+ }
28
+
29
+ constructor(source: any) {
30
+ super(Object.entries(source));
31
+ }
32
+ }
33
+
34
+ export class BioPackage extends DG.Package {
35
+ private _properties: BioPackageProperties;
36
+ /** Package properties/settings declared in properties section of {@link './package.json'} */
37
+ public get properties(): BioPackageProperties { return this._properties; };
38
+
39
+ public set properties(value: BioPackageProperties) { this._properties = value; }
40
+ }
package/src/package.ts CHANGED
@@ -14,7 +14,7 @@ import {
14
14
  createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
15
15
  } from './analysis/sequence-activity-cliffs';
16
16
  import {convert} from './utils/convert';
17
- import {getMacroMolColumnPropertyPanel} from './widgets/representations';
17
+ import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
18
18
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
19
19
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
20
20
  import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
@@ -53,8 +53,12 @@ import {WebLogoApp} from './apps/web-logo-app';
53
53
  import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
54
54
  import {splitToMonomersUI} from './utils/split-to-monomers';
55
55
  import {MonomerCellRenderer} from './utils/monomer-cell-renderer';
56
+ import {BioPackage, BioPackageProperties} from './package-types';
57
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
58
+ import {ObjectPropertyBag} from 'datagrok-api/dg';
59
+ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
56
60
 
57
- export const _package = new DG.Package();
61
+ export const _package = new BioPackage();
58
62
 
59
63
  // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
60
64
  // let monomerLib: MonomerLib | null = null;
@@ -82,11 +86,20 @@ export class SeqPaletteCustom implements SeqPalette {
82
86
 
83
87
  //tags: init
84
88
  export async function initBio() {
85
- await MonomerLibHelper.instance.loadLibraries();
89
+ let module: RDModule;
90
+ await Promise.all([
91
+ (async () => { await MonomerLibHelper.instance.loadLibraries(); })(),
92
+ (async () => { module = await grok.functions.call('Chem:getRdKitModule'); })(),
93
+ (async () => {
94
+ const pkgProps = await _package.getProperties();
95
+ const bioPkgProps = new BioPackageProperties(pkgProps);
96
+ _package.properties = bioPkgProps;
97
+ })(),
98
+ ]);
99
+
86
100
  const monomerLib = MonomerLibHelper.instance.getBioLib();
87
101
  const monomers: string[] = [];
88
102
  const logPs: number[] = [];
89
- const module = await grok.functions.call('Chem:getRdKitModule');
90
103
 
91
104
  const series = monomerLib!.getMonomerMolsByPolymerType('PEPTIDE')!;
92
105
  Object.keys(series).forEach((symbol) => {
@@ -173,6 +186,21 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
173
186
  return new DG.Widget(ui.divV([inputsForm, ui.div(filesButton)]));
174
187
  }
175
188
 
189
+ // -- Package settings editor --
190
+
191
+ //name: packageSettingsEditor
192
+ //description: The database connection
193
+ //tags: packageSettingsEditor
194
+ //input: object propList
195
+ //output: widget result
196
+ export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
197
+ const widget = new PackageSettingsEditorWidget(propList);
198
+ widget.init().then(); // Ignore promise returned
199
+ return widget;
200
+ }
201
+
202
+ // -- Cell renderers --
203
+
176
204
  //name: fastaSequenceCellRenderer
177
205
  //tags: cellRenderer
178
206
  //meta.cellType: sequence
@@ -187,7 +215,7 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
187
215
  //tags: panel
188
216
  //output: widget result
189
217
  export function macroMolColumnPropertyPanel(molColumn: DG.Column): DG.Widget {
190
- return getMacroMolColumnPropertyPanel(molColumn);
218
+ return getMacromoleculeColumnPropertyPanel(molColumn);
191
219
  }
192
220
 
193
221
  //name: separatorSequenceCellRenderer
@@ -743,7 +771,7 @@ export async function webLogoLargeApp(): Promise<void> {
743
771
  const pi = DG.TaskBarProgressIndicator.create('WebLogo');
744
772
  try {
745
773
  const app = new WebLogoApp();
746
- const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_10000.csv');
774
+ const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
747
775
  await grok.data.detectSemanticTypes(df);
748
776
  await app.init(df, 'webLogoLargeApp');
749
777
  } finally {
@@ -61,8 +61,8 @@ ATC-G-TTGC--
61
61
 
62
62
  for (let i = 0; i < positions.length; i++) {
63
63
  expect(positions[i].name, resAllDf1[i].name);
64
- for (const key in positions[i].freq)
65
- expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
64
+ for (const m of positions[i].getMonomers())
65
+ expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
66
66
  }
67
67
  }, {skipReason: 'GROK-13300'});
68
68
 
@@ -109,8 +109,8 @@ ATC-G-TTGC--
109
109
 
110
110
  for (let i = 0; i < positions.length; i++) {
111
111
  expect(positions[i].name, resAllDf1[i].name);
112
- for (const key in positions[i].freq)
113
- expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
112
+ for (const m of positions[i].getMonomers())
113
+ expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
114
114
  }
115
115
  }, {skipReason: 'GROK-13300'});
116
116
 
@@ -187,10 +187,10 @@ ATC-G-TTGC--
187
187
 
188
188
  function expectPositionInfo(actualPos: PI, expectedPos: PI): void {
189
189
  expect(actualPos.name, expectedPos.name);
190
- expectArray(Object.keys(actualPos.freq), Object.keys(expectedPos.freq));
191
- for (const key in actualPos.freq) {
190
+ expectArray(actualPos.getMonomers(), expectedPos.getMonomers());
191
+ for (const key of actualPos.getMonomers()) {
192
192
  //
193
- expect(actualPos.freq[key].count, expectedPos.freq[key].count);
193
+ expect(actualPos.getFreq(key).count, expectedPos.getFreq(key).count);
194
194
  }
195
195
  }
196
196
 
@@ -9,9 +9,6 @@ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-
9
9
  import {awaitContainerStart} from './utils';
10
10
  //import * as grok from 'datagrok-api/grok';
11
11
 
12
- export const _package = new DG.Package();
13
-
14
-
15
12
  category('MSA', async () => {
16
13
  //table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
17
14
  const fromCsv = `seq
@@ -0,0 +1,27 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ export enum MonomerWidthMode {
6
+ long = 'long',
7
+ short = 'short',
8
+ }
9
+
10
+ export const enum Tags {
11
+ calculated = '.mm.cellRenderer.calculated',
12
+ }
13
+
14
+ export const enum Temps {
15
+ monomerWidth = '.mm.cellRenderer.monomerWidth',
16
+ maxMonomerLength = '.mm.cellRenderer.maxMonomerLength',
17
+ colorCode = '.mm.cellRenderer.colorCode',
18
+ compareWithCurrent = '.mm.cellRenderer.compareWithCurrent',
19
+ highlightDifference = '.mm.cellRenderer.highlightDifference',
20
+ }
21
+
22
+ // export const MacromoleculeCellRendererDefaults = new class {
23
+ // monomerWidth: MonomerWidthMode = MonomerWidthMode.short;
24
+ // maxMonomerLength: number = 3;
25
+ // colorCode: boolean = true;
26
+ // compareWithCurrent: boolean = true;
27
+ // }();
@@ -15,9 +15,10 @@ import {
15
15
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
16
16
  import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
17
17
  import {MonomerWorks} from '@datagrok-libraries/bio/src/monomer-works/monomer-works';
18
+ import {Tags as mmcrTags, Temps as mmcrTemps} from '../utils/cell-renderer-consts';
18
19
 
19
- import {_package, getMonomerLibHelper} from '../package';
20
20
  import * as C from './constants';
21
+ import {_package} from '../package';
21
22
 
22
23
  const enum tempTAGS {
23
24
  referenceSequence = 'reference-sequence',
@@ -154,19 +155,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
154
155
  const referenceSequence: string[] = splitterFunc(
155
156
  ((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
156
157
  tempReferenceSequence : tempCurrentWord ?? '');
157
- const monomerWidth: string = (tempMonomerWidth != null) ? tempMonomerWidth : 'short';
158
+ const monomerWidth: string = tempMonomerWidth ?? 'short';
158
159
 
159
160
  let gapRenderer = 5;
160
161
  let maxIndex = 0;
161
- let maxLengthOfMonomer = 8;
162
+ let maxLengthOfMonomer: number = 8;
162
163
 
163
164
  if (monomerWidth === 'short') {
164
165
  gapRenderer = 12;
165
- maxLengthOfMonomer = 1;
166
+ maxLengthOfMonomer = colTemp[mmcrTemps.maxMonomerLength] ?? _package.properties.maxMonomerLength;
166
167
  }
167
168
 
168
169
  let maxLengthWords: any = {};
169
- if (gridCell.cell.column.getTag('.calculatedCellRender') !== splitLimit.toString()) {
170
+ if (gridCell.cell.column.getTag(mmcrTags.calculated) !== splitLimit.toString()) {
170
171
  let samples = 0;
171
172
  while (samples < Math.min(gridCell.cell.column.length, 100)) {
172
173
  const column = gridCell.cell.column.get(samples);
@@ -188,7 +189,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
188
189
  colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
189
190
  colTemp[tempTAGS.bioMaxIndex] = maxIndex;
190
191
  colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
191
- gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
192
+ gridCell.cell.column.setTag(mmcrTags.calculated, splitLimit.toString());
192
193
  }
193
194
  } else {
194
195
  maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];