@datagrok/bio 2.10.2 → 2.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/link-bio ADDED
@@ -0,0 +1,7 @@
#!/bin/bash
# Builds the sibling @datagrok-libraries/bio library, registers it with
# `npm link`, then links it into the directory this script was started from.

# Stop here if the library directory is missing: otherwise ./cleanup, the
# build and `npm link` below would run against the wrong directory.
cd ../../libraries/bio || exit 1
./cleanup && npm i && npm run build && npm link

# Return to the directory the script was started from.
cd - || exit 1
npm link @datagrok-libraries/bio
package/package.json CHANGED
@@ -1,11 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/bio",
3
3
  "friendlyName": "Bio",
4
+ "skipCI": true,
4
5
  "author": {
5
6
  "name": "Leonid Stolbov",
6
7
  "email": "lstolbov@datagrok.ai"
7
8
  },
8
- "version": "2.10.2",
9
+ "version": "2.10.4",
9
10
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
11
  "repository": {
11
12
  "type": "git",
@@ -34,7 +35,7 @@
34
35
  ],
35
36
  "dependencies": {
36
37
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.38.2",
38
+ "@datagrok-libraries/bio": "^5.38.3",
38
39
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
40
  "@datagrok-libraries/ml": "^6.3.39",
40
41
  "@datagrok-libraries/tutorials": "^1.3.6",
package/setup ADDED
@@ -0,0 +1,52 @@
#!/bin/bash
# Re-installs, links and builds the local js-api and libraries listed in
# `dirs`, then links them into this package via `npm run link-all`.

# Prints the first argument in green.
log() {
  local green='\e[0;32m'
  local no_color='\e[0m'
  echo -e "$green $1 $no_color"
}

# Changes into a directory given relative to $package_dir.
# Aborts the whole script when the directory cannot be entered, so that the
# destructive commands that follow (rm -rf, git clean) never run elsewhere.
enter_dir() {
  cd "$package_dir" && cd "$1" || {
    echo "Cannot enter '$1' from '$package_dir'" >&2
    exit 1
  }
}

# Removes the global npm links and wipes node_modules/dist in every directory
# from `dirs`.
unlink_cleanup() {
  npm uninstall --location=global datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml @datagrok-libraries/bio

  local dir
  for dir in "${dirs[@]}"; do
    # Enter the directory first so that the message names the directory that
    # is actually being cleaned.
    enter_dir "$dir"
    echo "Removing node_modules and dist in $(pwd)"
    rm -rf node_modules dist
  done
}

package_dir=$(pwd)

dirs=(
  "../../js-api/"
  "../../libraries/utils/"
  "../../libraries/ml/"
  "../../libraries/bio/"
)

unlink_cleanup

# Pass 1: install dependencies and register every directory as a global link.
for dir in "${dirs[@]}"; do
  enter_dir "$dir"
  if [ "$dir" == "../../js-api/" ]; then
    # Remove git-ignored files under ./src of js-api.
    git clean -f -X -d ./src
  fi
  log "npm install in $(pwd)"
  npm install
  log "npm link in $(pwd)"
  npm link
done

# Pass 2: link the libraries to each other (js-api is skipped) and build.
for dir in "${dirs[@]}"; do
  enter_dir "$dir"
  if [ "$dir" != "../../js-api/" ]; then
    log "npm link-all in $(pwd)"
    npm run link-all
  fi
  log "npm run build in $(pwd)"
  # A failed build stops the script with the exit status of the build.
  npm run build || exit
done

cd "$package_dir" || exit 1
npm run link-all
package/src/package.ts CHANGED
@@ -65,6 +65,7 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
65
65
  import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
66
66
  import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
67
67
  import {addCopyMenuUI} from './utils/context-menu';
68
+ import {_getEnumeratorWidget, _setPeptideColumn} from './utils/enumerator-tools';
68
69
  import {getRegionDo} from './utils/get-region';
69
70
  import {GetRegionApp} from './apps/get-region-app';
70
71
  import {GetRegionFuncEditor} from './utils/get-region-func-editor';
@@ -97,10 +98,9 @@ export class SeqPaletteCustom implements SeqPalette {
97
98
 
98
99
  //tags: init
99
100
  export async function initBio() {
100
- let module: RDModule;
101
+ const module = await grok.functions.call('Chem:getRdKitModule');
101
102
  await Promise.all([
102
103
  (async () => { await MonomerLibHelper.instance.loadLibraries(); })(),
103
- (async () => { module = await grok.functions.call('Chem:getRdKitModule'); })(),
104
104
  (async () => {
105
105
  const pkgProps = await _package.getProperties();
106
106
  const bioPkgProps = new BioPackageProperties(pkgProps);
@@ -981,3 +981,19 @@ export async function demoBioAtomicLevel(): Promise<void> {
981
981
  export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
982
982
  await demoBio05UI();
983
983
  }
984
+
//name: enumeratorColumnChoice
//input: dataframe df [Input data table]
//input: column macroMolecule
export async function enumeratorColumnChoice(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
  // Step 1: put the peptide tags on the chosen column.
  _setPeptideColumn(macroMolecule);
  // Step 2: run semantic type detection on the table and wait for it to finish.
  const detection = grok.data.detectSemanticTypes(df);
  await detection;
}
992
+
//name: PolyTool
//input: column molColumn {semType: Macromolecule}
//tags: panel, exclude-actions-panel
//output: widget result
export function getEnumeratorWidget(molColumn: DG.Column): DG.Widget {
  // Panel entry point: the widget is built by _getEnumeratorWidget from
  // './utils/enumerator-tools'.
  const widget: DG.Widget = _getEnumeratorWidget(molColumn);
  return widget;
}
@@ -0,0 +1,162 @@
1
+
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {NOTATION, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
9
+ import {MonomerLibHelper} from '../utils/monomer-lib';
10
+ import {_package} from '../package';
11
+ import * as rxjs from 'rxjs';
12
+
13
+ const LEFT_HELM_WRAPPER = 'PEPTIDE1{';
14
+ const RIGHT_HELM_WRAPPER = '}$$$$';
15
+ const ALL_MONOMERS = '<All>';
16
+
17
+ const enum CYCLIZATION_TYPE {
18
+ NO = 'N-O',
19
+ R3 = 'R3-R3',
20
+ }
21
+
/** Sets the tags shared by the columns handled in this file:
 * `quality` (Macromolecule), `aligned` (SEQ) and `alphabet` (PT). */
function addCommonTags(col: DG.Column): void {
  const sharedTags: [string, string][] = [
    ['quality', DG.SEMTYPE.MACROMOLECULE],
    ['aligned', ALIGNMENT.SEQ],
    ['alphabet', ALPHABET.PT],
  ];
  for (const [tagName, tagValue] of sharedTags)
    col.setTag(tagName, tagValue);
}
27
+
/** Tags `col` as a peptide column in separator notation with '-' as the
 * separator, on top of the common tags set by addCommonTags. */
export function _setPeptideColumn(col: DG.Column): void {
  addCommonTags(col);
  const notationTags: [string, string][] = [
    ['units', NOTATION.SEPARATOR],
    ['separator', '-'],
  ];
  notationTags.forEach(([tagName, tagValue]) => { col.setTag(tagName, tagValue); });
  // The 'cell.renderer' tag is intentionally left untouched here:
  // col.setTag('cell.renderer', 'sequence');
}
34
+
/** Extracts the monomer tokens of a HELM string that starts with
 * `PEPTIDE1{` and contains the `}$$$$` wrapper.
 * @returns the dot-separated tokens between the wrappers, or null when the
 * string is empty or does not have that shape. */
function splitPeptideHelm(helm: string): string[] | null {
  if (!helm || !helm.startsWith(LEFT_HELM_WRAPPER))
    return null;
  const closingIdx = helm.indexOf(RIGHT_HELM_WRAPPER, LEFT_HELM_WRAPPER.length);
  if (closingIdx < 0)
    return null;
  return helm.slice(LEFT_HELM_WRAPPER.length, closingIdx).split('.');
}

/** Tells whether a HELM monomer token denotes `terminal`.
 * HELM encloses multi-character monomer symbols in square brackets, so the
 * token is compared both as written and with one pair of enclosing brackets
 * removed (the terminal is presumably a bare library symbol - TODO confirm). */
function matchesTerminal(token: string, terminal: string): boolean {
  return token === terminal || token.replace(/^\[(.+)\]$/, '$1') === terminal;
}

/** Rewrites the first `}$$$$` of `helm` into `}$PEPTIDE1,PEPTIDE1,<connection>$$$`,
 * keeping whatever follows the wrapper. Returns `helm` unchanged when the
 * wrapper is absent.
 * Plain concatenation is used on purpose: in the replacement argument of
 * String.prototype.replace every `$$` collapses to a single `$`. */
function insertPeptideConnection(helm: string, connection: string): string {
  const closingIdx = helm.indexOf(RIGHT_HELM_WRAPPER);
  if (closingIdx < 0)
    return helm;
  const head = helm.slice(0, closingIdx);
  const tail = helm.slice(closingIdx + RIGHT_HELM_WRAPPER.length);
  return head + '}$PEPTIDE1,PEPTIDE1,' + connection + '$$$' + tail;
}

/** Adds an R3-R3 connection between the first monomer equal to `leftTerminal`
 * and the first later monomer equal to `rightTerminal`.
 * The sequence is returned unchanged when either monomer is not found. That
 * includes the `<All>` choice, which names no concrete position and would
 * otherwise yield a connection at position 0. */
function applyR3Cyclization(helm: string, leftTerminal: string, rightTerminal: string): string {
  const monomers = splitPeptideHelm(helm);
  if (monomers === null)
    return helm;
  const start = monomers.findIndex((el) => matchesTerminal(el, leftTerminal));
  const end = monomers.findIndex((el, idx) => idx > start && matchesTerminal(el, rightTerminal));
  if (start < 0 || end < 0)
    return helm;
  // HELM connection positions are 1-based.
  return insertPeptideConnection(helm, `${start + 1}:R3-${end + 1}:R3`);
}

/** Adds a connection from R2 of the last monomer to R1 of the first one.
 * Each terminal is checked independently: `<All>` accepts any monomer at that
 * end, a concrete symbol must equal the first (left) or last (right) monomer.
 * Sequences with fewer than two monomers are returned unchanged, since the
 * only possible connection would link a monomer to itself. */
function applyNOCyclization(helm: string, leftTerminal: string, rightTerminal: string): string {
  const monomers = splitPeptideHelm(helm);
  if (monomers === null || monomers.length < 2)
    return helm;
  const leftFits = leftTerminal === ALL_MONOMERS || matchesTerminal(monomers[0], leftTerminal);
  const rightFits = rightTerminal === ALL_MONOMERS ||
    matchesTerminal(monomers[monomers.length - 1], rightTerminal);
  if (!leftFits || !rightFits)
    return helm;
  return insertPeptideConnection(helm, `${monomers.length}:R2-1:R1`);
}

/** Converts `molColumn` to HELM, applies the chosen cyclization to every
 * sequence and adds the result to the column's dataframe as a new column
 * named `Cyclization(<source column name>)` (made unique), tagged as HELM.
 * Sequences the cyclization does not apply to are copied as converted.
 * @param molColumn source macromolecule column; must belong to a dataframe
 * @param cyclizationType which pair of R-groups to connect
 * @param leftTerminal monomer symbol selected for the left terminal, or `<All>`
 * @param rightTerminal monomer symbol selected for the right terminal, or `<All>` */
async function enumerator(
  molColumn: DG.Column, cyclizationType: CYCLIZATION_TYPE, leftTerminal: string, rightTerminal: string
): Promise<void> {
  const applyModification = (helm: string): string => {
    return cyclizationType === CYCLIZATION_TYPE.R3 ?
      applyR3Cyclization(helm, leftTerminal, rightTerminal) :
      applyNOCyclization(helm, leftTerminal, rightTerminal);
  };

  const df = molColumn.dataFrame;
  const uh = UnitsHandler.getOrCreate(molColumn);
  const sourceHelmCol = uh.convert(NOTATION.HELM);
  const targetList = sourceHelmCol.toList().map((helm: string) => applyModification(helm));
  const colName = df.columns.getUnusedName('Cyclization(' + molColumn.name + ')');
  const targetHelmCol = DG.Column.fromList('string', colName, targetList);

  addCommonTags(targetHelmCol);
  targetHelmCol.setTag('units', NOTATION.HELM);
  targetHelmCol.setTag('cell.renderer', 'helm');

  df.columns.add(targetHelmCol);
  await grok.data.detectSemanticTypes(df);
}
110
+
/** Builds the PolyTool panel widget for `molColumn`: a modification selector,
 * a cyclization type selector, two terminal monomer selectors and a 'Run'
 * button that calls `enumerator` with the current selections.
 * The terminal selectors are rebuilt whenever the cyclization type changes. */
export function _getEnumeratorWidget(molColumn: DG.Column): DG.Widget {
  const monomerLib = MonomerLibHelper.instance.getBioLib();
  const cyclizationTypes = [CYCLIZATION_TYPE.NO, CYCLIZATION_TYPE.R3];

  let monomerList: string[] = [];
  const createTerminalChoice = (caption: string) => ui.choiceInput(caption, monomerList[0], monomerList);
  let leftTerminalChoice = createTerminalChoice('R1:');
  let rightTerminalChoice = createTerminalChoice('R2:');
  const terminalControls = ui.divV([leftTerminalChoice.root, rightTerminalChoice.root]);

  /** Recomputes the monomer list for the selected cyclization type and
   * replaces both terminal selectors with fresh ones. */
  function updateMonomerList(): void {
    if (cyclizationTypeChoice.value === CYCLIZATION_TYPE.NO) {
      monomerList = [ALL_MONOMERS].concat(
        monomerLib.getMonomerSymbolsByType(HELM_POLYMER_TYPE.PEPTIDE)
      );
    } else if (cyclizationTypeChoice.value === CYCLIZATION_TYPE.R3) {
      monomerList = [ALL_MONOMERS].concat(
        monomerLib.getMonomerSymbolsByRGroup(3, HELM_POLYMER_TYPE.PEPTIDE)
      );
    }
    leftTerminalChoice = createTerminalChoice('R1:');
    rightTerminalChoice = createTerminalChoice('R2:');
    ui.empty(terminalControls);
    [leftTerminalChoice, rightTerminalChoice].forEach((el) => { terminalControls.appendChild(el.root); });
  }

  const modifications = ['Cyclization'];
  const modificationChoice = ui.choiceInput('Modification', modifications[0], modifications);

  // The change handler calls updateMonomerList directly; no intermediate
  // rxjs Subject is needed for a single local listener.
  const cyclizationTypeChoice = ui.choiceInput(
    'Type', cyclizationTypes[0], cyclizationTypes, () => { updateMonomerList(); }
  );

  updateMonomerList();

  const btn = ui.bigButton('Run', async () => {
    try {
      await enumerator(
        molColumn, cyclizationTypeChoice.value!, leftTerminalChoice.value!, rightTerminalChoice.value!
      );
    } catch (err: unknown) {
      // Report the failure to the user instead of leaving a rejected promise
      // unhandled inside the click handler.
      grok.shell.error(err instanceof Error ? err.message : String(err));
    }
  });

  const div = ui.div([
    modificationChoice,
    cyclizationTypeChoice,
    terminalControls,
    btn
  ]);

  return new DG.Widget(div);
}
@@ -5,11 +5,16 @@ import * as DG from 'datagrok-api/dg';
5
5
  import {Observable, Subject} from 'rxjs';
6
6
 
7
7
  import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
8
+ import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
8
9
  import {
9
10
  createJsonMonomerLibFromSdf,
11
+ getJsonMonomerLibForEnumerator,
10
12
  IMonomerLibHelper,
13
+ isValidEnumeratorLib,
11
14
  } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
- import {HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT} from '@datagrok-libraries/bio/src/utils/const';
15
+ import {
16
+ HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT, HELM_POLYMER_TYPE
17
+ } from '@datagrok-libraries/bio/src/utils/const';
13
18
 
14
19
  import {_package} from '../package';
15
20
 
@@ -135,6 +140,39 @@ export class MonomerLib implements IMonomerLib {
135
140
  return Object.keys(this._monomers[polymerType]);
136
141
  }
137
142
 
/** Returns the symbols of the monomers of `polymerType` that declare at least
 * `rGroupNumber` R-groups. Monomers without an `rgroups` list are skipped.
 * WARNING: R-group numbering starts from 1, not 0.
 * @param rGroupNumber 1-based R-group number the monomer must have
 * @param polymerType polymer type whose monomers are inspected
 * @param element element expected at that R-group; accepted for
 * compatibility but NOT applied yet - TODO: filter by the attached element
 * @returns symbols of the matching monomers, in the order of
 * getMonomerSymbolsByType */
getMonomerSymbolsByRGroup(rGroupNumber: number, polymerType: string, element?: string): string[] {
  const matchingSymbols: string[] = [];
  for (const symbol of this.getMonomerSymbolsByType(polymerType)) {
    const monomer = this.getMonomer(polymerType, symbol);
    // The number of declared R-groups is the only criterion for now; the
    // molfile is not parsed because nothing in the result depends on it.
    if (monomer?.rgroups && monomer.rgroups.length >= rGroupNumber)
      matchingSymbols.push(monomer.symbol);
  }
  return matchingSymbols;
}
175
+
138
176
  get onChanged(): Observable<any> {
139
177
  return this._onChanged;
140
178
  }
@@ -244,9 +282,27 @@ export class MonomerLibHelper implements IMonomerLibHelper {
244
282
  } else {
245
283
  grok.shell.warning('Chem package is not installed');
246
284
  }
247
- } else {
285
+ } else if (fileName.endsWith('.json')) {
248
286
  const file = await fileSource.readAsText(fileName);
249
287
  rawLibData = JSON.parse(file);
288
+ } else if (fileName.endsWith('.csv')) {
289
+ // todo: replace by DataFrame's method after update of js-api
290
+ function toJson(df: DG.DataFrame): any[] {
291
+ return Array.from({length: df.rowCount}, (_, idx) =>
292
+ df.columns.names().reduce((entry: {[key: string]: any}, colName) => {
293
+ entry[colName] = df.get(colName, idx);
294
+ return entry;
295
+ }, {})
296
+ );
297
+ }
298
+ const df = await fileSource.readCsv(fileName);
299
+ const json = toJson(df);
300
+ if (isValidEnumeratorLib(json))
301
+ rawLibData = getJsonMonomerLibForEnumerator(json);
302
+ else
303
+ throw new Error('Invalid format of CSV monomer lib');
304
+ } else {
305
+ throw new Error('Monomer library of unknown file format, supported formats: SDF, JSON, CSV');
250
306
  }
251
307
 
252
308
  const monomers: { [polymerType: string]: { [monomerSymbol: string]: Monomer } } = {};
@@ -1,22 +0,0 @@
1
- #!/bin/bash
2
- package_dir=$(pwd)
3
-
4
- GREEN='\e[0;32m'
5
- NO_COLOR='\e[0m'
6
-
7
- dirs=(
8
- "../../js-api/"
9
- "../../libraries/utils/"
10
- "../../libraries/ml/"
11
- "../../libraries/bio/"
12
- )
13
-
14
- npm uninstall --location=global datagrok-api @datagrok-libraries/utils @datagrok-libraries/ml @datagrok-libraries/bio
15
-
16
- for dir in ${dirs[@]}; do
17
- cd $package_dir
18
- cd $dir
19
- echo -e $GREEN Removing node_modules and dist in $(pwd) $NO_COLOR
20
- rm -rf node_modules dist
21
- # rm package-lock.json
22
- done
package/setup.sh DELETED
@@ -1,38 +0,0 @@
1
- #!/bin/bash
2
-
3
- ./setup-unlink-clean.sh
4
-
5
- GREEN='\e[0;32m'
6
- NO_COLOR='\e[0m'
7
-
8
- package_dir=$(pwd)
9
-
10
- dirs=(
11
- "../../js-api/"
12
- "../../libraries/utils/"
13
- "../../libraries/ml/"
14
- "../../libraries/bio/"
15
- )
16
-
17
- for dir in ${dirs[@]}; do
18
- cd $package_dir
19
- cd $dir
20
- echo -e $GREEN npm install in $(pwd) $NO_COLOR
21
- npm install
22
- echo -e $GREEN npm link in $(pwd) $NO_COLOR
23
- npm link
24
- done
25
-
26
- for dir in ${dirs[@]}; do
27
- cd $package_dir
28
- cd $dir
29
- if [ $dir != "../../js-api/" ]; then
30
- echo -e $GREEN npm link-all in $(pwd) $NO_COLOR
31
- npm run link-all
32
- fi
33
- echo -e $GREEN npm run build in$(pwd) $NO_COLOR
34
- npm run build || exit
35
- done
36
-
37
- cd $package_dir
38
- npm run link-all