@datagrok/bio 2.4.29 → 2.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ RUN savedAptMark="$(apt-mark showmanual)" ; \
24
24
  ; \
25
25
  apt-mark auto '.*' > /dev/null ; \
26
26
  [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark ; \
27
- wget https://mafft.cbrc.jp/alignment/software/mafft_7.511-1_amd64.deb -O mafft.deb; \
27
+ wget https://mafft.cbrc.jp/alignment/software/mafft_7.520-1_amd64.deb -O mafft.deb; \
28
28
  apt install -y ./mafft.deb; \
29
29
  rm -rf mafft.deb; \
30
30
  wget https://github.com/Merck/PepSeA/archive/refs/heads/main.zip -O PepSeA.zip; \
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.29",
8
+ "version": "2.4.31",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -16,9 +16,9 @@
16
16
  "@biowasm/aioli": "^3.1.0",
17
17
  "@datagrok-libraries/bio": "^5.30.0",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
- "@datagrok-libraries/ml": "^6.3.23",
19
+ "@datagrok-libraries/ml": "^6.3.27",
20
20
  "@datagrok-libraries/tutorials": "^1.3.2",
21
- "@datagrok-libraries/utils": "^2.1.3",
21
+ "@datagrok-libraries/utils": "^4.0.8",
22
22
  "cash-dom": "^8.0.0",
23
23
  "css-loader": "^6.7.3",
24
24
  "datagrok-api": "^1.13.3",
@@ -3,8 +3,8 @@
3
3
  # description: Create the model peptides/DNA sequences with peptides data
4
4
  # language: python
5
5
  # tags: template, demo
6
- # input: int clusters = 1 [Number of superclusters]
7
- # input: int num_sequences = 500 [Number of sequences in each supercluster]
6
+ # input: int clusters = 5 [Number of superclusters]
7
+ # input: int num_sequences = 50 [Number of sequences in each supercluster]
8
8
  # input: int motif_length = 12 [Average length of motif]
9
9
  # input: int max_variants_position = 3 [Maximum number of different letters in conservative position in motif]
10
10
  # input: int random_length = 3 [Average length of random sequence parts before and after motif]
@@ -59,7 +59,9 @@ def generate_motif_template(
59
59
 
60
60
 
61
61
  def generate_motif(template: motif_template_type, alphabet: alphabet_type) -> str:
62
- template_with_any = [(letters if not "?" in letters else alphabet) for letters in template]
62
+ template_with_any = [
63
+ (letters if not "?" in letters else alphabet) for letters in template
64
+ ]
63
65
  return "".join([random.choice(letters) for letters in template_with_any])
64
66
 
65
67
 
@@ -70,18 +72,24 @@ def motif_notation(motif_template: motif_template_type) -> str:
70
72
  else:
71
73
  return f"[{''.join(letter_choice)}]"
72
74
 
73
- return "".join([motif_notation_code(letter_choice) for letter_choice in motif_template])
75
+ return "".join(
76
+ [motif_notation_code(letter_choice) for letter_choice in motif_template]
77
+ )
74
78
 
75
79
 
76
80
  def generate_random(n: int, alphabet: alphabet_type) -> str:
77
81
  return "".join([random.choice(alphabet) for i in range(n)])
78
82
 
79
83
 
80
- def make_cliff(motif_template: motif_template_type, alphabet: alphabet_type, motif: str) -> str:
84
+ def make_cliff(
85
+ motif_template: motif_template_type, alphabet: alphabet_type, motif: str
86
+ ) -> str:
81
87
  # Mutate conservative letter in motif
82
88
  pos = random.randrange(len(motif_template))
83
89
  while "?" in motif_template[pos]:
84
- pos = (pos + 1) % len(motif_template) # always will find letters since ends of motif can't be any symbol
90
+ pos = (pos + 1) % len(
91
+ motif_template
92
+ ) # always will find letters since ends of motif can't be any symbol
85
93
  outlier_letters = list(set(alphabet) - set(motif_template[pos]))
86
94
  return motif[:pos] + random.choice(outlier_letters) + motif[pos + 1 :]
87
95
 
@@ -97,7 +105,9 @@ def generate_cluster(
97
105
  cliff_probability: float,
98
106
  cliff_strength: float,
99
107
  ) -> Iterator[sequence_record_type]:
100
- motif_template = generate_motif_template(motif_length, alphabet, max_variants_position)
108
+ motif_template = generate_motif_template(
109
+ motif_length, alphabet, max_variants_position
110
+ )
101
111
 
102
112
  activity_average = random.random() * 10
103
113
  activity_dispersion = random.random()
@@ -166,7 +176,9 @@ def generate_sequences(
166
176
  cliff_probability,
167
177
  cliff_strength,
168
178
  ):
169
- sequences.append((n_cluster, f"c{n_cluster}_s{n_seq}", seq, activity, is_cliff))
179
+ sequences.append(
180
+ (n_cluster, f"c{n_cluster}_s{n_seq}", seq, activity, is_cliff)
181
+ )
170
182
  return headers, sequences
171
183
 
172
184
 
@@ -178,15 +190,19 @@ def parse_command_line_args() -> Any:
178
190
  epilog="Utility support: Gennadii Zakharov",
179
191
  )
180
192
 
181
- parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of superclusters")
193
+ parser.add_argument(
194
+ "-c", "--clusters", type=int, default=5, help="Number of superclusters"
195
+ )
182
196
  parser.add_argument(
183
197
  "-s",
184
198
  "--sequences",
185
199
  type=int,
186
- default=500,
200
+ default=50,
187
201
  help="Number of sequences in each supercluster",
188
202
  )
189
- parser.add_argument("-m,", "--motif-length", type=int, default=12, help="Average length of motif")
203
+ parser.add_argument(
204
+ "-m,", "--motif-length", type=int, default=12, help="Average length of motif"
205
+ )
190
206
 
191
207
  parser.add_argument(
192
208
  "-r,",
@@ -208,7 +224,8 @@ def parse_command_line_args() -> Any:
208
224
  "--alphabet",
209
225
  type=str,
210
226
  default=list(alphabets.keys())[0],
211
- help=f"Sequence alphabet: {available_alphabets}. Custom alphabet is a list of values separated " f"by comma",
227
+ help=f"Sequence alphabet: {available_alphabets}. Custom alphabet is a list of values separated "
228
+ f"by comma",
212
229
  )
213
230
  parser.add_argument(
214
231
  "--max-variants-position",
@@ -258,7 +275,11 @@ if not grok:
258
275
  cliff_probability = args.cliff_probability
259
276
  cliff_strength = args.cliff_strength
260
277
 
261
- alphabet: alphabet_type = alphabets[alphabet_key].split(",") if alphabet_key in alphabets else alphabet_key.split(",")
278
+ alphabet: alphabet_type = (
279
+ alphabets[alphabet_key].split(",")
280
+ if alphabet_key in alphabets
281
+ else alphabet_key.split(",")
282
+ )
262
283
 
263
284
  # Running sequence generator
264
285
  header, data = generate_sequences(
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
6
6
  import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
7
- import {AvailableMetrics, AvailableMetricsTypes, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
7
+ import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
8
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
9
9
  import * as C from '../utils/constants';
10
10
  import {GridColumn} from 'datagrok-api/dg';
@@ -15,7 +15,7 @@ export async function getDistances(col: DG.Column, seq: string): Promise<Array<n
15
15
  const stringArray = col.toList();
16
16
  const distances = new Array(stringArray.length).fill(0);
17
17
  const distanceMethod: (x: string, y: string) => number =
18
- AvailableMetrics[AvailableMetricsTypes.String][StringMetricsNames.Levenshtein];
18
+ AvailableMetrics[DistanceMetricsSubjects.String][StringMetricsNames.Levenshtein];
19
19
  for (let i = 0; i < stringArray.length; ++i) {
20
20
  const distance = stringArray[i] ? distanceMethod(stringArray[i], seq) : null;
21
21
  distances[i] = distance ? distance / Math.max((stringArray[i] as string).length, seq.length) : null;
@@ -2,14 +2,13 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as DG from 'datagrok-api/dg';
3
3
  import * as grok from 'datagrok-api/grok';
4
4
 
5
- import BitArray from '@datagrok-libraries/utils/src/bit-array';
6
5
  import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
7
- import $ from 'cash-dom';
8
- import {ArrayUtils} from '@datagrok-libraries/utils/src/array-utils';
9
6
  import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
10
7
  import {getMonomericMols} from '../calculations/monomerLevelMols';
11
8
  import {updateDivInnerHTML} from '../utils/ui-utils';
12
9
  import {Subject} from 'rxjs';
10
+ import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
11
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
12
 
14
13
  export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
15
14
  diverseColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
@@ -28,15 +27,9 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
28
27
  return;
29
28
  if (this.dataFrame) {
30
29
  if (computeData && this.moleculeColumn) {
31
- const monomericMols = await getMonomericMols(this.moleculeColumn);
32
- //need to create df to calculate fingerprints
33
- const monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
34
- this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
35
- col: monomericMols,
36
- metricName: this.distanceMetric,
37
- limit: this.limit,
38
- fingerprint: this.fingerprint
39
- });
30
+ const uh = new UnitsHandler(this.moleculeColumn);
31
+ await (uh.isFasta() ? this.computeByMM() : this.computeByChem());
32
+
40
33
  const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
41
34
  `diverse (${this.moleculeColumnName})`;
42
35
  const resCol = DG.Column.string(diverseColumnName, this.renderMolIds!.length)
@@ -49,4 +42,24 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
49
42
  }
50
43
  }
51
44
  }
45
+
46
+ private async computeByChem() {
47
+ const monomericMols = await getMonomericMols(this.moleculeColumn!);
48
+ //need to create df to calculate fingerprints
49
+ const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
50
+ this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
51
+ col: monomericMols,
52
+ metricName: this.distanceMetric,
53
+ limit: this.limit,
54
+ fingerprint: this.fingerprint
55
+ });
56
+ }
57
+
58
+ private async computeByMM() {
59
+ const distanceMatrixData = await calcMmDistanceMatrix(this.moleculeColumn!);
60
+ const len = this.moleculeColumn!.length;
61
+ const linearizeFunc = dmLinearIndex(len);
62
+ this.renderMolIds = getDiverseSubset(len, Math.min(len, this.limit),
63
+ (i1: number, i2: number) => distanceMatrixData[linearizeFunc(i1, i2)]);
64
+ }
52
65
  }
@@ -9,6 +9,8 @@ import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence
9
9
  import {updateDivInnerHTML} from '../utils/ui-utils';
10
10
  import {Subject} from 'rxjs';
11
11
  import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
+ import {calcMmDistanceMatrix, dmLinearIndex} from './workers/mm-distance-worker-creator';
12
14
 
13
15
  export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
14
16
  cutoff: number;
@@ -23,6 +25,8 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
23
25
  gridSelect: boolean = false;
24
26
  targetMoleculeIdx: number = 0;
25
27
  computeCompleted = new Subject<boolean>();
28
+ distanceMatrixComputed: boolean = false;
29
+ mmDistanceMatrix: Float32Array;
26
30
 
27
31
  constructor() {
28
32
  super('similarity');
@@ -43,20 +47,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
43
47
  this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
44
48
  if (computeData && !this.gridSelect) {
45
49
  this.targetMoleculeIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
46
- const monomericMols = await getMonomericMols(this.moleculeColumn);
47
- //need to create df to calculate fingerprints
48
- const monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
49
- const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
50
- df: this.dataFrame,
51
- col: monomericMols,
52
- molecule: monomericMols.get(this.targetMoleculeIdx),
53
- metricName: this.distanceMetric,
54
- limit: this.limit,
55
- minScore: this.cutoff,
56
- fingerprint: this.fingerprint
57
- });
58
- this.idxs = df.getCol('indexes');
59
- this.scores = df.getCol('score');
50
+ const uh = new UnitsHandler(this.moleculeColumn!);
51
+
52
+ await (uh.isFasta() ? this.computeByMM() : this.computeByChem());
60
53
  const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
61
54
  `similar (${this.moleculeColumnName})`;
62
55
  this.molCol = DG.Column.string(similarColumnName,
@@ -83,15 +76,51 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
83
76
  }
84
77
  }
85
78
 
79
+ private async computeByChem() {
80
+ const monomericMols = await getMonomericMols(this.moleculeColumn!);
81
+ //need to create df to calculate fingerprints
82
+ const _monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
83
+ const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
84
+ df: this.dataFrame,
85
+ col: monomericMols,
86
+ molecule: monomericMols.get(this.targetMoleculeIdx),
87
+ metricName: this.distanceMetric,
88
+ limit: this.limit,
89
+ minScore: this.cutoff,
90
+ fingerprint: this.fingerprint
91
+ });
92
+ this.idxs = df.getCol('indexes');
93
+ this.scores = df.getCol('score');
94
+ }
95
+
96
+ private async computeByMM() {
97
+ if (!this.distanceMatrixComputed) {
98
+ this.mmDistanceMatrix = await calcMmDistanceMatrix(this.moleculeColumn!);
99
+ this.distanceMatrixComputed = true;
100
+ }
101
+ const len = this.moleculeColumn!.length;
102
+ const linearizeFunc = dmLinearIndex(len);
103
+ // array that keeps track of the indexes and scores together
104
+ const indexWScore = Array(len).fill(0)
105
+ .map((_, i) => ({idx: i, score: i === this.targetMoleculeIdx ? 1 :
106
+ 1 - this.mmDistanceMatrix[linearizeFunc(this.targetMoleculeIdx, i)]}));
107
+ indexWScore.sort((a, b) => b.score - a.score);
108
+ // get the most similar molecules
109
+ const actualLimit = Math.min(this.limit, len);
110
+ const mostSimilar = indexWScore.slice(0, actualLimit);
111
+ this.idxs = DG.Column.int('indexes', actualLimit).init((i) => mostSimilar[i].idx);
112
+ this.scores = DG.Column.float('score', actualLimit).init((i) => mostSimilar[i].score);
113
+ }
86
114
 
87
115
  createPropertyPanel(resDf: DG.DataFrame) {
88
116
  const propPanel = ui.div();
89
117
  const molDifferences: { [key: number]: HTMLCanvasElement } = {};
90
- const units = resDf.col('sequence')!.getTag(DG.TAGS.UNITS);
91
- const separator = resDf.col('sequence')!.getTag(bioTAGS.separator);
118
+ const molColName = this.molCol?.name!;
119
+ const units = resDf.col(molColName)!.getTag(DG.TAGS.UNITS);
120
+ const separator = resDf.col(molColName)!.getTag(bioTAGS.separator);
92
121
  const splitter = getSplitter(units, separator);
93
122
  const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
94
- const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
123
+ const subParts2 = splitter(resDf.get(molColName, resDf.currentRowIdx));
95
124
  const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
96
125
  propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
97
126
  if (subParts1.length !== subParts2.length) {
@@ -44,7 +44,7 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
44
44
 
45
45
  export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
46
46
  if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
47
- await invalidateMols(spaceParams.seqCol, false);
47
+ await invalidateMols(spaceParams.seqCol as unknown as DG.Column<string>, false); //we expect only string columns here
48
48
 
49
49
  const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
50
50
  col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
@@ -0,0 +1,31 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
5
+
6
+ export async function calcMmDistanceMatrix(column: DG.Column<any>): Promise<Float32Array> {
7
+ const values = column.toList();
8
+ const worker = new Worker(new URL('./mm-distance-worker.ts', import.meta.url));
9
+ if (column.semType !== DG.SEMTYPE.MACROMOLECULE)
10
+ throw new Error('Column has to be of macromolecule type');
11
+ const uh = new UnitsHandler(column);
12
+ const fnName = uh.getDistanceFunctionName();
13
+ worker.postMessage({values, fnName});
14
+ return new Promise((resolve, reject) => {
15
+ worker.onmessage = ({data: {error, distanceMatrixData}}): void => {
16
+ worker.terminate();
17
+ error ? reject(error) : resolve(distanceMatrixData);
18
+ };
19
+ });
20
+ }
21
+
22
+ // gets index of compressed distance matrix from 2d coordinates
23
+ export function dmLinearIndex(size: number) {
24
+ return (i: number, j: number) => {
25
+ const getLinearIndex = (i: number, j: number) => {
26
+ return size * i + j - Math.floor(((i + 2) * (i + 1)) / 2);
27
+ };
28
+ if (i <= j) return getLinearIndex(i, j);
29
+ else return getLinearIndex(j, i);
30
+ };
31
+ }
@@ -0,0 +1,16 @@
1
+ import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
2
+ import {mmDistanceFunctions, MmDistanceFunctionsNames}
3
+ from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
4
+
5
+ onmessage = (event) => {
6
+ const {values, fnName} = event.data;
7
+ const data: { error?: any; distanceMatrixData?: Float32Array } = {};
8
+ try {
9
+ const distanceMatrix = DistanceMatrix.calc(values, mmDistanceFunctions[fnName as MmDistanceFunctionsNames]());
10
+ distanceMatrix.normalize();
11
+ data.distanceMatrixData = distanceMatrix.data;
12
+ } catch (e) {
13
+ data.error = e;
14
+ }
15
+ postMessage(data);
16
+ };
@@ -12,6 +12,7 @@ import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree
12
12
  import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
13
13
  import {handleError} from './utils';
14
14
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
15
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
15
16
 
16
17
  const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
17
18
 
@@ -23,7 +24,7 @@ export async function demoBio01bUI() {
23
24
  let view: DG.TableView;
24
25
  let activityCliffsViewer: DG.ScatterPlotViewer;
25
26
 
26
- const dimRedMethod: string = 'UMAP';
27
+ const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
27
28
  const idRows: { [id: number]: number } = {};
28
29
 
29
30
  try {
@@ -7,8 +7,9 @@ import {handleError} from './utils';
7
7
 
8
8
  import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
9
9
  import {pepseaMethods, runPepsea} from '../utils/pepsea';
10
- import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
10
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
11
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
12
+ import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
12
13
 
13
14
  const helmFn: string = 'samples/sample_HELM.csv';
14
15
 
@@ -22,7 +23,7 @@ export async function demoBio05UI(): Promise<void> {
22
23
 
23
24
  const helmColName: string = 'HELM';
24
25
  const msaHelmColName: string = 'msa(HELM)';
25
- const dimRedMethod: string = 'UMAP';
26
+ const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
26
27
 
27
28
  try {
28
29
  const demoScript = new DemoScript(
@@ -52,7 +53,7 @@ export async function demoBio05UI(): Promise<void> {
52
53
  })
53
54
  .step('Build sequence space', async () => {
54
55
  ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
55
- dimRedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
56
+ dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
56
57
  view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
57
58
  }, {
58
59
  description: 'Reduce sequence space dimensionality to display on 2D representation.',
package/src/demo/utils.ts CHANGED
@@ -6,6 +6,8 @@ import {_package, sequenceSpaceTopMenu} from '../package';
6
6
  import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
7
7
  import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
8
  import {delay} from '@datagrok-libraries/utils/src/test';
9
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
10
+ import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
9
11
 
10
12
  enum EMBED_COL_NAMES {
11
13
  X = 'Embed_X',
@@ -63,7 +65,7 @@ export async function demoSequenceSpace(
63
65
  })) as DG.ScatterPlotViewer;
64
66
  } else {
65
67
  resSpaceViewer = (await sequenceSpaceTopMenu(df, df.getCol(colName),
66
- 'UMAP', StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
68
+ DimReductionMethods.UMAP, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
67
69
  }
68
70
  view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
69
71
  return resSpaceViewer;
package/src/package.ts CHANGED
@@ -10,7 +10,7 @@ import {
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {SequenceAlignment} from './seq_align';
12
12
  import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
13
- import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
13
+ import {ISequenceSpaceParams, getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
14
  import {
15
15
  createLinesGrid,
16
16
  createPropPanelElement,
@@ -43,7 +43,7 @@ import {
43
43
  LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
44
44
  } from './utils/monomer-lib';
45
45
  import {getMacromoleculeColumn} from './utils/ui-utils';
46
- import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
46
+ import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
47
47
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
48
48
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
49
49
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
@@ -53,6 +53,8 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
53
53
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
54
54
  import {checkInputColumnUI} from './utils/check-input-column';
55
55
  import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
56
+ import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
57
+ import { BitArrayMetrics, BitArrayMetricsNames, StringMetricsNames } from '@datagrok-libraries/ml/src/typed-metrics';
56
58
  import { NotationConverter } from '@datagrok-libraries/bio/src/utils/notation-converter';
57
59
 
58
60
  export const _package = new DG.Package();
@@ -280,7 +282,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
280
282
  //output: viewer result
281
283
  //editor: Bio:SeqActivityCliffsEditor
282
284
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
283
- similarity: number, methodName: string, options?: IUMAPOptions | ITSNEOptions
285
+ similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions
284
286
  ): Promise<DG.Viewer | undefined> {
285
287
  if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
286
288
  return;
@@ -292,7 +294,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
292
294
  'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
293
295
  };
294
296
  const nc = new NotationConverter(macroMolecule);
295
- let columnDistanceMetric = 'Tanimoto';
297
+ let columnDistanceMetric: BitArrayMetricsNames | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
296
298
  let seqCol = macroMolecule;
297
299
  if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)){
298
300
  if (nc.isFasta()){
@@ -347,8 +349,8 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
347
349
  //input: bool plotEmbeddings = true
348
350
  //input: object options {optional: true}
349
351
  //editor: Bio:SequenceSpaceEditor
350
- export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
351
- similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
352
+ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
353
+ similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto, plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
352
354
  ): Promise<DG.Viewer | undefined> {
353
355
  // Delay is required for initial function dialog to close before starting invalidating of molfiles.
354
356
  // Otherwise, dialog is freezing
@@ -360,7 +362,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
360
362
  const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
361
363
  const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
362
364
 
363
- const chemSpaceParams = {
365
+ const chemSpaceParams: ISequenceSpaceParams = {
364
366
  seqCol: withoutEmptyValues.col(macroMolecule.name)!,
365
367
  methodName: methodName,
366
368
  similarityMetric: similarityMetric,
@@ -6,6 +6,7 @@ import {after, before, category, test} from '@datagrok-libraries/utils/src/test'
6
6
 
7
7
  import {readDataframe} from './utils';
8
8
  import {_testActivityCliffsOpen} from './activity-cliffs-utils';
9
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
9
10
 
10
11
 
11
12
  category('activityCliffs', async () => {
@@ -33,7 +34,7 @@ category('activityCliffs', async () => {
33
34
  actCliffsTableView = grok.shell.addTableView(actCliffsDf);
34
35
  viewList.push(actCliffsTableView);
35
36
 
36
- await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
37
+ await _testActivityCliffsOpen(actCliffsDf, 57, DimReductionMethods.UMAP, 'MSA');
37
38
  }, {skipReason: 'GROK-12774'});
38
39
 
39
40
  test('activityCliffsWithEmptyRows', async () => {
@@ -42,6 +43,6 @@ category('activityCliffs', async () => {
42
43
  actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
43
44
  viewList.push(actCliffsTableViewWithEmptyRows);
44
45
 
45
- await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
46
+ await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, DimReductionMethods.UMAP, 'MSA');
46
47
  }, {skipReason: 'GROK-12774'});
47
48
  });
@@ -3,8 +3,9 @@ import * as grok from 'datagrok-api/grok';
3
3
 
4
4
  import {delay, expect} from '@datagrok-libraries/utils/src/test';
5
5
  import {activityCliffs} from '../package';
6
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
6
7
 
7
- export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
8
+ export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods, colName: string) {
8
9
  await grok.data.detectSemanticTypes(df);
9
10
  const scatterPlot = await activityCliffs(
10
11
  df, df.getCol(colName), df.getCol('Activity'),
@@ -5,6 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
  import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
6
6
  import {readDataframe} from './utils';
7
7
  import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
8
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
8
9
 
9
10
  category('sequenceSpace', async () => {
10
11
  let testFastaDf: DG.DataFrame;
@@ -15,7 +16,7 @@ category('sequenceSpace', async () => {
15
16
  test('sequenceSpaceOpens', async () => {
16
17
  testFastaDf = await readDataframe('tests/sample_MSA_data.csv');
17
18
  testFastaTableView = grok.shell.addTableView(testFastaDf);
18
- await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'MSA');
19
+ await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'MSA');
19
20
  grok.shell.closeTable(testFastaDf);
20
21
  testFastaTableView.close();
21
22
  }, {skipReason: 'GROK-12775'});
@@ -23,7 +24,7 @@ category('sequenceSpace', async () => {
23
24
  test('sequenceSpaceWithEmptyRows', async () => {
24
25
  testHelmWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
25
26
  testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
26
- await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'MSA');
27
+ await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'MSA');
27
28
  grok.shell.closeTable(testHelmWithEmptyRows);
28
29
  testHelmWithEmptyRowsTableView.close();
29
30
  }, {skipReason: 'GROK-12775'});
@@ -2,14 +2,16 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import {expect} from '@datagrok-libraries/utils/src/test';
4
4
  import {sequenceSpaceTopMenu} from '../package';
5
+ import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
+ import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
5
7
 
6
- export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
8
+ export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: DimReductionMethods, colName: string) {
7
9
  // await grok.data.detectSemanticTypes(df);
8
10
  const col: DG.Column = df.getCol(colName);
9
11
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
10
12
  if (semType)
11
13
  col.semType = semType;
12
14
 
13
- const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
15
+ const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true);
14
16
  expect(sp != null, true);
15
17
  }
@@ -1,8 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- // import * as grok from 'datagrok-api/grok';
2
+ import * as grok from 'datagrok-api/grok';
3
3
  //import * as ui from 'datagrok-api/ui';
4
4
 
5
- import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
5
+ import {category, delay, test, testViewer} from '@datagrok-libraries/utils/src/test';
6
6
  import {readDataframe} from './utils';
7
7
 
8
8
 
@@ -10,7 +10,12 @@ category('viewers', () => {
10
10
  const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
11
11
  for (const v of viewers) {
12
12
  test(v, async () => {
13
- await testViewer(v, await readDataframe('data/sample_FASTA_DNA.csv'), true);
13
+ const df = await readDataframe('data/sample_FASTA_DNA.csv');
14
+ const tv = grok.shell.addTableView(df);
15
+ await grok.data.detectSemanticTypes(df);
16
+ tv.addViewer(v);
17
+ await delay(2000);
18
+ // await testViewer(v, df, {detectSemanticTypes: true});
14
19
  });
15
20
  }
16
21
  });