npm - @datagrok/bio - Versions diffs - 2.4.15 → 2.4.17 - Mend

@datagrok/bio 2.4.15 → 2.4.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/105.js +2 -0
package/dist/105.js.map +1 -0
package/dist/367.js +2 -0
package/dist/367.js.map +1 -0
package/dist/864.js +2 -0
package/dist/864.js.map +1 -0
package/dist/package-test.js +1 -1
package/dist/package-test.js.map +1 -1
package/dist/package.js +1 -1
package/dist/package.js.map +1 -1
package/package.json +3 -3
package/scripts/sequence_generator.py +289 -0
package/src/analysis/sequence-activity-cliffs.ts +2 -2
package/src/analysis/sequence-diversity-viewer.ts +7 -4
package/src/analysis/sequence-similarity-viewer.ts +7 -2
package/src/analysis/sequence-space.ts +18 -0
package/src/demo/bio01-similarity-diversity.ts +19 -4
package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +3 -0
package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +3 -0
package/src/demo/bio05-helm-msa-sequence-space.ts +13 -9
package/src/package.ts +21 -5
package/src/tests/checkInputColumn-tests.ts +2 -2
package/src/tests/msa-tests.ts +1 -1
package/src/tests/renderers-test.ts +2 -2
package/src/utils/cell-renderer.ts +4 -4
package/src/utils/multiple-sequence-alignment-ui.ts +19 -18
package/dist/153.js +0 -2
package/dist/153.js.map +0 -1
package/scripts/motif_generator.py +0 -119

package/package.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "name": "Leonid Stolbov",
     "email": "lstolbov@datagrok.ai"
   },
-  "version": "2.4.15",
+  "version": "2.4.17",
   "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
   "repository": {
     "type": "git",
@@ -14,9 +14,9 @@
   },
   "dependencies": {
     "@biowasm/aioli": "^3.1.0",
-    "@datagrok-libraries/bio": "^5.28.4",
+    "@datagrok-libraries/bio": "^5.29.3",
     "@datagrok-libraries/chem-meta": "^1.0.1",
-    "@datagrok-libraries/ml": "^6.3.16",
+    "@datagrok-libraries/ml": "^6.3.22",
     "@datagrok-libraries/tutorials": "^1.2.1",
     "@datagrok-libraries/utils": "^2.1.3",
     "cash-dom": "^8.0.0",

package/scripts/sequence_generator.py ADDED Viewed

@@ -0,0 +1,289 @@
+#!/usr/bin/env python3
+# name: Sequence generator
+# description: Create the model peptides/DNA sequences with peptides data
+# language: python
+# tags: template, demo
+# input: int clusters = 1 [Number of superclusters]
+# input: int num_sequences = 500 [Number of sequences in each supercluster]
+# input: int motif_length = 12 [Average length of motif]
+# input: int max_variants_position = 3 [Maximum number of different letters in conservative position in motif]
+# input: int random_length = 3 [Average length of random sequence parts before and after motif]
+# input: int dispersion = 2 [Variation of total sequence length]
+# input: string alphabet_key = 'PT' [Sequence alphabet: PT/DNA/RNA/custom. Custom alphabet is a list of values separated by comma]
+# input: bool disable_cliffs = False [Disable generation of cliffs]
+# input: double cliff_probability = 0.01 [Probability to make activity cliff of a sequence]
+# input: double cliff_strength = 4.0 [Strength of cliff]
+# output: dataframe sequences
+import random
+import argparse
+import sys
+from typing import List, Tuple, Dict, Iterator, Any
+# Type aliases used by the annotations in this script.
+# An alphabet is a list of letters (monomer symbols).
+alphabet_type = List[str]
+# Letters allowed at one motif position; the single entry "?" is used to mean "any symbol".
+letter_choice_type = List[str]
+# A motif template holds one letter choice per motif position.
+motif_template_type = List[letter_choice_type]
+# Record yielded by generate_cluster: (sequence number, sequence, activity, is_cliff).
+sequence_record_type = Tuple[int, str, float, bool]
+# Record collected by generate_sequences: (cluster number, sequence id, sequence, activity, is_cliff).
+sequence_record_cluster_type = Tuple[int, str, str, float, bool]
+# Built-in alphabets keyed by name; each value is a comma-separated list of letters.
+alphabets: Dict[str, str] = {
+    "PT": "A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y",
+    "DNA": "A,T,G,C",
+    "RNA": "A,U,G,C",
+}
+def mean_range(mean: int, disp: int) -> int:
+    """Draw a random integer between ``mean - disp`` and ``mean + disp`` (both inclusive).
+
+    The lower bound is clamped at zero, so the result is never negative as long as
+    ``mean + disp`` is not negative.
+    """
+    lower_bound = max(mean - disp, 0)
+    upper_bound = mean + disp
+    return random.randint(lower_bound, upper_bound)
+def generate_motif_template(
+    motif_length: int,
+    alphabet: alphabet_type,
+    max_variants_cluster: int,
+    prob_any: float = 0.2,
+) -> motif_template_type:
+    """Create a random motif template with one letter choice per position.
+
+    Inner positions (neither the first nor the last) become the wildcard ``["?"]`` with
+    probability ``prob_any``. Every other position receives between 1 and
+    ``max_variants_cluster`` letters drawn independently from ``alphabet``, so the same
+    letter may appear more than once in a choice.
+    """
+    last_index = motif_length - 1
+    template: motif_template_type = []
+    for index in range(motif_length):
+        is_inner = 0 < index < last_index
+        # The random draw happens only for inner positions (short-circuit evaluation).
+        if is_inner and random.random() <= prob_any:
+            template.append(["?"])  # "?" stands for any symbol
+            continue
+        variant_count = random.randrange(max_variants_cluster) + 1
+        template.append([random.choice(alphabet) for _ in range(variant_count)])
+    return template
+def generate_motif(template: motif_template_type, alphabet: alphabet_type) -> str:
+    """Build one motif string by picking a random letter for every template position."""
+    picked = []
+    for letters in template:
+        # A choice containing "?" may be filled with any letter of the alphabet.
+        pool = alphabet if "?" in letters else letters
+        picked.append(random.choice(pool))
+    return "".join(picked)
+def motif_notation(motif_template: motif_template_type) -> str:
+    """Render a motif template as text.
+
+    A position with exactly one letter is written as that letter; any other position is
+    written as its letters joined together inside square brackets.
+    """
+    parts = []
+    for choice in motif_template:
+        if len(choice) == 1:
+            parts.append(choice[0])
+        else:
+            parts.append("[" + "".join(choice) + "]")
+    return "".join(parts)
+def generate_random(n: int, alphabet: alphabet_type) -> str:
+    """Return a string of ``n`` letters, each drawn independently from ``alphabet``."""
+    letters = []
+    for _ in range(n):
+        letters.append(random.choice(alphabet))
+    return "".join(letters)
+def make_cliff(motif_template: motif_template_type, alphabet: alphabet_type, motif: str) -> str:
+    """Mutate one conservative (non-wildcard) position of ``motif`` to a letter outside its template.
+
+    Args:
+        motif_template: Template the motif was generated from.
+        alphabet: All letters that may be used for the replacement.
+        motif: Motif string to mutate; it is indexed with the same positions as the template.
+
+    Returns:
+        The motif with exactly one letter replaced, or the unchanged motif when no position
+        can be mutated (empty template, only wildcard positions, or every conservative
+        position already allows the whole alphabet).
+    """
+    n_positions = len(motif_template)
+    if n_positions == 0:
+        return motif
+    start = random.randrange(n_positions)
+    # Scan every position once, starting from a random one, so the search always terminates.
+    for offset in range(n_positions):
+        pos = (start + offset) % n_positions
+        if "?" in motif_template[pos]:
+            continue
+        # Sorted so that a seeded run is reproducible: plain set iteration order of strings
+        # changes between interpreter runs.
+        outlier_letters = sorted(set(alphabet) - set(motif_template[pos]))
+        if outlier_letters:
+            return motif[:pos] + random.choice(outlier_letters) + motif[pos + 1 :]
+    return motif
+def generate_cluster(
+    n_sequences: int,
+    motif_length: int,
+    prefix_length: int,
+    suffix_length: int,
+    max_variants_position: int,
+    make_cliffs: bool,
+    alphabet: alphabet_type,
+    cliff_probability: float,
+    cliff_strength: float,
+) -> Iterator[sequence_record_type]:
+    """Generate the sequences of one cluster, all built around a single random motif template.
+
+    Args:
+        n_sequences: Number of base sequences to generate; each activity cliff adds one more record.
+        motif_length: Number of positions in the motif template.
+        prefix_length: Length of the random part placed before the motif.
+        suffix_length: Length of the random part placed after the motif.
+        max_variants_position: Maximum number of letters drawn for a conservative motif position.
+        make_cliffs: Whether activity cliffs may be generated at all.
+        alphabet: Letters used for the motif and for the random parts.
+        cliff_probability: Probability that a base sequence becomes one half of an activity cliff.
+        cliff_strength: Multiplier applied to the activity offset of a cliff pair.
+
+    Yields:
+        ``(sequence number, sequence, activity, is_cliff)`` tuples. A cliff is a pair of consecutive
+        records that share prefix and suffix, differ in one motif letter, and have activities
+        placed symmetrically below and above the cluster average.
+    """
+    motif_template = generate_motif_template(motif_length, alphabet, max_variants_position)
+    activity_average = random.random() * 10
+    activity_dispersion = random.random()
+    sys.stderr.write(f"Motif template: {motif_notation(motif_template)}\n")
+    # Explicit counter: incrementing the loop variable of a ``for`` loop is undone on the next
+    # iteration, which would give the cliff partner and the following sequence the same number.
+    next_id = 0
+    for _ in range(n_sequences):
+        activity = random.gauss(activity_average, activity_dispersion)
+        motif = generate_motif(motif_template, alphabet)
+        prefix = generate_random(prefix_length, alphabet)
+        suffix = generate_random(suffix_length, alphabet)
+        is_cliff = make_cliffs and (random.random() <= cliff_probability)
+        cliff_record = None
+        if is_cliff:
+            # Making activity cliff: same flanks, one conservative motif letter mutated.
+            cliff_motif = make_cliff(motif_template, alphabet, motif)
+            cliff_disp = activity_dispersion * cliff_strength * (0.5 + random.random())
+            # The base activity is recalculated before the base record is yielded, so the
+            # pair really ends up on opposite sides of the cluster average.
+            activity = activity_average - cliff_disp
+            cliff_record = (next_id + 1, prefix + cliff_motif + suffix, activity_average + cliff_disp, is_cliff)
+        yield (next_id, prefix + motif + suffix, activity, is_cliff)
+        next_id += 1
+        if cliff_record is not None:
+            yield cliff_record
+            next_id += 1
+def generate_sequences(
+    n_clusters: int,
+    n_sequences: int,
+    average_motif_length: int,
+    max_variants_position: int,
+    average_random_length: int,
+    dispersion: int,
+    alphabet: alphabet_type,
+    make_cliffs: bool,
+    cliff_probability: float,
+    cliff_strength: float,
+) -> Tuple[List[str], List[sequence_record_cluster_type]]:
+    """Generate ``n_clusters`` clusters of sequences and collect them into one table.
+
+    Args:
+        n_clusters: Number of clusters to generate.
+        n_sequences: Number of base sequences requested for each cluster.
+        average_motif_length: Centre of the range the motif length of a cluster is drawn from.
+        max_variants_position: Maximum number of letters drawn for a conservative motif position.
+        average_random_length: Average length of each random part around the motif.
+        dispersion: Half-width of the ranges the motif length and the total random length are drawn from.
+        alphabet: Letters used to build the sequences.
+        make_cliffs: Whether activity cliffs may be generated.
+        cliff_probability: Probability that a sequence becomes part of an activity cliff.
+        cliff_strength: Multiplier applied to the activity offset of a cliff pair.
+
+    Returns:
+        ``(headers, rows)``: the column names and one
+        ``(cluster, sequence_id, sequence, activity, is_cliff)`` tuple per generated sequence.
+    """
+    headers: List[str] = ["cluster", "sequence_id", "sequence", "activity", "is_cliff"]
+    sequences: List[sequence_record_cluster_type] = []
+    for n_cluster in range(n_clusters):
+        motif_length = mean_range(average_motif_length, dispersion)
+        # Use the ``dispersion`` parameter rather than the module-level ``args``: ``args`` is
+        # only created when the script is started from the command line, so reading it here
+        # raises NameError when the inputs are supplied as global variables instead.
+        total_length = mean_range(average_random_length * 2, dispersion) + motif_length
+        prefix_length = mean_range(average_random_length, dispersion // 2)
+        # The suffix takes whatever remains of the random part after the prefix.
+        suffix_length = total_length - motif_length - prefix_length
+        sys.stderr.write(f"Generating sequences for cluster {n_cluster}\n")
+        for n_seq, seq, activity, is_cliff in generate_cluster(
+            n_sequences,
+            motif_length,
+            prefix_length,
+            suffix_length,
+            max_variants_position,
+            make_cliffs,
+            alphabet,
+            cliff_probability,
+            cliff_strength,
+        ):
+            sequences.append((n_cluster, f"c{n_cluster}_s{n_seq}", seq, activity, is_cliff))
+    return headers, sequences
+def parse_command_line_args() -> Any:
+    """Parse the command line options used when the script is run as a standalone program.
+
+    Returns:
+        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
+    """
+
+    def parse_bool(value: str) -> bool:
+        # ``type=bool`` would turn every non-empty string (including "False") into True,
+        # so the text is interpreted explicitly.
+        normalized = value.strip().lower()
+        if normalized in ("1", "true", "yes", "y", "on"):
+            return True
+        if normalized in ("", "0", "false", "no", "n", "off"):
+            return False
+        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")
+
+    parser = argparse.ArgumentParser(
+        prog="MotifSequencesGenerator",
+        description="The program generates set of sequences containing sequence motifs "
+        "for SAR functionality testing",
+        epilog="Utility support: Gennadii Zakharov",
+    )
+    parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of superclusters")
+    parser.add_argument(
+        "-s",
+        "--sequences",
+        type=int,
+        default=500,
+        help="Number of sequences in each supercluster",
+    )
+    # Short flags are declared without the stray trailing comma ("-m," etc.), so the help
+    # output lists them as -m, -r and -d.
+    parser.add_argument("-m", "--motif-length", type=int, default=12, help="Average length of motif")
+    parser.add_argument(
+        "-r",
+        "--random-length",
+        type=int,
+        default=3,
+        help="Average length of random sequence parts before and after motif",
+    )
+    parser.add_argument(
+        "-d",
+        "--dispersion",
+        type=int,
+        default=2,
+        help="Variation of total sequence length",
+    )
+    available_alphabets = ",".join(list(alphabets.keys()) + ["custom"])
+    parser.add_argument(
+        "--alphabet",
+        type=str,
+        default=list(alphabets.keys())[0],
+        help=f"Sequence alphabet: {available_alphabets}. Custom alphabet is a list of values separated " f"by comma",
+    )
+    parser.add_argument(
+        "--max-variants-position",
+        type=int,
+        default=3,
+        help="Maximum number of different letters in conservative position in motif",
+    )
+    parser.add_argument(
+        "--cliff-probability",
+        type=float,
+        default=0.01,
+        help="Probability to make activity cliff of a sequence",
+    )
+    parser.add_argument(
+        "--cliff-strength",
+        type=float,
+        default=4.0,
+        help="Strength of cliff",
+    )
+    # Works both as a bare flag ("--disable-cliffs") and with an explicit value
+    # ("--disable-cliffs True" / "--disable-cliffs False").
+    parser.add_argument(
+        "--disable-cliffs",
+        type=parse_bool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="Disable generation of cliffs",
+    )
+    command_line_args = parser.parse_args()
+    return command_line_args
+# ====================================================================================
+# The run mode is detected by checking whether a global named ``clusters`` already exists.
+# Presumably Datagrok defines the inputs declared in the header as globals before running
+# the script - TODO confirm against the Datagrok scripting documentation.
+grok = "clusters" in globals()
+if not grok:
+    # We are not in Datagrok - need to parse command line arguments
+    # and copy them into the same global names the header declares as inputs.
+    args = parse_command_line_args()
+    clusters = args.clusters
+    num_sequences = args.sequences
+    motif_length = args.motif_length
+    max_variants_position = args.max_variants_position
+    random_length = args.random_length
+    dispersion = args.dispersion
+    alphabet_key = args.alphabet
+    disable_cliffs = args.disable_cliffs
+    cliff_probability = args.cliff_probability
+    cliff_strength = args.cliff_strength
+# A key found in ``alphabets`` selects a built-in alphabet; any other value is treated as a
+# custom comma-separated list of letters.
+alphabet: alphabet_type = alphabets[alphabet_key].split(",") if alphabet_key in alphabets else alphabet_key.split(",")
+# Running sequence generator
+header, data = generate_sequences(
+    clusters,
+    num_sequences,
+    motif_length,
+    max_variants_position,
+    random_length,
+    dispersion,
+    alphabet,
+    not disable_cliffs,  # the generator expects "make cliffs", the input is "disable cliffs"
+    cliff_probability,
+    cliff_strength,
+)
+if grok:
+    # Exporting data to Datagrok as a pandas dataframe
+    # The variable name matches the ``# output: dataframe sequences`` line of the header.
+    import pandas as pd
+    sequences = pd.DataFrame.from_records(data, columns=header)
+else:
+    # Writing results to stdout - no need to work with big and heavy Pandas
+    # Output is tab-separated: the header row first, then one row per generated sequence.
+    import csv
+    csv_writer = csv.writer(sys.stdout, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
+    csv_writer.writerow(header)
+    for line in data:
+        csv_writer.writerow(line)

package/src/analysis/sequence-activity-cliffs.ts CHANGED Viewed

@@ -43,8 +43,8 @@ export async function getSimilaritiesMatrix(
 }
 export async function getChemSimilaritiesMatrix(dim: number, seqCol: DG.Column,
-  df: DG.DataFrame, colName: string, simArr: DG.Column[])
-  : Promise<DG.Column[]> {
+  df: DG.DataFrame, colName: string, simArr: (DG.Column | null)[])
+  : Promise<(DG.Column | null)[]> {
   if (seqCol.version !== seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
     await invalidateMols(seqCol, false);
   const fpDf = DG.DataFrame.create(seqCol.length);

package/src/analysis/sequence-diversity-viewer.ts CHANGED Viewed

@@ -12,15 +12,17 @@ import {updateDivInnerHTML} from '../utils/ui-utils';
 import {Subject} from 'rxjs';
 export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
+  diverseColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
   renderMolIds: number[] | null = null;
   columnNames = [];
   computeCompleted = new Subject<boolean>();
   constructor() {
     super('diversity');
+    this.diverseColumnLabel = this.string('diverseColumnLabel', null);
   }
   async render(computeData = true): Promise<void> {
     if (!this.beforeRender())
       return;
@@ -29,14 +31,15 @@ export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
         const monomericMols = await getMonomericMols(this.moleculeColumn);
         //need to create df to calculate fingerprints
         const monomericMolsDf = DG.DataFrame.fromColumns([monomericMols]);
-        this.renderMolIds =
-        await grok.functions.call('Chem:callChemDiversitySearch', {
+        this.renderMolIds = await grok.functions.call('Chem:callChemDiversitySearch', {
           col: monomericMols,
           metricName: this.distanceMetric,
           limit: this.limit,
           fingerprint: this.fingerprint
         });
-        const resCol = DG.Column.string('sequence', this.renderMolIds!.length)
+        const diverseColumnName: string = this.diverseColumnLabel != null ? this.diverseColumnLabel :
+          `diverse (${this.moleculeColumnName})`;
+        const resCol = DG.Column.string(diverseColumnName, this.renderMolIds!.length)
           .init((i) => this.moleculeColumn?.get(this.renderMolIds![i]));
         resCol.semType = DG.SEMTYPE.MACROMOLECULE;
         this.tags.forEach((tag) => resCol.setTag(tag, this.moleculeColumn!.getTag(tag)));

package/src/analysis/sequence-similarity-viewer.ts CHANGED Viewed

@@ -11,13 +11,15 @@ import {Subject} from 'rxjs';
 import {TAGS as bioTAGS, getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule';
 export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
+  cutoff: number;
   hotSearch: boolean;
+  similarColumnLabel: string | null; // Use postfix Label to prevent activating table column selection editor
   sketchedMolecule: string = '';
   curIdx: number = 0;
   molCol: DG.Column | null = null;
   idxs: DG.Column | null = null;
   scores: DG.Column | null = null;
-  cutoff: number;
   gridSelect: boolean = false;
   targetMoleculeIdx: number = 0;
   computeCompleted = new Subject<boolean>();
@@ -26,6 +28,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
     super('similarity');
     this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
     this.hotSearch = this.bool('hotSearch', true);
+    this.similarColumnLabel = this.string('similarColumnLabel', null);
   }
   init(): void {
@@ -54,7 +57,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
         });
         this.idxs = df.getCol('indexes');
         this.scores = df.getCol('score');
-        this.molCol = DG.Column.string('sequence',
+        const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
+          `similar (${this.moleculeColumnName})`;
+        this.molCol = DG.Column.string(similarColumnName,
           this.idxs!.length).init((i) => this.moleculeColumn?.get(this.idxs?.get(i)));
         this.molCol.semType = DG.SEMTYPE.MACROMOLECULE;
         this.tags.forEach((tag) => this.molCol!.setTag(tag, this.moleculeColumn!.getTag(tag)));

package/src/analysis/sequence-space.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
 import BitArray from '@datagrok-libraries/utils/src/bit-array';
 import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
 import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
+import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
 import * as grok from 'datagrok-api/grok';
 export interface ISequenceSpaceResult {
@@ -53,6 +54,23 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
   return result;
 }
+/** Builds the sequence space (embedding coordinates and distances) for a macromolecule column.
+ *
+ * Columns that {@link UnitsHandler} reports as FASTA are embedded from the column values
+ * (`seqCol.toList()`) with the distance function named by the units handler. Every other
+ * column is delegated to {@link sequenceSpaceByFingerprints}.
+ *
+ * @param spaceParams Sequence column, reduction method name, embedding axes names and options.
+ * @returns The distances together with one Float32 column per name in `embedAxesNames`.
+ */
+export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
+  const uh = new UnitsHandler(spaceParams.seqCol);
+  if (!uh.isFasta())
+    return sequenceSpaceByFingerprints(spaceParams);
+
+  const distanceFName = uh.getDistanceFunctionName();
+  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
+    spaceParams.seqCol.toList(),
+    spaceParams.methodName,
+    distanceFName,
+    spaceParams.options);
+  // One coordinate column per requested axis name, taken from the embedding in the same order.
+  const cols: DG.Column[] = spaceParams.embedAxesNames.map(
+    (name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
+  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
+}
 export function getEmbeddingColsNames(df: DG.DataFrame) {
   const axes = ['Embed_X', 'Embed_Y'];

package/src/demo/bio01-similarity-diversity.ts CHANGED Viewed

@@ -6,8 +6,10 @@ import {_package} from '../package';
 import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
 import {delay} from '@datagrok-libraries/utils/src/test';
 import {handleError} from './utils';
+import {SequenceDiversityViewer} from '../analysis/sequence-diversity-viewer';
+import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
-const dataFn = 'data/sample_FASTA_DNA.csv';
+const dataFn: string = 'data/sample_FASTA_DNA.csv';
 export async function demoBio01UI() {
   let view: DG.TableView;
@@ -17,21 +19,34 @@ export async function demoBio01UI() {
     const demoScript = new DemoScript('Demo', 'Sequence similarity / diversity search');
     await demoScript
       .step(`Loading DNA notation 'fasta'`, async () => {
+        grok.shell.windows.showContextPanel = false;
+        grok.shell.windows.showProperties = false;
         df = await _package.files.readCsv(dataFn);
         view = grok.shell.addTableView(df);
+        view.grid.columns.byName('id')!.width = 0;
+        view.grid.columns.byName('sequence')!.width = 500;
+        // TODO: Fix column width
       }, {
         description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
-        delay: 1600
+        delay: 1200
       })
       .step('Sequence similarity search', async () => {
-        const simViewer = await df.plot.fromType('Sequence Similarity Search') as DG.Viewer;
+        const simViewer = await df.plot.fromType('Sequence Similarity Search', {
+          moleculeColumnName: 'sequence',
+          similarColumnLabel: 'Similar to current',
+        }) as SequenceSimilarityViewer;
         view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
       }, {
         description: `Add 'Sequence Similarity Search' viewer.`,
         delay: 1600
       })
       .step('Sequence diversity search', async () => {
-        const divViewer = await df.plot.fromType('Sequence Diversity Search') as DG.Viewer;
+        const divViewer = await df.plot.fromType('Sequence Diversity Search', {
+          moleculeColumnName: 'sequence',
+          diverseColumnLabel: 'Top diverse sequences of all data'
+        }) as SequenceDiversityViewer;
         view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
       }, {
         description: `Add 'Sequence Deversity Search' viewer.`,

package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts CHANGED Viewed

@@ -36,6 +36,9 @@ export async function demoBio01aUI() {
         ]);
         view = grok.shell.addTableView(df);
         view.grid.props.rowHeight = 22;
+        grok.shell.windows.showContextPanel = false;
+        grok.shell.windows.showProperties = false;
       }, {
         description: `Load dataset with macromolecules of 'fasta' notation, 'DNA' alphabet.`,
         delay: 1600,

package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts CHANGED Viewed

@@ -29,6 +29,9 @@ export async function demoBio01bUI() {
     const demoScript = new DemoScript('Demo', '');
     await demoScript
       .step(`Loading DNA notation \'fasta\'`, async () => {
+        grok.shell.windows.showContextPanel = false;
+        grok.shell.windows.showProperties = false;
         [df, treeHelper, dendrogramSvc] = await Promise.all([
           _package.files.readCsv(dataFn),
           getTreeHelper(),

package/src/demo/bio05-helm-msa-sequence-space.ts CHANGED Viewed

@@ -28,6 +28,9 @@ export async function demoBio05UI(): Promise<void> {
     await demoScript
       .step(`Loading peptides notation 'HELM'`, async () => {
         view = grok.shell.addTableView(df = await _package.files.readCsv(helmFn));
+        grok.shell.windows.showContextPanel = false;
+        grok.shell.windows.showProperties = false;
       }, {
         description: 'Load dataset with macromolecules of \'Helm\' notation.',
         delay: 1600,
@@ -44,15 +47,6 @@ export async function demoBio05UI(): Promise<void> {
         description: 'Multiple sequence alignment (MSA) performed with PepSeA tool operating on non-natural aminoacids as well.',
         delay: 1600,
       })
-      .step('Composition analysis on MSA results', async () => {
-        wlViewer = await df.plot.fromType('WebLogo', {
-          sequenceColumnName: msaHelmColName
-        }) as DG.Viewer & IWebLogoViewer;
-        view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
-      }, {
-        description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
-        delay: 1600,
-      })
       .step('Building sequence space', async () => {
         const method: string = 'UMAP';
         ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
@@ -62,6 +56,16 @@ export async function demoBio05UI(): Promise<void> {
         description: 'Reduce sequence space dimensionality to display on 2D representation.',
         delay: 1600
       })
+      .step('Composition analysis on MSA results', async () => {
+        wlViewer = await df.plot.fromType('WebLogo', {
+          sequenceColumnName: msaHelmColName,
+          maxHeight: 50,
+        }) as DG.Viewer & IWebLogoViewer;
+        view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
+      }, {
+        description: 'Composition analysis allows to reveal functional features of sequences like motifs, or variable loops.',
+        delay: 1600,
+      })
       .start();
   } catch (err: any) {
     handleError(err);

package/src/package.ts CHANGED Viewed

@@ -9,7 +9,7 @@ import {
 } from './utils/cell-renderer';
 import {VdRegionsViewer} from './viewers/vd-regions-viewer';
 import {SequenceAlignment} from './seq_align';
-import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
+import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
 import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
 import {
   createLinesGrid,
@@ -49,6 +49,7 @@ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-c
 import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
 import {checkInputColumnUI} from './utils/check-input-column';
 import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
+import { runKalign } from './utils/multiple-sequence-alignment';
 export const _package = new DG.Package();
@@ -286,19 +287,23 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
     'separator': macroMolecule.getTag(bioTAGS.separator),
     'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
   };
+  const uh = new UnitsHandler(macroMolecule);
+  let columnDistanceMetric = 'Tanimoto';
+  if (uh.isFasta())
+    columnDistanceMetric = uh.getDistanceFunctionName();
   const sp = await getActivityCliffs(
     df,
     macroMolecule,
     null,
     axesNames,
-    'Activity cliffs',
+    columnDistanceMetric,
     activities,
     similarity,
     'Tanimoto',
     methodName,
     DG.SEMTYPE.MACROMOLECULE,
     tags,
-    sequenceSpaceByFingerprints,
+    getSequenceSpace,
     getChemSimilaritiesMatrix,
     createTooltipElement,
     createPropPanelElement,
@@ -349,7 +354,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
     embedAxesNames: embedColsNames,
     options: options
   };
-  const sequenceSpaceRes = await sequenceSpaceByFingerprints(chemSpaceParams);
+  const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
   const embeddings = sequenceSpaceRes.coordinates;
   for (const col of embeddings) {
     const listValues = col.toList();
@@ -411,10 +416,21 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
 //top-menu: Bio | Alignment | MSA...
 //name: MSA...
 //tags: bio, panel
-export function multipleSequenceAlignmentAny(): void {
+export function multipleSequenceAlignmentDialog(): void {
   multipleSequenceAlignmentUI();
 }
+//name: Multiple Sequence Alignment
+//description: Multiple sequence alignment
+//tags: bio
+//input: column sequenceCol {semType: Macromolecule}
+//input: column clustersCol
+//output: column result
+export async function alignSequences(sequenceCol: DG.Column<string> | null = null,
+  clustersCol: DG.Column | null = null): Promise<DG.Column<string>> {
+  // Both columns default to null and are forwarded as a single options object;
+  // the column returned by multipleSequenceAlignmentUI is passed back unchanged.
+  return multipleSequenceAlignmentUI({col: sequenceCol, clustersCol});
+}
 //top-menu: Bio | Structure | Composition Analysis
 //name: Composition Analysis
 //meta.icon: files/icons/composition-analysis.svg

package/src/tests/checkInputColumn-tests.ts CHANGED Viewed

@@ -15,7 +15,7 @@ seq3,
 seq4`;
   test('testMsaPos', async () => {
-    const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
+    const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentDialog'})[0];
     const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
     const k = 11;
@@ -67,7 +67,7 @@ seq4`;
   });
   test('testGetActionFunctionMeta', async () => {
-    const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
+    const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentDialog'})[0];
     const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
     const k = 11;
   });

package/src/tests/msa-tests.ts CHANGED Viewed

@@ -138,7 +138,7 @@ async function _testMSAOnColumn(
   if (alphabet)
     expect(srcSeqCol.getTag(bioTAGS.alphabet), alphabet);
-  const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol, pepseaMethod);
+  const msaSeqCol = await multipleSequenceAlignmentUI({col: srcSeqCol, pepsea: {method: pepseaMethod}});
   expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
   expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
   expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);

package/src/tests/renderers-test.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import * as DG from 'datagrok-api/dg';
 import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
-import {importFasta, multipleSequenceAlignmentAny} from '../package';
+import {importFasta} from '../package';
 import {convertDo} from '../utils/convert';
 import * as C from '../utils/constants';
 import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
@@ -146,7 +146,7 @@ category('renderers', () => {
     expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
     expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
-    const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol);
+    const msaSeqCol = await multipleSequenceAlignmentUI({col: srcSeqCol});
     tv.grid.invalidate();
     expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);