@datagrok/bio 2.27.2 → 2.27.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +50 -0
- package/agents/package-knowledge.yaml +53 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/909.js +2 -0
- package/dist/909.js.map +1 -0
- package/dist/immunum_bg.wasm +0 -0
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +4 -2
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +24 -11
- package/src/package-api.ts +15 -1
- package/src/package-test.ts +1 -0
- package/src/package.g.ts +12 -1
- package/src/package.ts +22 -4
- package/src/tests/antibody-numbering-tests.ts +190 -0
- package/src/tests/detectors-tests.ts +5 -1
- package/src/tests/splitters-test.ts +8 -4
- package/src/tests/to-atomic-level-tests.ts +144 -0
- package/src/utils/annotations/numbering-ui.ts +34 -90
- package/src/utils/antibody-numbering/immunum-client.ts +45 -0
- package/src/utils/antibody-numbering/immunum-glue.js +275 -0
- package/src/utils/antibody-numbering/immunum.worker.ts +159 -0
- package/src/utils/antibody-numbering/number-antibody.ts +105 -0
- package/src/utils/antibody-numbering/types.ts +48 -0
- package/src/utils/seq-helper/seq-handler.ts +25 -9
- package/test-console-output-1.log +582 -485
- package/test-record-1.mp4 +0 -0
- package/webpack.config.js +13 -0
- package/dist/282.js +0 -2
- package/dist/282.js.map +0 -1
- package/dist/287.js +0 -2
- package/dist/287.js.map +0 -1
- package/dist/422.js +0 -2
- package/dist/422.js.map +0 -1
- package/dist/767.js +0 -2
- package/dist/767.js.map +0 -1
- package/src/utils/antibody-numbering (WIP)/alignment.ts +0 -578
- package/src/utils/antibody-numbering (WIP)/annotator.ts +0 -120
- package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +0 -55
- package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +0 -155
- package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +0 -162
- package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +0 -157
- package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +0 -152
- package/src/utils/antibody-numbering (WIP)/data/consensus.ts +0 -36
- package/src/utils/antibody-numbering (WIP)/data/regions.ts +0 -63
- package/src/utils/antibody-numbering (WIP)/index.ts +0 -31
- package/src/utils/antibody-numbering (WIP)/testdata.ts +0 -5356
- package/src/utils/antibody-numbering (WIP)/types.ts +0 -69
- /package/dist/{8473fcbfb6e85ca6c852.wasm → wasmCluster.wasm} +0 -0
- /package/dist/{9a8fbf37666e32487835.wasm → wasmDbscan.wasm} +0 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
NumberingScheme, ChainType, SCHEME_REGIONS,
|
|
5
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule/numbering-schemes';
|
|
6
|
+
|
|
7
|
+
import {numberSequencesWithImmunum} from './immunum-client';
|
|
8
|
+
|
|
9
|
+
/**
 * Maps a caller-supplied scheme name onto one of the two scheme identifiers
 * handed to the immunum engine.
 *
 * Matching is case-insensitive. Only 'kabat' selects Kabat; every other value,
 * including an empty or missing string, resolves to 'imgt'.
 */
function toImmunumScheme(scheme: string): 'imgt' | 'kabat' {
  const normalized = (scheme || '').toLowerCase();
  if (normalized === 'kabat')
    return 'kabat';
  return 'imgt';
}
|
|
16
|
+
|
|
17
|
+
/**
 * Resolves a scheme name (case-insensitive) to the `NumberingScheme` member used
 * as the key of the region-table lookup: 'kabat' gives `Kabat`, anything else
 * (including an empty or missing value) gives `IMGT`.
 */
function toSchemeEnum(scheme: string): NumberingScheme {
  const normalized = (scheme || '').toLowerCase();
  if (normalized === 'kabat')
    return NumberingScheme.Kabat;
  return NumberingScheme.IMGT;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Picks the region-table key for a numbered chain.
 *
 * The chain group only distinguishes 'Heavy' from 'Light', while the region
 * tables are keyed by Heavy / Light_Kappa / Light_Lambda, so the chain code is
 * used to split the light group:
 *  - group other than 'Light' (including unknown or empty) → Heavy
 *  - group 'Light' with code 'L' → Light_Lambda
 *  - group 'Light' with any other code → Light_Kappa
 */
function toRegionChainKey(chainGroup: string, chainCode: string): ChainType {
  if (chainGroup !== 'Light')
    return ChainType.Heavy;
  if (chainCode === 'L')
    return ChainType.Light_Lambda;
  return ChainType.Light_Kappa;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Builds the region-annotation objects (one per region definition) for a
 * single numbered row.
 *
 * Each entry carries id / name / description / start / end / visualType /
 * category / sourceScheme / autoGenerated. `start` and `end` are copied
 * verbatim from the region definition's `startPosition` / `endPosition`, i.e.
 * they are scheme-position codes, not character indices.
 *
 * @param schemeLabel Scheme name in any case. Resolved exactly like the region
 *   lookup: 'kabat' selects Kabat, anything else (including a missing value)
 *   selects IMGT.
 * @param chainGroup Chain group ('Heavy' / 'Light'); a falsy value yields `[]`.
 * @param chainCode Chain code used to choose the light-chain region table.
 * @returns The annotation entries, or `[]` when no chain group is given or the
 *   region table has no entry for the resolved scheme / chain.
 */
function buildRegionAnnotations(
  schemeLabel: string, chainGroup: string, chainCode: string,
): any[] {
  if (!chainGroup) return [];
  const schemeEnum = toSchemeEnum(schemeLabel);
  const chainKey = toRegionChainKey(chainGroup, chainCode);
  const regionDefs = SCHEME_REGIONS[schemeEnum]?.[chainKey];
  if (!regionDefs) return [];

  // The lookup above resolves every non-'kabat' label (including a missing
  // one) to IMGT. Derive the labels from that same resolution so that
  //  (a) a null/undefined `schemeLabel` no longer throws on `.toLowerCase()`, and
  //  (b) id / description / sourceScheme never advertise a scheme other than
  //      the one whose region table was actually used.
  const schemeLower = (schemeLabel || '').toLowerCase() === 'kabat' ? 'kabat' : 'imgt';
  const schemeUpper = schemeLower.toUpperCase();

  return regionDefs.map((r) => ({
    id: `${schemeLower}-${chainGroup}-${r.name}`.toLowerCase(),
    name: r.name,
    description: `${r.name} (${schemeUpper} ${r.startPosition}-${r.endPosition})`,
    start: r.startPosition,
    end: r.endPosition,
    visualType: 'region',
    category: 'structure',
    sourceScheme: schemeUpper,
    autoGenerated: true,
  }));
}
|
|
57
|
+
|
|
58
|
+
/**
 * Runs immunum numbering on a sequence column and returns a DataFrame with one
 * row per input sequence and five string columns:
 *  - `position_names`   — comma-separated list of numbered position codes
 *  - `chain_type`       — 'Heavy' | 'Light' | ''
 *  - `annotations_json` — JSON array of region annotations
 *  - `numbering_detail` — JSON array of {position, aa} per numbered residue
 *  - `numbering_map`    — JSON object mapping position code → char index
 *
 * The column set is intended to match what `numbering-ui.ts` consumes (per the
 * original notes, the same columns as the antpack `number_antibody.py` script).
 * The numbering itself is delegated to `numberSequencesWithImmunum` and awaited;
 * per the original notes it executes in a web worker (`immunum.worker.ts`).
 *
 * Rows that are missing from the result, carry a non-empty `error`, or have an
 * empty `positionNames` are emitted as failures: every cell is an empty string
 * except `annotations_json`, which is `'[]'`.
 *
 * @param seqCol Column of sequences; null cells are numbered as empty strings.
 * @param scheme Scheme name in any case; anything other than 'kabat' is
 *   numbered as IMGT.
 * @returns A DataFrame of `seqCol.length` rows with the columns listed above.
 */
export async function numberAntibodyColumn(
  seqCol: DG.Column<string>,
  scheme: string,
): Promise<DG.DataFrame> {
  const n = seqCol.length;
  const sequences: string[] = new Array(n);
  for (let i = 0; i < n; i++)
    sequences[i] = seqCol.get(i) ?? '';

  const immunumScheme = toImmunumScheme(scheme);
  const rows = await numberSequencesWithImmunum(sequences, immunumScheme);

  const posCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'position_names', n);
  const chainCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'chain_type', n);
  const annotCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'annotations_json', n);
  const detailCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'numbering_detail', n);
  const mapCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, 'numbering_map', n);

  for (let i = 0; i < n; i++) {
    const r = rows[i];
    if (!r || r.error || !r.positionNames) {
      // Failed / unnumbered row: keep the frame aligned with the input column.
      posCol.set(i, '');
      chainCol.set(i, '');
      annotCol.set(i, '[]');
      detailCol.set(i, '');
      mapCol.set(i, '');
      continue;
    }
    posCol.set(i, r.positionNames);
    chainCol.set(i, r.chainType);
    // Annotate with the scheme that was actually used for numbering, not the raw
    // argument: the raw value may be empty/undefined (which used to throw here,
    // after the numbering had already completed) or name an unsupported scheme
    // (which used to label IMGT regions with the wrong scheme name).
    annotCol.set(i, JSON.stringify(buildRegionAnnotations(immunumScheme, r.chainType, r.chainCode)));
    detailCol.set(i, JSON.stringify(r.numberingDetail));
    mapCol.set(i, JSON.stringify(r.numberingMap));
  }

  return DG.DataFrame.fromColumns([posCol, chainCol, annotCol, detailCol, mapCol]);
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** One numbered residue: a scheme position paired with the residue found there. */
export interface ImmunumNumberingEntry {
  /** Scheme position code, e.g. "1" or "27A". */
  position: string;
  /** Single-letter residue code at that position. */
  aa: string;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Numbering result for a single input sequence, as produced by the immunum
 * worker. A row counts as failed when `error` is non-empty or `positionNames`
 * is empty.
 */
export interface ImmunumNumberingRow {
  /** Position codes in scheme order, joined with commas; empty string on failure. */
  positionNames: string;
  /** UI-facing chain group: 'Heavy', 'Light', or '' when unknown. */
  chainType: string;
  /** Immunum chain code: H, K, L, A, B, G, D, or ''. */
  chainCode: string;
  /** {position, aa} entries for the aligned region, in order. */
  numberingDetail: ImmunumNumberingEntry[];
  /** Position code → character index in the extracted (gap-free) sequence. */
  numberingMap: Record<string, number>;
  /** Alignment confidence, in [0, 1]. */
  confidence: number;
  /** Error message; empty string when numbering succeeded. */
  error: string;
}
|
|
26
|
+
|
|
27
|
+
/** Message sent to the immunum worker, discriminated by `op`. */
export type ImmunumWorkerRequest =
  /** Initialization request; carries no payload. */
  | {op: 'init'}
  /** Numbering request for a batch of sequences. */
  | {
    op: 'number';
    /** Sequences to number. */
    sequences: string[];
    /** Immunum scheme: 'imgt' or 'kabat'. */
    scheme: string;
    /** Chain codes, case-insensitive (e.g. ['H', 'K', 'L']). Defaults to H/K/L. */
    chains?: string[];
    /** Minimum confidence in [0, 1]; null/undefined → library default (0.5). */
    minConfidence?: number | null;
  };
|
|
39
|
+
|
|
40
|
+
/**
 * Reply from the immunum worker, discriminated by `ok`: a success may carry the
 * numbered `rows`, a failure carries an `error` message.
 */
export type ImmunumWorkerResponse =
  | {ok: true; rows?: ImmunumNumberingRow[]}
  | {ok: false; error: string};
|
|
43
|
+
|
|
44
|
+
/**
 * Numbering schemes supported by immunum. Chothia and AHo are not supported.
 *
 * NOTE(review): the original note here says the UI engine dropdown lists all
 * schemes globally and that the immunum path handles unsupported ones by
 * returning empty rows with errors. `toImmunumScheme` in `number-antibody.ts`,
 * however, coerces every value other than 'kabat' to 'imgt' before the worker
 * is called — confirm which behavior is intended.
 */
export const IMMUNUM_SCHEMES = ['imgt', 'kabat'] as const;

/** Union of the scheme identifiers listed in `IMMUNUM_SCHEMES`. */
export type ImmunumScheme = (typeof IMMUNUM_SCHEMES)[number];
|
|
@@ -11,7 +11,7 @@ import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeq
|
|
|
11
11
|
import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
12
|
import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
13
13
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
14
|
-
import {HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
|
|
14
|
+
import {DEOXYRIBOSE_SYMBOL, HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL, RIBOSE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
|
|
15
15
|
import {GAP_SYMBOL, GapOriginals} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
16
16
|
import {CellRendererBackBase, GridCellRendererTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
17
17
|
import {HelmTypes} from '@datagrok-libraries/bio/src/helm/consts';
|
|
@@ -939,6 +939,11 @@ export class SeqHandler implements ISeqHandler {
|
|
|
939
939
|
|
|
940
940
|
if (cm === GAP_SYMBOL)
|
|
941
941
|
om = GapOriginals[NOTATION.FASTA];
|
|
942
|
+
// For HELM RNA, the splitter triplet-splits each nucleotide into
|
|
943
|
+
// [sugar, base, phosphate]; FASTA conversion keeps only the base, so
|
|
944
|
+
// drop standalone sugar/phosphate tokens.
|
|
945
|
+
else if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
|
|
946
|
+
om = '';
|
|
942
947
|
else if (cm === PHOSPHATE_SYMBOL)
|
|
943
948
|
om = '';
|
|
944
949
|
else if (om.length > 1)
|
|
@@ -978,7 +983,9 @@ export class SeqHandler implements ISeqHandler {
|
|
|
978
983
|
return joinToBiln(srcSS);
|
|
979
984
|
}
|
|
980
985
|
|
|
981
|
-
/** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus
|
|
986
|
+
/** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus,
|
|
987
|
+
* ribose, and deoxyribose tokens (which the underlying splitter emits when triplet-splitting
|
|
988
|
+
* each nucleotide of an RNA chain). */
|
|
982
989
|
private splitterAsHelmNucl(src: string): ISeqSplitted {
|
|
983
990
|
const srcMList: ISeqSplitted = this.splitter(src);
|
|
984
991
|
const tgtMList: (string | null)[] = new Array<string>(srcMList.length);
|
|
@@ -988,7 +995,8 @@ export class SeqHandler implements ISeqHandler {
|
|
|
988
995
|
let om: string | null = srcMList.getOriginal(posIdx);
|
|
989
996
|
if (isDna || isRna) {
|
|
990
997
|
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
991
|
-
om
|
|
998
|
+
if (om === PHOSPHATE_SYMBOL || om === RIBOSE_SYMBOL || om === DEOXYRIBOSE_SYMBOL)
|
|
999
|
+
om = null;
|
|
992
1000
|
}
|
|
993
1001
|
tgtMList[posIdx] = om ? om : null;
|
|
994
1002
|
}
|
|
@@ -1009,18 +1017,26 @@ export class SeqHandler implements ISeqHandler {
|
|
|
1009
1017
|
// -- joiners --
|
|
1010
1018
|
|
|
1011
1019
|
function joinToSeparator(seqS: ISeqSplitted, tgtSeparator: string, isHelm: boolean): string {
|
|
1012
|
-
const resMList: string[] =
|
|
1020
|
+
const resMList: string[] = [];
|
|
1013
1021
|
for (let posIdx: number = 0; posIdx < seqS.length; ++posIdx) {
|
|
1014
1022
|
const cm = seqS.getCanonical(posIdx);
|
|
1015
1023
|
let om = seqS.getOriginal(posIdx);
|
|
1016
1024
|
if (isHelm)
|
|
1017
1025
|
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
1018
1026
|
|
|
1019
|
-
if (cm === GAP_SYMBOL)
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1027
|
+
if (cm === GAP_SYMBOL) {
|
|
1028
|
+
resMList.push(GapOriginals[NOTATION.SEPARATOR]);
|
|
1029
|
+
continue;
|
|
1030
|
+
}
|
|
1031
|
+
// For HELM RNA, the splitter triplet-splits each nucleotide into
|
|
1032
|
+
// [sugar, base, phosphate]; separator conversion keeps only the base, so
|
|
1033
|
+
// skip standalone sugar/phosphate tokens entirely (rather than emitting
|
|
1034
|
+
// an empty cell that would show up as an extra separator in the output).
|
|
1035
|
+
if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
|
|
1036
|
+
continue;
|
|
1037
|
+
if (cm === PHOSPHATE_SYMBOL)
|
|
1038
|
+
continue;
|
|
1039
|
+
resMList.push(om);
|
|
1024
1040
|
}
|
|
1025
1041
|
return resMList.join(tgtSeparator);
|
|
1026
1042
|
}
|