@datagrok/bio 2.27.2 → 2.27.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CLAUDE.md +50 -0
  2. package/agents/package-knowledge.yaml +53 -0
  3. package/dist/455.js +1 -1
  4. package/dist/455.js.map +1 -1
  5. package/dist/682.js +1 -1
  6. package/dist/682.js.map +1 -1
  7. package/dist/705.js +1 -1
  8. package/dist/705.js.map +1 -1
  9. package/dist/909.js +2 -0
  10. package/dist/909.js.map +1 -0
  11. package/dist/immunum_bg.wasm +0 -0
  12. package/dist/package-test.js +3 -3
  13. package/dist/package-test.js.map +1 -1
  14. package/dist/package.js +3 -3
  15. package/dist/package.js.map +1 -1
  16. package/package.json +4 -2
  17. package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +24 -11
  18. package/src/package-api.ts +15 -1
  19. package/src/package-test.ts +1 -0
  20. package/src/package.g.ts +12 -1
  21. package/src/package.ts +22 -4
  22. package/src/tests/antibody-numbering-tests.ts +190 -0
  23. package/src/tests/detectors-tests.ts +5 -1
  24. package/src/tests/splitters-test.ts +8 -4
  25. package/src/tests/to-atomic-level-tests.ts +144 -0
  26. package/src/utils/annotations/numbering-ui.ts +34 -90
  27. package/src/utils/antibody-numbering/immunum-client.ts +45 -0
  28. package/src/utils/antibody-numbering/immunum-glue.js +275 -0
  29. package/src/utils/antibody-numbering/immunum.worker.ts +159 -0
  30. package/src/utils/antibody-numbering/number-antibody.ts +105 -0
  31. package/src/utils/antibody-numbering/types.ts +48 -0
  32. package/src/utils/seq-helper/seq-handler.ts +25 -9
  33. package/test-console-output-1.log +582 -485
  34. package/test-record-1.mp4 +0 -0
  35. package/webpack.config.js +13 -0
  36. package/dist/282.js +0 -2
  37. package/dist/282.js.map +0 -1
  38. package/dist/287.js +0 -2
  39. package/dist/287.js.map +0 -1
  40. package/dist/422.js +0 -2
  41. package/dist/422.js.map +0 -1
  42. package/dist/767.js +0 -2
  43. package/dist/767.js.map +0 -1
  44. package/src/utils/antibody-numbering (WIP)/alignment.ts +0 -578
  45. package/src/utils/antibody-numbering (WIP)/annotator.ts +0 -120
  46. package/src/utils/antibody-numbering (WIP)/data/blosum62.ts +0 -55
  47. package/src/utils/antibody-numbering (WIP)/data/consensus-aho.ts +0 -155
  48. package/src/utils/antibody-numbering (WIP)/data/consensus-imgt.ts +0 -162
  49. package/src/utils/antibody-numbering (WIP)/data/consensus-kabat.ts +0 -157
  50. package/src/utils/antibody-numbering (WIP)/data/consensus-martin.ts +0 -152
  51. package/src/utils/antibody-numbering (WIP)/data/consensus.ts +0 -36
  52. package/src/utils/antibody-numbering (WIP)/data/regions.ts +0 -63
  53. package/src/utils/antibody-numbering (WIP)/index.ts +0 -31
  54. package/src/utils/antibody-numbering (WIP)/testdata.ts +0 -5356
  55. package/src/utils/antibody-numbering (WIP)/types.ts +0 -69
  56. /package/dist/{8473fcbfb6e85ca6c852.wasm → wasmCluster.wasm} +0 -0
  57. /package/dist/{9a8fbf37666e32487835.wasm → wasmDbscan.wasm} +0 -0
@@ -0,0 +1,105 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {
4
+ NumberingScheme, ChainType, SCHEME_REGIONS,
5
+ } from '@datagrok-libraries/bio/src/utils/macromolecule/numbering-schemes';
6
+
7
+ import {numberSequencesWithImmunum} from './immunum-client';
8
+
9
/** Maps a scheme name onto one of the two values passed to the immunum engine.
 *
 * The comparison is case-insensitive: `'kabat'` in any letter case yields
 * `'kabat'`; every other value — including an empty or missing string — yields
 * `'imgt'`. Per the original note, the dialog's scheme dropdown is populated
 * from the `choices` option on the calling function's `scheme` parameter, so in
 * practice only those two values are expected to arrive here.
 *
 * @param scheme Scheme name as received from the caller.
 * @returns `'kabat'` or `'imgt'`.
 */
function toImmunumScheme(scheme: string): 'imgt' | 'kabat' {
  const normalized = (scheme || '').toLowerCase();
  if (normalized === 'kabat')
    return 'kabat';
  return 'imgt';
}
16
+
17
/** Resolves a scheme name to the `NumberingScheme` member used for the
 * region-table lookup.
 *
 * Case-insensitive `'kabat'` resolves to `NumberingScheme.Kabat`; anything
 * else, including an empty or missing string, resolves to
 * `NumberingScheme.IMGT`.
 *
 * @param scheme Scheme name as received from the caller.
 * @returns The matching `NumberingScheme` member.
 */
function toSchemeEnum(scheme: string): NumberingScheme {
  const isKabat = (scheme || '').toLowerCase() === 'kabat';
  if (isKabat)
    return NumberingScheme.Kabat;
  return NumberingScheme.IMGT;
}
21
+
22
/** Picks the `ChainType` key under which the region tables are indexed.
 *
 * Immunum reports the chain group as 'Heavy' / 'Light', while the region tables
 * distinguish Heavy / Light_Kappa / Light_Lambda, so the chain code is used to
 * split the light group:
 * - group 'Light' with code 'L' → `ChainType.Light_Lambda`;
 * - group 'Light' with any other code (including 'K') → `ChainType.Light_Kappa`;
 * - any other group → `ChainType.Heavy`, so the dialog still gets *some*
 *   region set for chains it does not recognize.
 *
 * Both comparisons are exact (case-sensitive).
 *
 * @param chainGroup Chain group reported for the row ('Heavy', 'Light', ...).
 * @param chainCode Chain code reported for the row ('H', 'K', 'L', ...).
 * @returns The region-table key for this chain.
 */
function toRegionChainKey(chainGroup: string, chainCode: string): ChainType {
  if (chainGroup !== 'Light')
    return ChainType.Heavy;
  if (chainCode === 'L')
    return ChainType.Light_Lambda;
  return ChainType.Light_Kappa;
}
30
+
31
/** Builds the region-annotation list (FR/CDR definitions) for one numbered row,
 * matching the shape of the antpack script output.
 *
 * Each entry carries id / name / description / start / end / visualType /
 * category / sourceScheme / autoGenerated. `start` and `end` are the region
 * table's `startPosition` / `endPosition` values passed through unchanged:
 * they are **scheme-position codes**, not character indices (per the original
 * note, the dialog resolves them to characters via `numbering_map`).
 *
 * The scheme text embedded in `id`, `description` and `sourceScheme` is taken
 * from the *resolved* scheme — the one whose region table is actually used —
 * rather than from the raw `schemeLabel`. `toSchemeEnum` maps every
 * non-'kabat' value to IMGT, so labelling with the raw input would tag IMGT
 * region boundaries with an unrelated scheme name (e.g. 'CHOTHIA'). It also
 * keeps a missing `schemeLabel` from throwing. For 'imgt' / 'kabat' in any
 * letter case the output is the same as labelling with the raw input.
 *
 * @param schemeLabel Scheme name as supplied by the caller; resolved through
 *   `toSchemeEnum`.
 * @param chainGroup Chain group reported for the row; an empty value yields `[]`.
 * @param chainCode Chain code reported for the row, used to choose between the
 *   light-chain region tables.
 * @returns One annotation object per region definition, or `[]` when the chain
 *   group is empty or no region table exists for the scheme/chain pair.
 */
function buildRegionAnnotations(
  schemeLabel: string, chainGroup: string, chainCode: string,
): any[] {
  if (!chainGroup) return [];

  const schemeEnum = toSchemeEnum(schemeLabel);
  const chainKey = toRegionChainKey(chainGroup, chainCode);
  const regionDefs = SCHEME_REGIONS[schemeEnum]?.[chainKey];
  if (!regionDefs) return [];

  // Label with the scheme that was actually resolved, so the annotation text
  // always agrees with the region table the boundaries came from.
  const schemeLower = schemeEnum === NumberingScheme.Kabat ? 'kabat' : 'imgt';
  const schemeUpper = schemeLower.toUpperCase();

  return regionDefs.map((r) => ({
    id: `${schemeLower}-${chainGroup}-${r.name}`.toLowerCase(),
    name: r.name,
    description: `${r.name} (${schemeUpper} ${r.startPosition}-${r.endPosition})`,
    start: r.startPosition,
    end: r.endPosition,
    visualType: 'region',
    category: 'structure',
    sourceScheme: schemeUpper,
    autoGenerated: true,
  }));
}
57
+
58
/** Numbers every sequence of a column with immunum and packs the results into
 * a DataFrame shaped like the output of the antpack Python script
 * `number_antibody.py`, which is what `numbering-ui.ts` consumes.
 *
 * Output columns (all strings, one row per input row, in this order):
 * - `position_names` — comma-separated list of numbered position codes
 * - `chain_type` — 'Heavy' | 'Light' | ''
 * - `annotations_json` — JSON array of FR/CDR region definitions
 * - `numbering_detail` — JSON array of {position, aa} per numbered residue
 * - `numbering_map` — JSON object mapping position code → char index
 *
 * A row with no result, a non-empty `error`, or empty `positionNames` is
 * written as a blank row: `annotations_json` is `'[]'` and the other four
 * cells are empty strings. Null input cells are sent to the engine as `''`.
 *
 * Numbering is delegated to `numberSequencesWithImmunum`; per the original
 * note it runs in a web worker (see `immunum.worker.ts`) so the main thread
 * stays responsive even for large columns.
 *
 * @param seqCol Column holding the sequences to number.
 * @param scheme Scheme name; normalized with `toImmunumScheme` before it is
 *   handed to the engine, and passed as-is to `buildRegionAnnotations`.
 * @returns A DataFrame with the five columns listed above.
 */
export async function numberAntibodyColumn(
  seqCol: DG.Column<string>,
  scheme: string,
): Promise<DG.DataFrame> {
  const rowCount = seqCol.length;
  const sequences: string[] = Array.from({length: rowCount}, (_, idx) => seqCol.get(idx) ?? '');

  const rows = await numberSequencesWithImmunum(sequences, toImmunumScheme(scheme));

  const makeStringColumn = (name: string) =>
    DG.Column.fromType(DG.COLUMN_TYPE.STRING, name, rowCount);
  const posCol = makeStringColumn('position_names');
  const chainCol = makeStringColumn('chain_type');
  const annotCol = makeStringColumn('annotations_json');
  const detailCol = makeStringColumn('numbering_detail');
  const mapCol = makeStringColumn('numbering_map');

  for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
    const row = rows[rowIdx];
    if (row && !row.error && row.positionNames) {
      posCol.set(rowIdx, row.positionNames);
      chainCol.set(rowIdx, row.chainType);
      annotCol.set(rowIdx, JSON.stringify(buildRegionAnnotations(scheme, row.chainType, row.chainCode)));
      detailCol.set(rowIdx, JSON.stringify(row.numberingDetail));
      mapCol.set(rowIdx, JSON.stringify(row.numberingMap));
    } else {
      // Missing, failed or unnumbered row: blank cells, empty annotation array.
      posCol.set(rowIdx, '');
      chainCol.set(rowIdx, '');
      annotCol.set(rowIdx, '[]');
      detailCol.set(rowIdx, '');
      mapCol.set(rowIdx, '');
    }
  }

  return DG.DataFrame.fromColumns([posCol, chainCol, annotCol, detailCol, mapCol]);
}
@@ -0,0 +1,48 @@
1
/** One element of the numbering-detail JSON: a scheme position paired with the
 * amino acid found there. */
export interface ImmunumNumberingEntry {
  /** Scheme position code, e.g. "1" or "27A". */
  position: string;
  /** Single-letter code of the residue at that position. */
  aa: string;
}
8
+
9
/** Numbering result for a single input row, as produced by the immunum worker. */
export interface ImmunumNumberingRow {
  /** Position codes in scheme order, joined with commas; empty string on failure. */
  positionNames: string;
  /** UI-facing chain group: 'Heavy', 'Light', or '' when unknown. */
  chainType: string;
  /** Immunum chain code: H, K, L, A, B, G, D, or ''. */
  chainCode: string;
  /** {position, aa} entries for the aligned region, in order. */
  numberingDetail: Array<ImmunumNumberingEntry>;
  /** Position code → character index in the extracted (gap-free) sequence. */
  numberingMap: {[positionCode: string]: number};
  /** Alignment confidence in [0, 1]. */
  confidence: number;
  /** Error message; empty string when the row was numbered successfully. */
  error: string;
}
26
+
27
/** Message sent to the immunum worker, discriminated by `op`:
 * `'init'` carries no payload; `'number'` carries the sequences to number
 * together with the numbering options. */
export type ImmunumWorkerRequest =
  | {op: 'init'}
  | {
    op: 'number';
    /** Sequences to number, one per row. */
    sequences: string[];
    /** Immunum scheme name: 'imgt' or 'kabat'. */
    scheme: string;
    /** Chain codes to consider, case-insensitive (e.g. ['H', 'K', 'L']).
     * Documented default when omitted: H/K/L. */
    chains?: string[];
    /** Minimum confidence in [0, 1]. `null` / `undefined` selects the
     * library default (documented as 0.5). */
    minConfidence?: number | null;
  };
39
+
40
/** Message returned by the immunum worker, discriminated by `ok`.
 *
 * On success `rows` may be present with the per-row numbering results; it is
 * optional (presumably omitted for requests that produce no rows, such as
 * `'init'` — confirm against the worker). On failure `error` holds the message. */
export type ImmunumWorkerResponse =
  | {
    ok: true;
    rows?: ImmunumNumberingRow[];
  }
  | {
    ok: false;
    error: string;
  };
43
+
44
/** Numbering schemes supported by immunum. Chothia and AHo are not among them,
 * although the UI engine dropdown lists all schemes globally.
 *
 * NOTE(review): the original note here said the immunum path handles the
 * unsupported schemes by returning empty rows with errors, whereas
 * `toImmunumScheme` in `number-antibody.ts` coerces every non-'kabat' value to
 * 'imgt' before the engine is called — confirm which behavior is intended. */
export const IMMUNUM_SCHEMES = ['imgt', 'kabat'] as const;

/** Union of the scheme names listed in `IMMUNUM_SCHEMES`. */
export type ImmunumScheme = (typeof IMMUNUM_SCHEMES)[number];
@@ -11,7 +11,7 @@ import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeq
11
11
  import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
12
12
  import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
13
13
  import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
14
- import {HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
14
+ import {DEOXYRIBOSE_SYMBOL, HELM_POLYMER_TYPE, HELM_WRAPPERS_REGEXP, PHOSPHATE_SYMBOL, RIBOSE_SYMBOL} from '@datagrok-libraries/bio/src/utils/const';
15
15
  import {GAP_SYMBOL, GapOriginals} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
16
16
  import {CellRendererBackBase, GridCellRendererTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
17
17
  import {HelmTypes} from '@datagrok-libraries/bio/src/helm/consts';
@@ -939,6 +939,11 @@ export class SeqHandler implements ISeqHandler {
939
939
 
940
940
  if (cm === GAP_SYMBOL)
941
941
  om = GapOriginals[NOTATION.FASTA];
942
+ // For HELM RNA, the splitter triplet-splits each nucleotide into
943
+ // [sugar, base, phosphate]; FASTA conversion keeps only the base, so
944
+ // drop standalone sugar/phosphate tokens.
945
+ else if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
946
+ om = '';
942
947
  else if (cm === PHOSPHATE_SYMBOL)
943
948
  om = '';
944
949
  else if (om.length > 1)
@@ -978,7 +983,9 @@ export class SeqHandler implements ISeqHandler {
978
983
  return joinToBiln(srcSS);
979
984
  }
980
985
 
981
- /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
986
+ /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus,
987
+ * ribose, and deoxyribose tokens (which the underlying splitter emits when triplet-splitting
988
+ * each nucleotide of an RNA chain). */
982
989
  private splitterAsHelmNucl(src: string): ISeqSplitted {
983
990
  const srcMList: ISeqSplitted = this.splitter(src);
984
991
  const tgtMList: (string | null)[] = new Array<string>(srcMList.length);
@@ -988,7 +995,8 @@ export class SeqHandler implements ISeqHandler {
988
995
  let om: string | null = srcMList.getOriginal(posIdx);
989
996
  if (isDna || isRna) {
990
997
  om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
991
- om = om === PHOSPHATE_SYMBOL ? null : om;
998
+ if (om === PHOSPHATE_SYMBOL || om === RIBOSE_SYMBOL || om === DEOXYRIBOSE_SYMBOL)
999
+ om = null;
992
1000
  }
993
1001
  tgtMList[posIdx] = om ? om : null;
994
1002
  }
@@ -1009,18 +1017,26 @@ export class SeqHandler implements ISeqHandler {
1009
1017
  // -- joiners --
1010
1018
 
1011
1019
  function joinToSeparator(seqS: ISeqSplitted, tgtSeparator: string, isHelm: boolean): string {
1012
- const resMList: string[] = new Array<string>(seqS.length);
1020
+ const resMList: string[] = [];
1013
1021
  for (let posIdx: number = 0; posIdx < seqS.length; ++posIdx) {
1014
1022
  const cm = seqS.getCanonical(posIdx);
1015
1023
  let om = seqS.getOriginal(posIdx);
1016
1024
  if (isHelm)
1017
1025
  om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
1018
1026
 
1019
- if (cm === GAP_SYMBOL)
1020
- om = GapOriginals[NOTATION.SEPARATOR];
1021
- else if (cm === PHOSPHATE_SYMBOL)
1022
- om = '';
1023
- resMList[posIdx] = om;
1027
+ if (cm === GAP_SYMBOL) {
1028
+ resMList.push(GapOriginals[NOTATION.SEPARATOR]);
1029
+ continue;
1030
+ }
1031
+ // For HELM RNA, the splitter triplet-splits each nucleotide into
1032
+ // [sugar, base, phosphate]; separator conversion keeps only the base, so
1033
+ // skip standalone sugar/phosphate tokens entirely (rather than emitting
1034
+ // an empty cell that would show up as an extra separator in the output).
1035
+ if (isHelm && (cm === PHOSPHATE_SYMBOL || cm === RIBOSE_SYMBOL || cm === DEOXYRIBOSE_SYMBOL))
1036
+ continue;
1037
+ if (cm === PHOSPHATE_SYMBOL)
1038
+ continue;
1039
+ resMList.push(om);
1024
1040
  }
1025
1041
  return resMList.join(tgtSeparator);
1026
1042
  }