@datagrok/bio 2.13.2 → 2.13.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/.eslintrc.json +1 -1
  2. package/CHANGELOG.md +26 -0
  3. package/detectors.js +52 -38
  4. package/dist/111.js +2 -0
  5. package/dist/111.js.map +1 -0
  6. package/dist/234.js +2 -0
  7. package/dist/234.js.map +1 -0
  8. package/dist/242.js +2 -0
  9. package/dist/242.js.map +1 -0
  10. package/dist/{286.js → 248.js} +1 -1
  11. package/dist/248.js.map +1 -0
  12. package/dist/284.js +3 -0
  13. package/dist/284.js.map +1 -0
  14. package/dist/317.js +2 -0
  15. package/dist/317.js.map +1 -0
  16. package/dist/589.js +2 -0
  17. package/dist/589.js.map +1 -0
  18. package/dist/603.js +2 -0
  19. package/dist/603.js.map +1 -0
  20. package/dist/682.js +2 -0
  21. package/dist/682.js.map +1 -0
  22. package/dist/705.js +2 -0
  23. package/dist/705.js.map +1 -0
  24. package/dist/{590.js → 731.js} +2 -2
  25. package/dist/731.js.map +1 -0
  26. package/dist/778.js +2 -0
  27. package/dist/778.js.map +1 -0
  28. package/dist/793.js +2 -0
  29. package/dist/793.js.map +1 -0
  30. package/dist/950.js +2 -0
  31. package/dist/950.js.map +1 -0
  32. package/dist/package-test.js +6 -7
  33. package/dist/package-test.js.map +1 -1
  34. package/dist/package.js +6 -7
  35. package/dist/package.js.map +1 -1
  36. package/files/cache_config.json +7 -0
  37. package/package.json +17 -23
  38. package/src/analysis/sequence-activity-cliffs.ts +1 -1
  39. package/src/function-edtiors/split-to-monomers-editor.ts +6 -7
  40. package/src/package-types.ts +19 -19
  41. package/src/package.ts +23 -16
  42. package/src/substructure-search/substructure-search.ts +9 -10
  43. package/src/tests/WebLogo-positions-test.ts +6 -6
  44. package/src/tests/activity-cliffs-tests.ts +5 -2
  45. package/src/tests/bio-tests.ts +6 -6
  46. package/src/tests/checkInputColumn-tests.ts +3 -3
  47. package/src/tests/converters-test.ts +1 -1
  48. package/src/tests/detectors-tests.ts +25 -13
  49. package/src/tests/fasta-export-tests.ts +2 -2
  50. package/src/tests/mm-distance-tests.ts +1 -1
  51. package/src/tests/msa-tests.ts +2 -2
  52. package/src/tests/renderers-test.ts +5 -5
  53. package/src/tests/scoring.ts +26 -5
  54. package/src/tests/seq-handler-get-region.ts +4 -4
  55. package/src/tests/sequence-space-test.ts +1 -1
  56. package/src/tests/substructure-filters-tests.ts +4 -1
  57. package/src/tests/to-atomic-level-tests.ts +1 -1
  58. package/src/utils/cell-renderer-consts.ts +3 -11
  59. package/src/utils/cell-renderer.ts +15 -17
  60. package/src/utils/context-menu.ts +1 -1
  61. package/src/utils/convert.ts +7 -4
  62. package/src/utils/get-region-func-editor.ts +11 -16
  63. package/src/utils/get-region.ts +5 -5
  64. package/src/utils/macromolecule-column-widget.ts +1 -1
  65. package/src/utils/monomer-lib/lib-manager.ts +20 -8
  66. package/src/utils/monomer-lib/library-file-manager/file-manager.ts +28 -24
  67. package/src/utils/monomer-lib/library-file-manager/file-validator.ts +2 -1
  68. package/src/utils/monomer-lib/library-file-manager/ui.ts +3 -6
  69. package/src/utils/multiple-sequence-alignment-ui.ts +10 -11
  70. package/src/utils/multiple-sequence-alignment.ts +2 -2
  71. package/src/utils/pepsea.ts +1 -1
  72. package/src/utils/save-as-fasta.ts +5 -5
  73. package/src/viewers/vd-regions-viewer.ts +2 -2
  74. package/src/widgets/bio-substructure-filter.ts +7 -7
  75. package/src/widgets/package-settings-editor-widget.ts +27 -27
  76. package/src/widgets/representations.ts +57 -61
  77. package/tsconfig.json +4 -4
  78. package/webpack.config.js +1 -1
  79. package/dist/23.js +0 -2
  80. package/dist/23.js.map +0 -1
  81. package/dist/231.js +0 -2
  82. package/dist/231.js.map +0 -1
  83. package/dist/282.js +0 -2
  84. package/dist/282.js.map +0 -1
  85. package/dist/286.js.map +0 -1
  86. package/dist/356.js +0 -2
  87. package/dist/356.js.map +0 -1
  88. package/dist/36.js +0 -2
  89. package/dist/36.js.map +0 -1
  90. package/dist/40.js +0 -2
  91. package/dist/40.js.map +0 -1
  92. package/dist/413.js +0 -2
  93. package/dist/413.js.map +0 -1
  94. package/dist/42.js +0 -2
  95. package/dist/42.js.map +0 -1
  96. package/dist/427.js +0 -2
  97. package/dist/427.js.map +0 -1
  98. package/dist/545.js +0 -3
  99. package/dist/545.js.map +0 -1
  100. package/dist/590.js.map +0 -1
  101. package/dist/65.js +0 -2
  102. package/dist/65.js.map +0 -1
  103. package/dist/796.js +0 -2
  104. package/dist/796.js.map +0 -1
  105. package/dist/package-test.js.LICENSE.txt +0 -1
  106. package/dist/package.js.LICENSE.txt +0 -1
  107. package/dist/{545.js.LICENSE.txt → 284.js.LICENSE.txt} +0 -0
@@ -0,0 +1,7 @@
1
+ [
2
+ {
3
+ "path": "/monomer-libraries",
4
+ "invalidateOn": "0 0 * * *",
5
+ "preflight": true
6
+ }
7
+ ]
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Aleksandr Tanas",
6
6
  "email": "atanas@datagrok.ai"
7
7
  },
8
- "version": "2.13.2",
8
+ "version": "2.13.5",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -13,30 +13,23 @@
13
13
  "directory": "packages/Bio"
14
14
  },
15
15
  "properties": [
16
- {
17
- "name": "MonomerWidthMode",
18
- "propertyType": "string",
19
- "choices": [
20
- "short",
21
- "long"
22
- ],
23
- "defaultValue": "short",
24
- "nullable": false
25
- },
26
16
  {
27
17
  "name": "MaxMonomerLength",
28
- "propertyType": "int",
29
- "defaultValue": 4,
18
+ "description": "The max length of monomer symbol displayed without shortening, 'long' to no limit",
19
+ "propertyType": "string",
20
+ "defaultValue": "4",
30
21
  "nullable": false
31
22
  },
32
23
  {
33
24
  "name": "TooltipWebLogo",
25
+ "description": "Display WebLogo in a Macromolecule column header tooltip",
34
26
  "propertyType": "bool",
35
27
  "defaultValue": "true",
36
28
  "nullable": false
37
29
  },
38
30
  {
39
31
  "name": "DefaultSeparator",
32
+ "description": "Default separator using to convert sequences into separator notation",
40
33
  "propertyType": "string",
41
34
  "defaultValue": ".",
42
35
  "nullable": false
@@ -44,18 +37,18 @@
44
37
  ],
45
38
  "dependencies": {
46
39
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.42.3",
40
+ "@datagrok-libraries/bio": "^5.42.5",
48
41
  "@datagrok-libraries/chem-meta": "^1.2.5",
49
42
  "@datagrok-libraries/math": "^1.1.5",
50
43
  "@datagrok-libraries/ml": "^6.6.12",
51
44
  "@datagrok-libraries/tutorials": "^1.3.12",
52
- "@datagrok-libraries/utils": "^4.2.11",
45
+ "@datagrok-libraries/utils": "^4.2.12",
53
46
  "@webgpu/types": "^0.1.40",
54
47
  "ajv": "^8.12.0",
55
48
  "ajv-errors": "^3.0.0",
56
49
  "cash-dom": "^8.0.0",
57
50
  "css-loader": "^6.7.3",
58
- "datagrok-api": "^1.18.6",
51
+ "datagrok-api": "^1.20.0",
59
52
  "dayjs": "^1.11.4",
60
53
  "fastest-levenshtein": "^1.0.16",
61
54
  "openchemlib": "^7.2.3",
@@ -65,8 +58,8 @@
65
58
  "wu": "latest"
66
59
  },
67
60
  "devDependencies": {
68
- "@datagrok-libraries/helm-web-editor": "^1.1.5",
69
- "@datagrok-libraries/js-draw-lite": "^0.0.3",
61
+ "@datagrok-libraries/helm-web-editor": "^1.1.6",
62
+ "@datagrok-libraries/js-draw-lite": "^0.0.4",
70
63
  "@datagrok/chem": "^1.9.2",
71
64
  "@datagrok/dendrogram": "^1.2.29",
72
65
  "@datagrok/helm": "^2.2.1",
@@ -77,12 +70,13 @@
77
70
  "datagrok-tools": "latest",
78
71
  "eslint": "latest",
79
72
  "eslint-config-google": "latest",
80
- "source-map-loader": "^4.0.1",
81
- "ts-loader": "^9.2.5",
82
- "typescript": "^4.8.4",
83
- "webpack": "^5.76.3",
73
+ "eslint-plugin-rxjs": "latest",
74
+ "source-map-loader": "latest",
75
+ "ts-loader": "^9.5.1",
76
+ "typescript": "^5.5.3",
77
+ "webpack": "^5.92.1",
84
78
  "webpack-bundle-analyzer": "latest",
85
- "webpack-cli": "^4.9.1"
79
+ "webpack-cli": "^5.1.4"
86
80
  },
87
81
  "scripts": {
88
82
  "link-api": "npm link datagrok-api",
@@ -164,7 +164,7 @@ export function createLinesGrid(df: DG.DataFrame, colNames: string[]): DG.Grid {
164
164
  const seqDiffCol = DG.Column.string('seq_diff', df.rowCount)
165
165
  .init((i) => `${df.get(colNames[0], i)}#${df.get(colNames[1], i)}`);
166
166
  seqDiffCol.semType = 'MacromoleculeDifference';
167
- seqDiffCol.setTag(DG.TAGS.UNITS, df.col(colNames[0])!.getTag(DG.TAGS.UNITS));
167
+ seqDiffCol.meta.units = df.col(colNames[0])!.meta.units;
168
168
  seqDiffCol.setTag(bioTAGS.separator, df.col(colNames[0])!.getTag(bioTAGS.separator));
169
169
  df.columns.add(seqDiffCol);
170
170
  const grid = df.plot.grid();
@@ -20,14 +20,13 @@ export class SplitToMonomersFunctionEditor {
20
20
  }
21
21
 
22
22
  constructor() {
23
- this.tableInput = ui.tableInput('Table', grok.shell.tv.dataFrame, undefined, () => {
23
+ this.tableInput = ui.input.table('Table', {value: grok.shell.tv.dataFrame, onValueChanged: () => {
24
24
  this.onTableInputChanged();
25
- });
25
+ }});
26
26
  //TODO: remove when the new version of datagrok-api is available
27
27
  const seqColValue = this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
28
- const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
29
- //@ts-ignore
30
- this.seqColInput = ui.columnInput('Sequence', this.tableInput.value!, seqColValue, null, seqColOptions);
28
+ this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!, value: seqColValue,
29
+ filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
31
30
 
32
31
  this.funcParamsDiv = ui.inputs([
33
32
  this.tableInput,
@@ -36,7 +35,7 @@ export class SplitToMonomersFunctionEditor {
36
35
  }
37
36
 
38
37
  onTableInputChanged(): void {
39
- this.seqColInput = ui.columnInput('Sequence', this.tableInput.value!,
40
- this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE));
38
+ this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!,
39
+ value: this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)});
41
40
  }
42
41
  }
@@ -3,8 +3,9 @@ import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
 
5
5
  import {Observable, Subject} from 'rxjs';
6
+
6
7
  import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
7
- import {MonomerWidthMode} from './utils/cell-renderer-consts';
8
+ import {LoggerWrapper} from '@datagrok-libraries/bio/src/utils/logger';
8
9
 
9
10
  /** Names of package properties/settings declared in properties section of {@link './package.json'} */
10
11
  export const enum BioPackagePropertiesNames {
@@ -19,39 +20,32 @@ export class BioPackageProperties extends Map<string, any> {
19
20
  private _onPropertyChanged: Subject<string> = new Subject<string>();
20
21
  public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
21
22
 
22
- public get MonomerWidthMode(): MonomerWidthMode {
23
- return super.get(BioPackagePropertiesNames.MonomerWidthMode) as MonomerWidthMode;
24
- }
25
-
26
- public set MonomerWidthMode(value: MonomerWidthMode) {
27
- super.set(BioPackagePropertiesNames.MonomerWidthMode, value);
28
- this._onPropertyChanged.next(BioPackagePropertiesNames.MonomerWidthMode);
23
+ /** Monomer symbol maximum length displayed, null for unlimited. */
24
+ public get maxMonomerLength(): number | null {
25
+ const vs = super.get(BioPackagePropertiesNames.MaxMonomerLength);
26
+ return vs === 'long' ? null : parseInt(vs);
29
27
  }
30
28
 
31
- /** Monomer name maximum length displayed in short mode. */
32
- public get MaxMonomerLength(): number {
33
- return super.get(BioPackagePropertiesNames.MaxMonomerLength) as number;
34
- }
35
-
36
- public set MaxMonomerLength(value: number) {
37
- super.set(BioPackagePropertiesNames.MaxMonomerLength, value);
29
+ public set maxMonomerLength(value: number | null) {
30
+ const vs = value === null ? 'long' : value.toString();
31
+ super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
38
32
  this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
39
33
  }
40
34
 
41
- public get TooltipWebLogo(): boolean {
35
+ public get tooltipWebLogo(): boolean {
42
36
  return super.get(BioPackagePropertiesNames.TooltipWebLogo) as boolean;
43
37
  }
44
38
 
45
- public set TooltipWebLogo(value: boolean) {
39
+ public set tooltipWebLogo(value: boolean) {
46
40
  super.set(BioPackagePropertiesNames.TooltipWebLogo, value);
47
41
  this._onPropertyChanged.next(BioPackagePropertiesNames.TooltipWebLogo);
48
42
  }
49
43
 
50
- public get DefaultSeparator(): string {
44
+ public get defaultSeparator(): string {
51
45
  return super.get(BioPackagePropertiesNames.DefaultSeparator) as string;
52
46
  }
53
47
 
54
- public set DefaultSeparator(value: string) {
48
+ public set defaultSeparator(value: string) {
55
49
  if (value.length !== 1) throw new Error('The separator must be of length one.');
56
50
  super.set(BioPackagePropertiesNames.DefaultSeparator, value);
57
51
  this._onPropertyChanged.next(BioPackagePropertiesNames.DefaultSeparator);
@@ -73,6 +67,12 @@ export class BioPackage extends DG.Package {
73
67
 
74
68
  public get initialized(): boolean { return this._initialized; }
75
69
 
70
+ constructor(opts: { debug: boolean } = {debug: false}) {
71
+ super();
72
+ // @ts-ignore
73
+ super._logger = new LoggerWrapper(super.logger, opts.debug);
74
+ }
75
+
76
76
  public completeInit(): void { this._initialized = true; }
77
77
 
78
78
  handleErrorUI(err: any) {
package/src/package.ts CHANGED
@@ -77,6 +77,7 @@ import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/b
77
77
  import {CyclizedNotationProvider} from './utils/cyclized';
78
78
  import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
79
79
  import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
80
+ import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
80
81
 
81
82
  export const _package = new BioPackage();
82
83
 
@@ -115,6 +116,12 @@ export async function initBio() {
115
116
  await Promise.all([
116
117
  (async () => {
117
118
  const monomerLibManager = await MonomerLibManager.getInstance();
119
+ // Fix user lib settings for explicit stuck from a terminated test
120
+ const libSettings = await getUserLibSettings();
121
+ if (libSettings.explicit) {
122
+ libSettings.explicit = [];
123
+ await setUserLibSettings(libSettings);
124
+ }
118
125
  await monomerLibManager.loadLibraries();
119
126
  monomerLib = monomerLibManager.getBioLib();
120
127
  })(),
@@ -291,16 +298,16 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
291
298
 
292
299
  // -- Package settings editor --
293
300
 
294
- //name: packageSettingsEditor
295
- //description: The database connection
296
- //tags: packageSettingsEditor
297
- //input: object propList
298
- //output: widget result
299
- export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
300
- const widget = new PackageSettingsEditorWidget(propList);
301
- widget.init().then(); // Ignore promise returned
302
- return widget as DG.Widget;
303
- }
301
+ // //name: packageSettingsEditor
302
+ // //description: The database connection
303
+ // //tags: packageSettingsEditor
304
+ // //input: object propList
305
+ // //output: widget result
306
+ // export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
307
+ // const widget = new PackageSettingsEditorWidget(propList);
308
+ // widget.init().then(); // Ignore promise returned
309
+ // return widget as DG.Widget;
310
+ // }
304
311
 
305
312
  // -- Cell renderers --
306
313
 
@@ -440,7 +447,7 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
440
447
  return;
441
448
  const axesNames = getEmbeddingColsNames(table);
442
449
  const tags = {
443
- 'units': molecules.getTag(DG.TAGS.UNITS),
450
+ 'units': molecules.meta.units!,
444
451
  'aligned': molecules.getTag(bioTAGS.aligned),
445
452
  'separator': molecules.getTag(bioTAGS.separator),
446
453
  'alphabet': molecules.getTag(bioTAGS.alphabet),
@@ -658,8 +665,8 @@ export async function compositionAnalysis(): Promise<void> {
658
665
  } else if (colList.length > 1) {
659
666
  const colListNames: string [] = colList.map((col) => col.name);
660
667
  const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
661
- const colInput: DG.InputBase = ui.choiceInput(
662
- 'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
668
+ const colInput: DG.InputBase = ui.input.choice(
669
+ 'Column', {value: selectedCol ? selectedCol.name : colListNames[0], items: colListNames});
663
670
  ui.dialog({
664
671
  title: 'Composition Analysis',
665
672
  helpUrl: 'https://datagrok.ai/help/datagrok/solutions/domains/bio/#sequence-composition',
@@ -743,13 +750,13 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
743
750
  const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
744
751
  if (semType === DG.SEMTYPE.MACROMOLECULE) {
745
752
  //console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
746
- // `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
753
+ // `semType: ${semType}, units: ${col.meta.units}`);
747
754
  // console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
748
- // 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
755
+ // 'units: "' + col.meta.units + '"');
749
756
 
750
757
  res.push({
751
758
  file: fileInfo.path, result: 'detected', column: col.name,
752
- message: `units: ${col.getTag(DG.TAGS.UNITS)}`,
759
+ message: `units: ${col.meta.units}`,
753
760
  });
754
761
  }
755
762
  }
@@ -54,7 +54,7 @@ export class SubstructureSearchDialog {
54
54
  }
55
55
 
56
56
  updateNotationDiv(): void {
57
- this.units = this.col.getTag(DG.TAGS.UNITS);
57
+ this.units = this.col.meta.units!;
58
58
  this.separator = this.col.getTag(bioTAGS.separator);
59
59
  const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
60
60
  if (notationDiv)
@@ -63,26 +63,25 @@ export class SubstructureSearchDialog {
63
63
 
64
64
  createUI(): void {
65
65
  const dataframe = grok.shell.tv.dataFrame;
66
- const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
67
- this.columnsInput = ui.columnInput('Column', dataframe, this.col, (column: DG.Column) => {
68
- this.col = column;
66
+ this.columnsInput = ui.input.column('Column', {table: dataframe, value: this.col, onValueChanged: (input) => {
67
+ this.col = input.value;
69
68
  this.updateNotationDiv();
70
69
  this.updateInputs();
71
- }, seqColOptions);
70
+ }, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
72
71
 
73
- this.substructureInput = ui.stringInput('Substructure', '');
72
+ this.substructureInput = ui.input.string('Substructure', {value: ''});
74
73
 
75
74
  this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
76
75
 
77
76
  const df = DG.DataFrame.create(1);
78
77
  df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
79
78
  df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
80
- df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
79
+ df.col(SUBSTR_HELM_COL_NAME)!.meta.units = NOTATION.HELM;
81
80
  this.grid = df.plot.grid();
82
- this.separatorInput = ui.stringInput('Separator', this.separator);
81
+ this.separatorInput = ui.input.string('Separator', {value: this.separator});
83
82
 
84
83
  this.inputsDiv = ui.div();
85
- this.units = this.col.getTag(DG.TAGS.UNITS);
84
+ this.units = this.col.meta.units!;
86
85
  this.separator = this.col.getTag(bioTAGS.separator);
87
86
  this.updateInputs();
88
87
 
@@ -135,7 +134,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
135
134
  await invalidateMols(col, true);
136
135
  const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((_i) => substructure);
137
136
  substructureCol.semType = DG.SEMTYPE.MACROMOLECULE;
138
- substructureCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
137
+ substructureCol.meta.units = NOTATION.HELM;
139
138
  const substructureMolsCol =
140
139
  await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
141
140
  const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
@@ -29,7 +29,7 @@ ATC-G-TTGC--
29
29
 
30
30
  const seqCol: DG.Column = df.getCol('seq');
31
31
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
32
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
32
+ seqCol.meta.units = NOTATION.FASTA;
33
33
  seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
34
34
  seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
35
35
 
@@ -77,7 +77,7 @@ ATC-G-TTGC--
77
77
 
78
78
  const seqCol: DG.Column = df.getCol('seq');
79
79
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
80
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
80
+ seqCol.meta.units = NOTATION.FASTA;
81
81
  seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
82
82
  seqCol.setTag(bioTAGS.aligned, 'SEQ');
83
83
 
@@ -120,7 +120,7 @@ ATC-G-TTGC--
120
120
 
121
121
  const seqCol: DG.Column = df.getCol('seq');
122
122
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
123
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
123
+ seqCol.meta.units = NOTATION.FASTA;
124
124
  seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
125
125
  seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
126
126
 
@@ -191,8 +191,8 @@ ATC-G-TTGC--
191
191
  test('empty', async () => {
192
192
  const df: DG.DataFrame = DG.DataFrame.fromColumns([(() => {
193
193
  const col = DG.Column.fromStrings('seq', []);
194
- col.setTag(DG.TAGS.SEMTYPE, DG.SEMTYPE.MACROMOLECULE);
195
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
194
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
195
+ col.meta.units = NOTATION.FASTA;
196
196
  col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
197
197
  return col;
198
198
  })()]);
@@ -222,7 +222,7 @@ function buildDfWithSeqCol(csv: string, notation: NOTATION, alphabet: ALPHABET,
222
222
 
223
223
  const seqCol: DG.Column = df.getCol('seq');
224
224
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
225
- seqCol.setTag(DG.TAGS.UNITS, notation);
225
+ seqCol.meta.units = notation;
226
226
  seqCol.setTag(bioTAGS.alphabet, alphabet);
227
227
  seqCol.setTag(bioTAGS.aligned, aligned);
228
228
 
@@ -13,21 +13,24 @@ import {
13
13
  getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
14
14
  } from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
15
15
  import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
16
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
17
+ import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
16
18
 
17
19
  import {_package} from '../package-test';
18
- import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
19
20
 
20
21
 
21
22
  category('activityCliffs', async () => {
22
23
  let viewList: DG.ViewBase[] = [];
23
24
  let dfList: DG.DataFrame[] = [];
24
25
 
26
+ let helmHelper: IHelmHelper;
25
27
  let monomerLibHelper: IMonomerLibHelper;
26
28
  /** Backup actual user's monomer libraries settings */
27
29
  let userLibSettings: UserLibSettings;
28
30
  const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
29
31
  const helmEncodingFunc = DG.Func.find({name: 'helmPreprocessingFunction', package: 'Bio'})[0];
30
32
  before(async () => {
33
+ helmHelper = await getHelmHelper(); // init Helm package
31
34
  monomerLibHelper = await getMonomerLibHelper();
32
35
  userLibSettings = await getUserLibSettings();
33
36
 
@@ -59,7 +62,7 @@ category('activityCliffs', async () => {
59
62
 
60
63
  await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
61
64
  'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
62
- });
65
+ }, {benchmark: true});
63
66
 
64
67
  test('activityCliffsWithEmptyRows', async () => {
65
68
  const actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
@@ -58,7 +58,7 @@ PEPTIDE1{meI}$$$$`;
58
58
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
59
59
  const seqCol: DG.Column = df.getCol('seq')!;
60
60
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
61
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
61
+ seqCol.meta.units = NOTATION.HELM;
62
62
  const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
63
63
 
64
64
  expectObject(stats.freq, {
@@ -129,7 +129,7 @@ export async function _testGetStats(csvDfN1: string) {
129
129
  const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
130
130
  const seqCol: DG.Column = dfN1.col('seq')!;
131
131
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
132
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
132
+ seqCol.meta.units = NOTATION.FASTA;
133
133
  const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
134
134
 
135
135
  expectObject(stats.freq, {
@@ -159,7 +159,7 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
159
159
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
160
160
  const col: DG.Column = df.col('seq')!;
161
161
  col.semType = DG.SEMTYPE.MACROMOLECULE;
162
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
162
+ col.meta.units = NOTATION.FASTA;
163
163
  const cp = pickUpPalette(col);
164
164
 
165
165
  expect(cp instanceof NucleotidesPalettes, true);
@@ -169,7 +169,7 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
169
169
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
170
170
  const col: DG.Column = df.col('seq')!;
171
171
  col.semType = DG.SEMTYPE.MACROMOLECULE;
172
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
172
+ col.meta.units = NOTATION.FASTA;
173
173
  const cp = pickUpPalette(col);
174
174
 
175
175
  expect(cp instanceof NucleotidesPalettes, true);
@@ -179,7 +179,7 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
179
179
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
180
180
  const col: DG.Column = df.col('seq')!;
181
181
  col.semType = DG.SEMTYPE.MACROMOLECULE;
182
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
182
+ col.meta.units = NOTATION.FASTA;
183
183
  const cp = pickUpPalette(col);
184
184
 
185
185
  expect(cp instanceof AminoacidsPalettes, true);
@@ -189,7 +189,7 @@ export async function _testPickupPaletteX(csvDfX: string) {
189
189
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
190
190
  const col: DG.Column = df.col('seq')!;
191
191
  col.semType = DG.SEMTYPE.MACROMOLECULE;
192
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
192
+ col.meta.units = NOTATION.FASTA;
193
193
  const cp = pickUpPalette(col);
194
194
 
195
195
  expect(cp instanceof UnknownSeqPalette, true);
@@ -18,7 +18,7 @@ seq4`;
18
18
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
19
19
  const col: DG.Column = df.getCol('seq');
20
20
  col.semType = DG.SEMTYPE.MACROMOLECULE;
21
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
21
+ col.meta.units = NOTATION.FASTA;
22
22
  col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
23
23
  col.setTag(bioTAGS.aligned, 'SEQ');
24
24
 
@@ -33,7 +33,7 @@ seq4`;
33
33
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
34
34
  const col: DG.Column = df.getCol('seq');
35
35
  col.semType = DG.SEMTYPE.MACROMOLECULE;
36
- col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
36
+ col.meta.units = NOTATION.HELM;
37
37
  // col.setTag(bio.TAGS.alphabetSize, '11');
38
38
  col.setTag(bioTAGS.alphabetIsMultichar, 'true');
39
39
 
@@ -48,7 +48,7 @@ seq4`;
48
48
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
49
49
  const col: DG.Column = df.getCol('seq');
50
50
  col.semType = DG.SEMTYPE.MACROMOLECULE;
51
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
51
+ col.meta.units = NOTATION.FASTA;
52
52
  col.setTag(bioTAGS.alphabet, 'UN');
53
53
  col.setTag(bioTAGS.alphabetSize, '11');
54
54
  col.setTag(bioTAGS.alphabetIsMultichar, 'true');
@@ -135,7 +135,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
135
135
  return function(srcCol: DG.Column): DG.Column {
136
136
  const converterSh = SeqHandler.forColumn(srcCol);
137
137
  const resCol = converterSh.convert(tgtNotation, tgtSeparator);
138
- expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
138
+ expect(resCol.meta.units, tgtNotation);
139
139
  return resCol;
140
140
  };
141
141
  }
@@ -99,6 +99,15 @@ C1CCCCC1
99
99
  CCCCCC`,
100
100
  neg: ['col1'],
101
101
  },
102
+ 'negFastaUnSingleChar': {
103
+ csv: `col1
104
+ Alanine
105
+ Cysteine
106
+ Aspartic acid
107
+ Glutamic acid
108
+ Phenylalanine`,
109
+ neg: ['col1']
110
+ },
102
111
 
103
112
  // Same length
104
113
  'fastaMsaSameLength': {
@@ -122,30 +131,33 @@ YN[Re]VYNR[Ac]WYV
122
131
  [Me]EYVMPSFW[Me]H`,
123
132
  pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true, undefined)},
124
133
  },
134
+ 'fastaMsaExtManyMinus': {
135
+ csv: `seq
136
+ [D-Tic]-------[D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]N[D-Orn][D-aThr]-[Phe_4Me]
137
+ [Phe_2F]--------[Tyr_ab-dehydroMe][dV][Aca]N[D-Orn][D-aThr]-[Phe_4Me]
138
+ [D-Tic]-[Hcy]QTWQ[Phe_4NH2][D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]----[Phe_4Me]`,
139
+ pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 17, true, undefined)}
140
+ },
125
141
  'sepSameLength': {
126
142
  csv: `seq
127
143
  Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
128
144
  Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
129
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: {
130
- 'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-'),
131
- }
145
+ Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
146
+ pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-')}
132
147
  },
133
148
  'sepMsaSameLength': {
134
149
  csv: `seq
135
150
  Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
136
151
  Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2
137
- Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`, pos: {
138
- 'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-'),
139
- }
152
+ Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
153
+ pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-')}
140
154
  },
141
155
  'helmSameLength': {
142
156
  csv: `seq
143
157
  PEPTIDE1{Ac(1).A.A.A.A.A.A.A.A.A.A.A.A.A.C(1).G.NH2}$$$$
144
158
  PEPTIDE1{Ab(1).Y.V.K.H.P.F.W.R.W.Y.A.A.A.C(1).G.NH2}$$$$
145
159
  PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`,
146
- pos: {
147
- 'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)
148
- }
160
+ pos: {'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)}
149
161
  },
150
162
  };
151
163
 
@@ -493,7 +505,7 @@ export async function _testNegList(list: string[]): Promise<void> {
493
505
  const col: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'col1', list);
494
506
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
495
507
  if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
496
- const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
508
+ const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
497
509
  throw new Error(msg);
498
510
  }
499
511
  }
@@ -507,7 +519,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
507
519
  col.semType = semType;
508
520
 
509
521
  if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
510
- const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
522
+ const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
511
523
  throw new Error(msg);
512
524
  }
513
525
  }
@@ -522,7 +534,7 @@ export async function _testPosList(list: string[], units: NOTATION,
522
534
  col.semType = semType;
523
535
 
524
536
  expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
525
- expect(col.getTag(DG.TAGS.UNITS), units);
537
+ expect(col.meta.units, units);
526
538
  expect(col.getTag(bioTAGS.aligned), aligned);
527
539
  expect(col.getTag(bioTAGS.alphabet), alphabet);
528
540
  if (separator)
@@ -550,7 +562,7 @@ export async function _testPos(
550
562
  col.semType = semType;
551
563
 
552
564
  expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
553
- expect(col.getTag(DG.TAGS.UNITS), units);
565
+ expect(col.meta.units, units);
554
566
  expect(col.getTag(bioTAGS.aligned), aligned);
555
567
  expect(col.getTag(bioTAGS.alphabet), alphabet);
556
568
  if (separator)
@@ -91,7 +91,7 @@ MRGGL
91
91
  const srcSeq: string = wrapData[testKey].src;
92
92
  const col = DG.Column.fromStrings('src', [srcSeq]);
93
93
  col.semType = DG.SEMTYPE.MACROMOLECULE;
94
- col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
94
+ col.meta.units = NOTATION.FASTA;
95
95
  const sh = SeqHandler.forColumn(col);
96
96
  const srcSS = sh.getSplitted(0);
97
97
  const wrapRes: string[] = wrapSequence(srcSS, lineWidth);
@@ -105,7 +105,7 @@ MRGGL
105
105
 
106
106
  const seqCol: DG.Column = df.getCol(args.seqCol);
107
107
  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
108
- seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
108
+ seqCol.meta.units = NOTATION.FASTA;
109
109
  const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
110
110
 
111
111
  const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
@@ -122,7 +122,7 @@ ATCGAATCGA`;
122
122
  const seq2 = Array(10000).fill('FYWRRY').join('');
123
123
  _testDistance(seq1, seq2, df, 0.667);
124
124
  } else { _testDistance(prot5, prot6, df, 1.143); }
125
- });
125
+ }, {benchmark: true});
126
126
  });
127
127
 
128
128
  async function _initMacromoleculeColumn(csv: string): Promise<SeqHandler> {