@datagrok-libraries/bio 5.3.0 → 5.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/atomic-works.d.ts +2 -0
- package/src/utils/atomic-works.d.ts.map +1 -0
- package/src/utils/atomic-works.js +354 -0
- package/src/utils/const.d.ts +48 -0
- package/src/utils/const.d.ts.map +1 -0
- package/src/utils/const.js +29 -0
- package/src/utils/macromolecule.js +2 -2
- package/src/utils/monomer-library.d.ts +43 -0
- package/src/utils/monomer-library.d.ts.map +1 -0
- package/src/utils/monomer-library.js +154 -0
- package/src/utils/monomer-utils.d.ts +10 -0
- package/src/utils/monomer-utils.d.ts.map +1 -0
- package/src/utils/monomer-utils.js +125 -0
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +5 -1
- package/src/utils/to-atomic-level.d.ts +4 -0
- package/src/utils/to-atomic-level.d.ts.map +1 -0
- package/src/utils/to-atomic-level.js +1095 -0
- package/src/utils/units-handler.d.ts +4 -0
- package/src/utils/units-handler.d.ts.map +1 -1
- package/src/utils/units-handler.js +6 -8
- package/tsconfig.json +1 -1
|
@@ -15,7 +15,7 @@ export function getStats(seqCol, minLength, splitter) {
|
|
|
15
15
|
firstLength = mSeq.length;
|
|
16
16
|
else if (mSeq.length !== firstLength)
|
|
17
17
|
sameLength = false;
|
|
18
|
-
if (mSeq.length > minLength) {
|
|
18
|
+
if (mSeq.length >= minLength) {
|
|
19
19
|
for (const m of mSeq) {
|
|
20
20
|
if (!(m in freq))
|
|
21
21
|
freq[m] = 0;
|
|
@@ -187,4 +187,4 @@ export function pickUpSeqCol(df) {
|
|
|
187
187
|
resCol = semTypeColList[0];
|
|
188
188
|
return resCol;
|
|
189
189
|
}
|
|
190
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"macromolecule.js","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,MAAM,EAAC,MAAM,iDAAiD,CAAC;AACvE,OAAO,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,iDAAiD,CAAC;AAY/F,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,YAAY,EAAC,MAAM,wBAAwB,CAAC;AACpD,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAsBlC,CAAC;AAMF,MAAM,UAAU,QAAQ,CAAC,MAAiB,EAAE,SAAiB,EAAE,QAAsB;IACnF,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,WAAW,GAAG,IAAI,CAAC;IAEvB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE;QACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,WAAW,IAAI,IAAI;YACrB,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC;aACvB,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW;YAClC,UAAU,GAAG,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,GAAG,SAAS,EAAE;YAC3B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;gBACpB,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oBACd,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACd;SACF;KACF;IACD,OAAO,EAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAW,qBAAqB,CAAC;AAEvD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,GAAQ;IACtC,OAAO,EAAE,CAAmB,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SAC5D,GAAG,CAAC,CAAC,EAAoB,EAAE,EAAE;QAC5B,IAAI,IAAY,CAAC;QACjB,MAAM,CAAC,GAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;YAChB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACd;aAAM;YACL,IAAI,GAAG,CAAC,CAAC;SACV;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,SAAiB,EAAE,QAA4B,SAAS;IAC/F,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAW,iCAAiC,CAAC;AACzD,MAAM,SAAS,GAAW,gBAAgB,CAAC;AAG3C;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,GAAQ;IACrC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,
MAAM,aAAa,GAAG,CAAC,EAAU,EAAU,EAAE;QAC3C,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;;YAEf,OAAO,EAAE,CAAC;IACd,CAAC,CAAC;IAEF,MAAM,MAAM,GAAa,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,OAAO,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,SAAiB,EAAE,QAA4B,SAAS;IACjG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;QAChD,OAAO,eAAe,CAAC;SACpB,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;QACzD,OAAO,wBAAwB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;SAC/C,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;QACpD,OAAO,cAAc,CAAC;;QAEtB,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,IAAI,CAAC,CAAC;IAEjD,0BAA0B;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAc;IACjD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;QAC1C,MAAM,IAAI,KAAK,CAAC,6BAA6B,EAAE,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,CAAC;IAElF,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,kCAAgB,CAAC;IAC7C,OAAO,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,iBAAiB,GAAW,QAAQ,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,UAAU,cAAc,CAAC,KAAa,EAAE,kBAA0B;;IACtE,MAAM,eAAe,GAA4B,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChF,MAAM,WAAW,GAAY,KAAK,CAAC,MAAM,GAAG,kBAAkB,IAAI,CAAC,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,MAAM,mCAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACrG,MAAM,UAAU,GAAG,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,CAAC,CAAC,mCAAI,GAAG,CAAC;IAC/C,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,kBAAkB,CAAC,GAAG,GAAG,CAAC;AACvF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAkB,EAAE,QAAqB,EAAE,YAAoB,GAAG;IACtG,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;QACpB,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CA
AC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACzC;IACD,0FAA0F;IAC1F,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAW,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IAChD,OAAO,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAkB;IAC/C,MAAM,kBAAkB,GAA4B;QAClD,yBAAc,YAAY,CAAC,oBAAoB,CAAC;QAChD,2BAAe,YAAY,CAAC,gBAAgB,CAAC;QAC7C,2BAAe,YAAY,CAAC,gBAAgB,CAAC;KAC9C,CAAC;IAEF,gDAAgD;IAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACrG,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB,EAAE,YAAoB,CAAC;IACpE,IAAI,QAAgB,CAAC;IACrB,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9C,MAAM,EAAE,GAAiB,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;KACxB;SAAM;QACL,MAAM,KAAK,GAAgB,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;QACxE,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;KAClC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAmB;IAClD,QAAQ,WAAW,EAAE;QACrB,KAAK,IAAI;YACP,OAAO,GAAG,CAAC,kBAAkB,CAAC,UAAU,CAAC;QAC3C,KAAK,IAAI,CAAC;QACV,KAAK,KAAK,CAAC;QACX,KAAK,KAAK;YACR,OAAO,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC;QAC5C,QAAQ;QACV;YACE,OAAO,GAAG,CAAC,kBAAkB,CAAC,KAAK,CAAC;KACrC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAAgB;;IAC3C,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,MAAM,GAAqB,MAAA,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QACzD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,
KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,CAAC,CAAC,mCAAI,IAAI,CAAC;IACX,IAAI,CAAC,MAAM,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACtC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {Vector} from '@datagrok-libraries/utils/src/type-declarations';\nimport {vectorLength, vectorDotProduct} from '@datagrok-libraries/utils/src/vector-operations';\n\n/** Stats of sequences with specified splitter func, returns { freq, sameLength }.\n * @param {DG.Column} seqCol\n * @param {number} minLength\n * @param {SplitterFunc} splitter\n * @return { SeqColStats }, sameLength: boolean } stats of column sequences\n */\nimport {SeqPalette} from '../seq-palettes';\nimport {Aminoacids, AminoacidsPalettes} from '../aminoacids';\nimport {Nucleotides, NucleotidesPalettes} from '../nucleotides';\nimport {UnknownSeqPalettes} from '../unknown';\nimport wu from 'wu';\nimport {UnitsHandler} from '../utils/units-handler';\nimport * as bio from '../../index';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'fasta',\n  SEPARATOR = 'separator',\n  HELM = 'helm',\n}\n\nexport const enum ALPHABET {\n  DNA = 'DNA',\n  RNA = 'RNA',\n  PT = 'PT',\n  UN = 'UN',\n}\n\nexport const enum TAGS {\n  aligned = 'aligned',\n  alphabet = 'alphabet',\n  alphabetSize = '.alphabetSize',\n  alphabetIsMultichar = '.alphabetIsMultichar',\n  separator = 'separator',\n};\n\nexport type SeqColStats = { freq: MonomerFreqs, sameLength: boolean }\nexport type SplitterFunc = (seq: string) => string[];\nexport type MonomerFreqs = { [m: string]: number };\n\nexport function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats {\n  const freq: { [m: string]: number } = {};\n  let sameLength = true;\n  let firstLength = null;\n\n  for (const seq of 
seqCol.categories) {\n    const mSeq = splitter(seq);\n\n    if (firstLength == null)\n      firstLength = mSeq.length;\n    else if (mSeq.length !== firstLength)\n      sameLength = false;\n\n    if (mSeq.length > minLength) {\n      for (const m of mSeq) {\n        if (!(m in freq))\n          freq[m] = 0;\n        freq[m] += 1;\n      }\n    }\n  }\n  return {freq: freq, sameLength: sameLength};\n}\n\nexport const monomerRe: RegExp = /\\[(\\w+)\\]|(\\w)|(-)/g;\n\n/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.\n * @param {any} seq object with sequence\n * @return {string[]} array of monomers\n */\nexport function splitterAsFasta(seq: any): string[] {\n  return wu<RegExpMatchArray>(seq.toString().matchAll(monomerRe))\n    .map((ma: RegExpMatchArray) => {\n      let mRes: string;\n      const m: string = ma[0];\n      if (m.length > 1) {\n        mRes = ma[1];\n      } else {\n        mRes = m;\n      }\n      return mRes;\n    }).toArray();\n}\n\n/** Gets method to split sequence by separator\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitterWithSeparator(separator: string, limit: number | undefined = undefined): SplitterFunc {\n  return (seq: string) => {\n    return seq.split(separator, limit);\n  };\n}\n\nconst helmRe: RegExp = /(PEPTIDE1|DNA1|RNA1)\\{([^}]+)}/g;\nconst helmPp1Re: RegExp = /\\[([^\\[\\]]+)]/g;\n\n\n/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA).\n * Only for linear polymers, does not split RNA for ribose and phosphate monomers.\n * @param {string} seq Source string of HELM notation\n * @return {string[]}\n */\nexport function splitterAsHelm(seq: any): string[] {\n  helmRe.lastIndex = 0;\n  const ea: RegExpExecArray | null = helmRe.exec(seq.toString());\n  const inSeq: string | null = ea ? 
ea[2] : null;\n\n  const mmPostProcess = (mm: string): string => {\n    helmPp1Re.lastIndex = 0;\n    const pp1M = helmPp1Re.exec(mm);\n    if (pp1M && pp1M.length >= 2)\n      return pp1M[1];\n    else\n      return mm;\n  };\n\n  const mmList: string[] = inSeq ? inSeq.split('.') : [];\n  return mmList.map(mmPostProcess);\n}\n\n/** Get splitter method to split sequences to monomers\n * @param {string} units\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitter(units: string, separator: string, limit: number | undefined = undefined): SplitterFunc {\n  if (units.toLowerCase().startsWith(NOTATION.FASTA))\n    return splitterAsFasta;\n  else if (units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n    return getSplitterWithSeparator(separator, limit);\n  else if (units.toLowerCase().startsWith(NOTATION.HELM))\n    return splitterAsHelm;\n  else\n    throw new Error(`Unexpected units ${units} .`);\n\n  // TODO: Splitter for HELM\n}\n\n/** Generate splitter function for sequence column\n * @param {DG.Column} col\n * @return {SplitterFunc} Splitter function\n */\nexport function getSplitterForColumn(col: DG.Column): SplitterFunc {\n  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n    throw new Error(`Get splitter for semType \"${DG.SEMTYPE.MACROMOLECULE}\" only.`);\n\n  const units = col.getTag(DG.TAGS.UNITS);\n  const separator = col.getTag(TAGS.separator);\n  return getSplitter(units, separator);\n}\n\nconst longMonomerPartRe: RegExp = /(\\w+)/g;\n\n/** Convert long monomer names to short ones */\nexport function monomerToShort(amino: string, maxLengthOfMonomer: number): string {\n  const shortAminoMatch: RegExpMatchArray | null = amino.match(longMonomerPartRe);\n  const needAddDots: boolean = amino.length > maxLengthOfMonomer || (shortAminoMatch?.length ?? 0) > 1;\n  const shortAmino = shortAminoMatch?.[0] ?? ' ';\n  return !needAddDots ? 
shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';\n}\n\n/** Calculate similarity in current sequence and alphabet.\n * @param {MonomerFreqs} freq\n * @param {Set<string>} alphabet\n * @param {string} gapSymbol\n * @return {number} Cosine similarity\n */\nexport function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol: string = '-'): number {\n  const keys = new Set<string>([...new Set(Object.keys(freq)), ...alphabet]);\n  keys.delete(gapSymbol);\n\n  const freqA: number[] = [];\n  const alphabetA: number[] = [];\n  for (const m of keys) {\n    freqA.push(m in freq ? freq[m] : 0);\n    alphabetA.push(alphabet.has(m) ? 1 : 0);\n  }\n  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */\n  const freqV: Vector = new Vector(freqA);\n  const alphabetV: Vector = new Vector(alphabetA);\n  return vectorDotProduct(freqV, alphabetV) / (vectorLength(freqV) * vectorLength(alphabetV));\n}\n\nexport function detectAlphabet(stats: SeqColStats): string {\n  const alphabetCandidates: [string, Set<string>][] = [\n    [ALPHABET.PT, UnitsHandler.PeptideFastaAlphabet],\n    [ALPHABET.DNA, UnitsHandler.DnaFastaAlphabet],\n    [ALPHABET.RNA, UnitsHandler.RnaFastaAlphabet],\n  ];\n\n  // Calculate likelihoods for alphabet_candidates\n  const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n    (c) => getAlphabetSimilarity(stats.freq, c[1]));\n  const maxCos = Math.max(...alphabetCandidatesSim);\n  const alphabet = maxCos > 0.65 ? 
alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n  return alphabet;\n}\n\n/** Selects a suitable palette based on column data\n * @param {DG.Column} seqCol Column to look for a palette\n * @param {number}  minLength minimum length of sequence to detect palette (empty strings are allowed)\n * @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column\n */\nexport function pickUpPalette(seqCol: DG.Column, minLength: number = 5): SeqPalette {\n  let alphabet: string;\n  if (seqCol.semType == DG.SEMTYPE.MACROMOLECULE) {\n    const uh: UnitsHandler = new UnitsHandler(seqCol);\n    alphabet = uh.alphabet;\n  } else {\n    const stats: SeqColStats = getStats(seqCol, minLength, splitterAsFasta);\n    alphabet = detectAlphabet(stats);\n  }\n\n  const res = getPaletteByType(alphabet);\n  return res;\n}\n\nexport function getPaletteByType(paletteType: string): bio.SeqPalette {\n  switch (paletteType) {\n  case 'PT':\n    return bio.AminoacidsPalettes.GrokGroups;\n  case 'NT':\n  case 'DNA':\n  case 'RNA':\n    return bio.NucleotidesPalettes.Chromatogram;\n    // other\n  default:\n    return bio.UnknownSeqPalettes.Color;\n  }\n}\n\nexport function pickUpSeqCol(df: DG.DataFrame): DG.Column | null {\n  const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n  let resCol: DG.Column | null = semTypeColList.find((col) => {\n    const units = col.getTag(DG.TAGS.UNITS);\n    return units ? units.indexOf('MSA') !== -1 : false;\n  }) ?? null;\n  if (!resCol && semTypeColList.length > 0)\n    resCol = semTypeColList[0];\n  return resCol;\n}\n"]}
|
|
190
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"macromolecule.js","sourceRoot":"","sources":["macromolecule.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,MAAM,EAAC,MAAM,iDAAiD,CAAC;AACvE,OAAO,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,iDAAiD,CAAC;AAY/F,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAC,YAAY,EAAC,MAAM,wBAAwB,CAAC;AACpD,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAsBlC,CAAC;AAMF,MAAM,UAAU,QAAQ,CAAC,MAAiB,EAAE,SAAiB,EAAE,QAAsB;IACnF,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,WAAW,GAAG,IAAI,CAAC;IAEvB,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE;QACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAE3B,IAAI,WAAW,IAAI,IAAI;YACrB,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC;aACvB,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW;YAClC,UAAU,GAAG,KAAK,CAAC;QAErB,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE;YAC5B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;gBACpB,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oBACd,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aACd;SACF;KACF;IACD,OAAO,EAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAW,qBAAqB,CAAC;AAEvD;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,GAAQ;IACtC,OAAO,EAAE,CAAmB,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SAC5D,GAAG,CAAC,CAAC,EAAoB,EAAE,EAAE;QAC5B,IAAI,IAAY,CAAC;QACjB,MAAM,CAAC,GAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE;YAChB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACd;aAAM;YACL,IAAI,GAAG,CAAC,CAAC;SACV;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,SAAiB,EAAE,QAA4B,SAAS;IAC/F,OAAO,CAAC,GAAW,EAAE,EAAE;QACrB,OAAO,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,MAAM,GAAW,iCAAiC,CAAC;AACzD,MAAM,SAAS,GAAW,gBAAgB,CAAC;AAG3C;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,GAAQ;IACrC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,MAAM,EAAE,GAA2B,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/D,MAAM,KAAK,GAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE/C,
MAAM,aAAa,GAAG,CAAC,EAAU,EAAU,EAAE;QAC3C,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;;YAEf,OAAO,EAAE,CAAC;IACd,CAAC,CAAC;IAEF,MAAM,MAAM,GAAa,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvD,OAAO,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;AACnC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,KAAa,EAAE,SAAiB,EAAE,QAA4B,SAAS;IACjG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,8BAAgB;QAChD,OAAO,eAAe,CAAC;SACpB,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,sCAAoB;QACzD,OAAO,wBAAwB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;SAC/C,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC,UAAU,4BAAe;QACpD,OAAO,cAAc,CAAC;;QAEtB,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,IAAI,CAAC,CAAC;IAEjD,0BAA0B;AAC5B,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAc;IACjD,IAAI,GAAG,CAAC,OAAO,KAAK,EAAE,CAAC,OAAO,CAAC,aAAa;QAC1C,MAAM,IAAI,KAAK,CAAC,6BAA6B,EAAE,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,CAAC;IAElF,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,kCAAgB,CAAC;IAC7C,OAAO,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,iBAAiB,GAAW,QAAQ,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,UAAU,cAAc,CAAC,KAAa,EAAE,kBAA0B;;IACtE,MAAM,eAAe,GAA4B,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAChF,MAAM,WAAW,GAAY,KAAK,CAAC,MAAM,GAAG,kBAAkB,IAAI,CAAC,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,MAAM,mCAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACrG,MAAM,UAAU,GAAG,MAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,CAAC,CAAC,mCAAI,GAAG,CAAC;IAC/C,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,kBAAkB,CAAC,GAAG,GAAG,CAAC;AACvF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAkB,EAAE,QAAqB,EAAE,YAAoB,GAAG;IACtG,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE;QACpB,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CA
AC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACzC;IACD,0FAA0F;IAC1F,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAW,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;IAChD,OAAO,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAkB;IAC/C,MAAM,kBAAkB,GAA4B;QAClD,yBAAc,YAAY,CAAC,oBAAoB,CAAC;QAChD,2BAAe,YAAY,CAAC,gBAAgB,CAAC;QAC7C,2BAAe,YAAY,CAAC,gBAAgB,CAAC;KAC9C,CAAC;IAEF,gDAAgD;IAChD,MAAM,qBAAqB,GAAa,kBAAkB,CAAC,GAAG,CAC5D,CAAC,CAAC,EAAE,EAAE,CAAC,qBAAqB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,qBAAqB,CAAC,CAAC;IAClD,MAAM,QAAQ,GAAG,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,qBAAqB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACrG,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,MAAiB,EAAE,YAAoB,CAAC;IACpE,IAAI,QAAgB,CAAC;IACrB,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE;QAC9C,MAAM,EAAE,GAAiB,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAClD,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC;KACxB;SAAM;QACL,MAAM,KAAK,GAAgB,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;QACxE,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;KAClC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,WAAmB;IAClD,QAAQ,WAAW,EAAE;QACrB,KAAK,IAAI;YACP,OAAO,GAAG,CAAC,kBAAkB,CAAC,UAAU,CAAC;QAC3C,KAAK,IAAI,CAAC;QACV,KAAK,KAAK,CAAC;QACX,KAAK,KAAK;YACR,OAAO,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC;QAC5C,QAAQ;QACV;YACE,OAAO,GAAG,CAAC,kBAAkB,CAAC,KAAK,CAAC;KACrC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAAgB;;IAC3C,MAAM,cAAc,GAAG,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IACzE,IAAI,MAAM,GAAqB,MAAA,cAAc,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;QACzD,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,
KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,CAAC,CAAC,mCAAI,IAAI,CAAC;IACX,IAAI,CAAC,MAAM,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC;QACtC,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {Vector} from '@datagrok-libraries/utils/src/type-declarations';\nimport {vectorLength, vectorDotProduct} from '@datagrok-libraries/utils/src/vector-operations';\n\n/** Stats of sequences with specified splitter func, returns { freq, sameLength }.\n * @param {DG.Column} seqCol\n * @param {number} minLength\n * @param {SplitterFunc} splitter\n * @return { SeqColStats }, sameLength: boolean } stats of column sequences\n */\nimport {SeqPalette} from '../seq-palettes';\nimport {Aminoacids, AminoacidsPalettes} from '../aminoacids';\nimport {Nucleotides, NucleotidesPalettes} from '../nucleotides';\nimport {UnknownSeqPalettes} from '../unknown';\nimport wu from 'wu';\nimport {UnitsHandler} from '../utils/units-handler';\nimport * as bio from '../../index';\n\n/** enum type to simplify setting \"user-friendly\" notation if necessary */\nexport const enum NOTATION {\n  FASTA = 'fasta',\n  SEPARATOR = 'separator',\n  HELM = 'helm',\n}\n\nexport const enum ALPHABET {\n  DNA = 'DNA',\n  RNA = 'RNA',\n  PT = 'PT',\n  UN = 'UN',\n}\n\nexport const enum TAGS {\n  aligned = 'aligned',\n  alphabet = 'alphabet',\n  alphabetSize = '.alphabetSize',\n  alphabetIsMultichar = '.alphabetIsMultichar',\n  separator = 'separator',\n};\n\nexport type SeqColStats = { freq: MonomerFreqs, sameLength: boolean }\nexport type SplitterFunc = (seq: string) => string[];\nexport type MonomerFreqs = { [m: string]: number };\n\nexport function getStats(seqCol: DG.Column, minLength: number, splitter: SplitterFunc): SeqColStats {\n  const freq: { [m: string]: number } = {};\n  let sameLength = true;\n  let firstLength = null;\n\n  for (const seq of 
seqCol.categories) {\n    const mSeq = splitter(seq);\n\n    if (firstLength == null)\n      firstLength = mSeq.length;\n    else if (mSeq.length !== firstLength)\n      sameLength = false;\n\n    if (mSeq.length >= minLength) {\n      for (const m of mSeq) {\n        if (!(m in freq))\n          freq[m] = 0;\n        freq[m] += 1;\n      }\n    }\n  }\n  return {freq: freq, sameLength: sameLength};\n}\n\nexport const monomerRe: RegExp = /\\[(\\w+)\\]|(\\w)|(-)/g;\n\n/** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol.\n * @param {any} seq object with sequence\n * @return {string[]} array of monomers\n */\nexport function splitterAsFasta(seq: any): string[] {\n  return wu<RegExpMatchArray>(seq.toString().matchAll(monomerRe))\n    .map((ma: RegExpMatchArray) => {\n      let mRes: string;\n      const m: string = ma[0];\n      if (m.length > 1) {\n        mRes = ma[1];\n      } else {\n        mRes = m;\n      }\n      return mRes;\n    }).toArray();\n}\n\n/** Gets method to split sequence by separator\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitterWithSeparator(separator: string, limit: number | undefined = undefined): SplitterFunc {\n  return (seq: string) => {\n    return seq.split(separator, limit);\n  };\n}\n\nconst helmRe: RegExp = /(PEPTIDE1|DNA1|RNA1)\\{([^}]+)}/g;\nconst helmPp1Re: RegExp = /\\[([^\\[\\]]+)]/g;\n\n\n/** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA).\n * Only for linear polymers, does not split RNA for ribose and phosphate monomers.\n * @param {string} seq Source string of HELM notation\n * @return {string[]}\n */\nexport function splitterAsHelm(seq: any): string[] {\n  helmRe.lastIndex = 0;\n  const ea: RegExpExecArray | null = helmRe.exec(seq.toString());\n  const inSeq: string | null = ea ? 
ea[2] : null;\n\n  const mmPostProcess = (mm: string): string => {\n    helmPp1Re.lastIndex = 0;\n    const pp1M = helmPp1Re.exec(mm);\n    if (pp1M && pp1M.length >= 2)\n      return pp1M[1];\n    else\n      return mm;\n  };\n\n  const mmList: string[] = inSeq ? inSeq.split('.') : [];\n  return mmList.map(mmPostProcess);\n}\n\n/** Get splitter method to split sequences to monomers\n * @param {string} units\n * @param {string} separator\n * @param limit\n * @return {SplitterFunc}\n */\nexport function getSplitter(units: string, separator: string, limit: number | undefined = undefined): SplitterFunc {\n  if (units.toLowerCase().startsWith(NOTATION.FASTA))\n    return splitterAsFasta;\n  else if (units.toLowerCase().startsWith(NOTATION.SEPARATOR))\n    return getSplitterWithSeparator(separator, limit);\n  else if (units.toLowerCase().startsWith(NOTATION.HELM))\n    return splitterAsHelm;\n  else\n    throw new Error(`Unexpected units ${units} .`);\n\n  // TODO: Splitter for HELM\n}\n\n/** Generate splitter function for sequence column\n * @param {DG.Column} col\n * @return {SplitterFunc} Splitter function\n */\nexport function getSplitterForColumn(col: DG.Column): SplitterFunc {\n  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)\n    throw new Error(`Get splitter for semType \"${DG.SEMTYPE.MACROMOLECULE}\" only.`);\n\n  const units = col.getTag(DG.TAGS.UNITS);\n  const separator = col.getTag(TAGS.separator);\n  return getSplitter(units, separator);\n}\n\nconst longMonomerPartRe: RegExp = /(\\w+)/g;\n\n/** Convert long monomer names to short ones */\nexport function monomerToShort(amino: string, maxLengthOfMonomer: number): string {\n  const shortAminoMatch: RegExpMatchArray | null = amino.match(longMonomerPartRe);\n  const needAddDots: boolean = amino.length > maxLengthOfMonomer || (shortAminoMatch?.length ?? 0) > 1;\n  const shortAmino = shortAminoMatch?.[0] ?? ' ';\n  return !needAddDots ? 
shortAmino : shortAmino.substring(0, maxLengthOfMonomer) + '…';\n}\n\n/** Calculate similarity in current sequence and alphabet.\n * @param {MonomerFreqs} freq\n * @param {Set<string>} alphabet\n * @param {string} gapSymbol\n * @return {number} Cosine similarity\n */\nexport function getAlphabetSimilarity(freq: MonomerFreqs, alphabet: Set<string>, gapSymbol: string = '-'): number {\n  const keys = new Set<string>([...new Set(Object.keys(freq)), ...alphabet]);\n  keys.delete(gapSymbol);\n\n  const freqA: number[] = [];\n  const alphabetA: number[] = [];\n  for (const m of keys) {\n    freqA.push(m in freq ? freq[m] : 0);\n    alphabetA.push(alphabet.has(m) ? 1 : 0);\n  }\n  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */\n  const freqV: Vector = new Vector(freqA);\n  const alphabetV: Vector = new Vector(alphabetA);\n  return vectorDotProduct(freqV, alphabetV) / (vectorLength(freqV) * vectorLength(alphabetV));\n}\n\nexport function detectAlphabet(stats: SeqColStats): string {\n  const alphabetCandidates: [string, Set<string>][] = [\n    [ALPHABET.PT, UnitsHandler.PeptideFastaAlphabet],\n    [ALPHABET.DNA, UnitsHandler.DnaFastaAlphabet],\n    [ALPHABET.RNA, UnitsHandler.RnaFastaAlphabet],\n  ];\n\n  // Calculate likelihoods for alphabet_candidates\n  const alphabetCandidatesSim: number[] = alphabetCandidates.map(\n    (c) => getAlphabetSimilarity(stats.freq, c[1]));\n  const maxCos = Math.max(...alphabetCandidatesSim);\n  const alphabet = maxCos > 0.65 ? 
alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';\n  return alphabet;\n}\n\n/** Selects a suitable palette based on column data\n * @param {DG.Column} seqCol Column to look for a palette\n * @param {number}  minLength minimum length of sequence to detect palette (empty strings are allowed)\n * @return {SeqPalette} Palette corresponding to the alphabet of the sequences in the column\n */\nexport function pickUpPalette(seqCol: DG.Column, minLength: number = 5): SeqPalette {\n  let alphabet: string;\n  if (seqCol.semType == DG.SEMTYPE.MACROMOLECULE) {\n    const uh: UnitsHandler = new UnitsHandler(seqCol);\n    alphabet = uh.alphabet;\n  } else {\n    const stats: SeqColStats = getStats(seqCol, minLength, splitterAsFasta);\n    alphabet = detectAlphabet(stats);\n  }\n\n  const res = getPaletteByType(alphabet);\n  return res;\n}\n\nexport function getPaletteByType(paletteType: string): bio.SeqPalette {\n  switch (paletteType) {\n  case 'PT':\n    return bio.AminoacidsPalettes.GrokGroups;\n  case 'NT':\n  case 'DNA':\n  case 'RNA':\n    return bio.NucleotidesPalettes.Chromatogram;\n    // other\n  default:\n    return bio.UnknownSeqPalettes.Color;\n  }\n}\n\nexport function pickUpSeqCol(df: DG.DataFrame): DG.Column | null {\n  const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);\n  let resCol: DG.Column | null = semTypeColList.find((col) => {\n    const units = col.getTag(DG.TAGS.UNITS);\n    return units ? units.indexOf('MSA') !== -1 : false;\n  }) ?? null;\n  if (!resCol && semTypeColList.length > 0)\n    resCol = semTypeColList[0];\n  return resCol;\n}\n"]}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
export declare type MonomerEntry = {
|
|
2
|
+
mol: string;
|
|
3
|
+
type: string;
|
|
4
|
+
analogueCode: string;
|
|
5
|
+
linkages: {
|
|
6
|
+
[link: string]: {
|
|
7
|
+
atomNumber: number;
|
|
8
|
+
type: string;
|
|
9
|
+
};
|
|
10
|
+
};
|
|
11
|
+
};
|
|
12
|
+
export declare type MonomerEntries = {
|
|
13
|
+
[name: string]: MonomerEntry;
|
|
14
|
+
};
|
|
15
|
+
export declare type LinkData = {
|
|
16
|
+
[link: string]: {
|
|
17
|
+
atomNumber: number;
|
|
18
|
+
type: string;
|
|
19
|
+
};
|
|
20
|
+
};
|
|
21
|
+
/** HELM-associated SDF monomer libraries with monomer processing */
|
|
22
|
+
export declare class MonomerLibrary {
|
|
23
|
+
static libName: string;
|
|
24
|
+
private monomerFields;
|
|
25
|
+
private library;
|
|
26
|
+
private monomers;
|
|
27
|
+
constructor(sdf: string);
|
|
28
|
+
/** getting full monomer information from monomer library
|
|
29
|
+
* @param {string} name
|
|
30
|
+
* @return {MonomerEntry}
|
|
31
|
+
*/
|
|
32
|
+
getMonomerEntry(name: string): MonomerEntry;
|
|
33
|
+
/** getting mol as string for monomer
|
|
34
|
+
* @param {string} name
|
|
35
|
+
* @return {string}
|
|
36
|
+
*/
|
|
37
|
+
getMonomerMol(name: string): string;
|
|
38
|
+
/** getting the list of the monomers available in library */
|
|
39
|
+
get monomerNames(): string[];
|
|
40
|
+
static get id(): string;
|
|
41
|
+
private getLinkData;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=monomer-library.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"monomer-library.d.ts","sourceRoot":"","sources":["monomer-library.ts"],"names":[],"mappings":"AAAA,oBAAY,YAAY,GAAG;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE;QAAE,CAAC,IAAI,EAAE,MAAM,GAAG;YAAE,UAAU,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAA;CACnE,CAAC;AACF,oBAAY,cAAc,GAAG;IAAE,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAAA;CAAE,CAAC;AAC9D,oBAAY,QAAQ,GAAG;IAAE,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAEhF,2DAA2D;AAC3D,qBAAa,cAAc;IACzB,MAAM,CAAC,OAAO,SAAoB;IAElC,OAAO,CAAC,aAAa,CAEnB;IAEF,OAAO,CAAC,OAAO,CAAsB;IAErC,OAAO,CAAC,QAAQ,CAAgB;gBAEpB,GAAG,EAAE,MAAM;IA2BvB;;;OAGG;IACI,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY;IAOlD;;;OAGG;IACI,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAiB1C,2DAA2D;IAC3D,IAAI,YAAY,IAAI,MAAM,EAAE,CAE3B;IAED,MAAM,KAAK,EAAE,IAAI,MAAM,CAEtB;IAED,OAAO,CAAC,WAAW;CAiCpB"}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/** HELM-associated SDF monomer library: parses an SDF string into per-monomer entries
 * (mol block, codes and R-group linkage data) addressable by monomer code or name.
 */
export class MonomerLibrary {
  /** Builds the library from the text of an SDF file.
   * @param {string} sdf Content of the SDF file
   * @throws {Error} When a required field is absent, is not present for every record,
   *   or a record's mol block has no `M  RGP` line
   */
  constructor(sdf) {
    // Every one of these data fields must be present for every record of the file.
    this.monomerFields = [
      'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',
    ];
    this.library = {};
    this.monomers = [];
    const sdfReader = new SDFReader();
    const data = sdfReader.getColls(sdf);
    for (const f of this.monomerFields) {
      if (!(f in data))
        throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);
      if (data[f].length !== data.molecule.length)
        throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);
    }
    for (let i = 0; i < data.molecule.length; i++) {
      const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);
      const entry = {
        mol: data.molecule[i],
        type: 'Peptide',
        code: data.MonomerCode[i],
        analogueCode: data.MonomerNaturalAnalogCode[i],
        linkages: linkData,
      };
      // A '.' code means "no code": such a monomer is registered under its name instead.
      const name = data.MonomerCode[i] !== '.' ? data.MonomerCode[i] : data.MonomerName[i];
      this.library[name] = entry;
      this.monomers.push(name);
    }
  }

  /** getting full monomer information from monomer library
   * @param {string} name
   * @return {MonomerEntry}
   * @throws {Error} When the library has no monomer registered under `name`
   */
  getMonomerEntry(name) {
    if (!this.monomers.includes(name))
      throw new Error(`Monomer library do not contain ${name} monomer`);
    return this.library[name];
  }

  /** getting mol as string for monomer: the `M  RGP` line is dropped and every `R#`
   * placeholder is substituted with the cap group of the corresponding linkage.
   * @param {string} name
   * @return {string}
   * @throws {Error} When the library has no monomer registered under `name`
   */
  getMonomerMol(name) {
    if (!this.monomers.includes(name))
      throw new Error(`Monomer library do not contain ${name} monomer`);
    const entry = this.library[name];
    // Spacing is matched loosely so that both 'M  RGP  ...' and 'M RGP ...' lines are removed.
    let monomerMol = entry.mol.replace(/M[ \t]+RGP[ \t]+.+\n/, '');
    //order matters: each call replaces only the first remaining 'R#', in linkage insertion order
    for (const link of Object.keys(entry.linkages))
      monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');
    return monomerMol;
  }

  /** getting the list of the monomers available in library
   * @return {string[]}
   */
  get monomerNames() {
    return this.monomers;
  }

  /** Identifier of the library, same as `MonomerLibrary.libName`.
   * @return {string}
   */
  static get id() {
    return MonomerLibrary.libName;
  }

  /** Extracts R-group linkage data from the `M  RGP` line of a mol block.
   * @param {string} mol Mol block of the monomer
   * @param {string} caps Cap groups, one `[R<n>]<cap>` item per line
   * @param {string} name Monomer name, used in the error message only
   * @return {LinkData} Linkages keyed by 'N-terminal' (R1), 'C-terminal' (R2) or 'branch' (R3)
   * @throws {Error} When the mol block has no `M  RGP` line
   */
  getLinkData(mol, caps, name) {
    const rgpLine = mol.match(/M[ \t]+RGP[ \t]+(.+)/);
    if (rgpLine === null)
      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);

    // R-group number -> cap group; blank or malformed lines (e.g. after a trailing newline) are skipped.
    const types = {};
    for (const line of caps.split('\n')) {
      const groupNumber = line.match(/\d+/);
      const capGroup = line.match(/(?<=\])\w+/);
      if (groupNumber === null || capGroup === null)
        continue;
      types[groupNumber[0]] = capGroup[0];
    }

    // The line holds a count followed by (atom number, R-group number) pairs.
    const linkNames = { 1: 'N-terminal', 2: 'C-terminal', 3: 'branch' };
    const data = rgpLine[1].trim().split(/\s+/);
    const pairCount = parseInt(data[0], 10);
    const res = {};
    for (let i = 0; i < pairCount; i++) {
      const atomNumber = parseInt(data[2 * i + 1], 10);
      const code = parseInt(data[2 * i + 2], 10);
      if (Number.isNaN(atomNumber) || Number.isNaN(code))
        break; // the line declares more pairs than it actually contains
      // NOTE(review): R-group numbers other than 1..3 all share the '' key, as before — confirm intended.
      res[linkNames[code] || ''] = { atomNumber: atomNumber, type: types[code] };
    }
    return res;
  }
}
MonomerLibrary.libName = 'monomerLibrary';
|
|
95
|
+
//TODO: merge with Chem version
/** Minimal SDF parser: collects the mol block and the data items of every record into
 * column-like arrays keyed by 'molecule' and by data item name.
 */
class SDFReader {
  constructor() {
    // 'molecule' gets one mol block per record; other keys appear as data items are met.
    this.dataColls = { 'molecule': [] };
  }

  /** Parses the SDF text and returns the collected columns.
   * @param {string} content Text of the SDF file
   * @return {Object<string, string[]>} Values per field name
   */
  getColls(content) {
    this.read(content);
    return this.dataColls;
  }

  /** Reads every `$$$$`-terminated record of the file into `dataColls`.
   * Content without any `$$$$` delimiter is treated as a single record.
   * @param {string} content Text of the SDF file
   */
  read(content) {
    content = content.replaceAll('\r', ''); //equalize old and new sdf standards
    const firstDelimiter = content.indexOf('$$$$', 0);
    const firstEnd = firstDelimiter === -1 ? content.length : firstDelimiter;
    // The first record (re)creates the column of each data item it contains.
    this.parse(content, 0, firstEnd, (name, val) => {
      this.dataColls[name] = [val];
    });
    if (firstDelimiter === -1)
      return;
    let startIndex = firstDelimiter + 5; // skip '$$$$' and the line break after it
    while (startIndex > -1 && startIndex < content.length)
      startIndex = this.readNext(content, startIndex);
  }

  /** Reads the record starting at `startIndex`.
   * @param {string} content Text of the SDF file
   * @param {number} startIndex Offset of the first character of the record
   * @return {number} Offset of the following record, or -1 when no `$$$$` delimiter follows
   */
  readNext(content, startIndex) {
    const nextStartIndex = content.indexOf('$$$$', startIndex);
    if (nextStartIndex === -1)
      return -1;
    this.parse(content, startIndex, nextStartIndex, (name, val) => {
      // A data item absent from the first record has no column yet: create it instead of
      // failing on undefined, so that the caller can detect the length mismatch itself.
      if (!Array.isArray(this.dataColls[name]))
        this.dataColls[name] = [];
      this.dataColls[name].push(val);
    });
    return nextStartIndex + 5;
  }

  /** Parses one record located in `content[start, end)`: stores its mol block in the
   * 'molecule' column and passes every data item to `handler`.
   * @param {string} content Text of the SDF file
   * @param {number} start Offset of the first character of the record
   * @param {number} end Offset of the `$$$$` delimiter closing the record
   * @param {function(string, string): void} handler Receives (item name, item value)
   */
  parse(content, start, end, handler) {
    // The mol block ends with an 'M  END' line; spacing is matched loosely and the real
    // match length is used instead of a fixed offset.
    const molEndRe = /M[ \t]+END\n/g;
    molEndRe.lastIndex = start;
    const molEndMatch = molEndRe.exec(content);
    const molEnd = molEndMatch === null || molEndMatch.index >= end ?
      end : molEndMatch.index + molEndMatch[0].length;
    this.dataColls['molecule'].push(content.substring(start, molEnd));

    let cursor = molEnd;
    while (cursor < end) {
      const headerStart = content.indexOf('> <', cursor);
      if (headerStart === -1 || headerStart >= end)
        return; // no more data items in this record
      const nameStart = headerStart + 3;
      const nameEnd = content.indexOf('>\n', nameStart);
      if (nameEnd === -1 || nameEnd >= end)
        return;
      const propertyName = content.substring(nameStart, nameEnd);
      const valueStart = nameEnd + 2;

      // A value spans any number of lines and is closed by a blank line.
      let valueEnd;
      let value;
      if (content[valueStart] === '\n') { // blank line right after the header: empty value
        valueEnd = valueStart;
        value = '';
      } else {
        valueEnd = content.indexOf('\n\n', valueStart);
        if (valueEnd === -1 || valueEnd > end) {
          // No closing blank line inside the record: the value runs up to the record end.
          valueEnd = end;
          value = content.substring(valueStart, valueEnd).replace(/\n+$/, '');
        } else
          value = content.substring(valueStart, valueEnd);
      }
      handler(propertyName, value);
      cursor = valueEnd + 1;
    }
  }
}
|
|
154
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"monomer-library.js","sourceRoot":"","sources":["monomer-library.ts"],"names":[],"mappings":"AASA,2DAA2D;AAC3D,MAAM,OAAO,cAAc;IAWzB,YAAY,GAAW;QARf,kBAAa,GAAa;YAChC,UAAU,EAAE,aAAa,EAAE,0BAA0B,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,EAAE,eAAe;SACpH,CAAC;QAEM,YAAO,GAAmB,EAAE,CAAC;QAE7B,aAAQ,GAAa,EAAE,CAAC;QAG9B,MAAM,SAAS,GAAG,IAAI,SAAS,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YAC/B,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;gBACd,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,mCAAmC,CAAC,CAAC;YAE7F,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM;gBACxC,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,0CAA0C,CAAC,CAAC;QACtG,CAAC,CAAC,CAAC;QAEH,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,KAAK,GAAG;gBACZ,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACrB,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;gBACzB,YAAY,EAAE,IAAI,CAAC,wBAAwB,CAAC,CAAC,CAAC;gBAC9C,QAAQ,EAAE,QAAQ;aACnB,CAAC;YAEF,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YACrF,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;YAC3B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SAC1B;IACH,CAAC;IAED;;;OAGG;IACI,eAAe,CAAC,IAAY;QACjC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,UAAU,CAAC,CAAC;QAEpE,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED;;;OAGG;IACI,aAAa,CAAC,IAAY;QAC/B,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,UAAU,CAAC,CAAC;QAGpE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;QAEvD,eAAe;QACf,MAAM,KAAK,GAAG,MAAM,C
AAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC1C,KAAK,MAAM,IAAI,IAAI,KAAK;YACtB,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;QAGzE,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,2DAA2D;IAC3D,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED,MAAM,KAAK,EAAE;QACX,OAAO,cAAc,CAAC,OAAO,CAAC;IAChC,CAAC;IAEO,WAAW,CAAC,GAAW,EAAE,IAAY,EAAE,IAAY;;QACzD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QACxC,IAAI,OAAO,KAAK,IAAI;YAClB,MAAM,IAAI,KAAK,CAAC,qCAAqC,IAAI,mBAAmB,CAAC,CAAC;QAEhF,MAAM,KAAK,GAA+B,EAAE,CAAC;QAC7C,MAAA,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,0CAAE,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YAC9B,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,YAAY,CAAE,CAAC,CAAC,CAAC,CAAC;QACxD,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC7D,MAAM,GAAG,GAAa,EAAE,CAAC;QACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvC,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,QAAQ,IAAI,EAAE;gBACd,KAAK,CAAC;oBACJ,IAAI,GAAG,YAAY,CAAC;oBACpB,MAAM;gBACR,KAAK,CAAC;oBACJ,IAAI,GAAG,YAAY,CAAC;oBACpB,MAAM;gBACR,KAAK,CAAC;oBACJ,IAAI,GAAG,QAAQ,CAAC;oBAChB,MAAM;gBACR;oBACE,MAAM;aACP;YACD,GAAG,CAAC,IAAI,CAAC,GAAG,EAAC,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,EAAC,CAAC;SACxE;QAED,OAAO,GAAG,CAAC;IACb,CAAC;;AA9GM,sBAAO,GAAG,gBAAgB,CAAC;AAiHpC,+BAA+B;AAC/B,MAAM,SAAS;IAGb;QACE,IAAI,CAAC,SAAS,GAAG,EAAC,UAAU,EAAE,EAAE,EAAC,CAAC;IACpC,CAAC;IAED,QAAQ,CAAC,OAAe;QACtB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,IAAI,CAAC,OAAe;QAClB,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,oCAAoC;QAC5E,IAAI,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC5C,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,IAAY,EAAE,GAAW,EAAQ,EAAE;YACrE,IAAI,CAAC,SAAS
,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QACH,UAAU,IAAI,CAAC,CAAC;QAChB,OAAO,UAAU,GAAG,CAAC,CAAC,IAAI,UAAU,GAAG,OAAO,CAAC,MAAM;YACnD,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IACpD,CAAC;IAED,QAAQ,CAAC,OAAe,EAAE,UAAkB;QAC1C,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAC3D,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE;YACzB,OAAO,CAAC,CAAC,CAAC;SACX;aAAM;YACL,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,UAAU,EAAE,cAAc,EAC5C,CAAC,IAAY,EAAE,GAAW,EAAQ,EAAE;gBAClC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;SACN;QAED,IAAI,cAAc,GAAG,CAAC,CAAC;YACrB,OAAO,cAAc,GAAG,CAAC,CAAC;QAG5B,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,KAAa,EAAE,GAAW,EAAE,OAA4C;QAC7F,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACvD,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;QAElE,KAAK,GAAG,MAAM,CAAC;QACf,OAAO,QAAQ,GAAG,GAAG,EAAE;YACrB,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACzC,IAAI,KAAK,KAAK,CAAC,CAAC;gBACd,OAAO;YAGT,KAAK,IAAI,CAAC,CAAC;YACX,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACzC,IAAI,QAAQ,KAAK,CAAC,CAAC;gBACjB,OAAO;YAGT,MAAM,YAAY,GAAG,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC;YAErB,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YACxC,IAAI,QAAQ,KAAK,CAAC,CAAC;gBACjB,QAAQ,GAAG,GAAG,CAAC;iBACZ,IAAI,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC,IAAI,IAAI;gBACpC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC;YAEjD,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC1D,QAAQ,IAAI,CAAC,CAAC;SACf;IACH,CAAC;CACF","sourcesContent":["export type MonomerEntry = {\n  mol: string,\n  type: string,\n  analogueCode: string,\n  linkages: { [link: string]: { atomNumber: number, type: string } }\n};\nexport type MonomerEntries = { [name: string]: MonomerEntry };\nexport type LinkData 
= { [link: string]: { atomNumber: number, type: string } };\n\n/** HELM associated sdf libraries with monomer processing*/\nexport class MonomerLibrary {\n  static libName = 'monomerLibrary';\n\n  private monomerFields: string[] = [\n    'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',\n  ];\n\n  private library: MonomerEntries = {};\n\n  private monomers: string[] = [];\n\n  constructor(sdf: string) {\n    const sdfReader = new SDFReader();\n    const data = sdfReader.getColls(sdf);\n    this.monomerFields.forEach((f) => {\n      if (!(f in data))\n        throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);\n\n      if (data[f].length != data.molecule.length)\n        throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);\n    });\n\n    for (let i = 0; i < data.molecule.length; i++) {\n      const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);\n      const entry = {\n        mol: data.molecule[i],\n        type: 'Peptide',\n        code: data.MonomerCode[i],\n        analogueCode: data.MonomerNaturalAnalogCode[i],\n        linkages: linkData,\n      };\n\n      const name = data.MonomerCode[i] !== '.' ? 
data.MonomerCode[i] : data.MonomerName[i];\n      this.library[name] = entry;\n      this.monomers.push(name);\n    }\n  }\n\n  /** getting full monomer information from monomer library\n   * @param {string} name\n   * @return {MonomerEntry}\n   */\n  public getMonomerEntry(name: string): MonomerEntry {\n    if (!this.monomers.includes(name))\n      throw new Error(`Monomer library do not contain ${name} monomer`);\n\n    return this.library[name];\n  }\n\n  /** getting mol as string for monomer\n   * @param {string} name\n   * @return {string}\n   */\n  public getMonomerMol(name: string): string {\n    if (!this.monomers.includes(name))\n      throw new Error(`Monomer library do not contain ${name} monomer`);\n\n\n    const entry = this.library[name];\n    let monomerMol = entry.mol.replace(/M  RGP  .+\\n/, '');\n\n    //order matters\n    const links = Object.keys(entry.linkages);\n    for (const link of links)\n      monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');\n\n\n    return monomerMol;\n  }\n\n  /** getting the list of the minomers available in library*/\n  get monomerNames(): string[] {\n    return this.monomers;\n  }\n\n  static get id(): string {\n    return MonomerLibrary.libName;\n  }\n\n  private getLinkData(mol: string, caps: string, name: string): LinkData {\n    const rawData = mol.match(/M  RGP  .+/);\n    if (rawData === null)\n      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);\n\n    const types: { [code: string]: string } = {};\n    caps.split('\\n')?.forEach((e) => {\n      types[e.match(/\\d+/)![0]] = e.match(/(?<=\\])\\w+/)![0];\n    });\n\n    const data = rawData[0].replace('M  RGP  ', '').split(/\\s+/);\n    const res: LinkData = {};\n    for (let i = 0; i < parseInt(data[0]); i++) {\n      const code = parseInt(data[2 * i + 2]);\n      let type = '';\n      switch (code) {\n      case 1:\n        type = 'N-terminal';\n        break;\n      case 2:\n        type = 'C-terminal';\n  
      break;\n      case 3:\n        type = 'branch';\n        break;\n      default:\n        break;\n      }\n      res[type] = {atomNumber: parseInt(data[2 * i + 1]), type: types[code]};\n    }\n\n    return res;\n  }\n}\n\n//TODO: merge with Chem version\nclass SDFReader {\n  dataColls: { [_: string]: string [] };\n\n  constructor() {\n    this.dataColls = {'molecule': []};\n  }\n\n  getColls(content: string): { [_: string]: string[] } {\n    this.read(content);\n    return this.dataColls;\n  }\n\n  read(content: string): void {\n    content = content.replaceAll('\\r', ''); //equalize old and new sdf standards\n    let startIndex = content.indexOf('$$$$', 0);\n    this.parse(content, 0, startIndex, (name: string, val: string): void => { // TODO: type\n      this.dataColls[name] = [];\n      this.dataColls[name].push(val);\n    });\n    startIndex += 5;\n    while (startIndex > -1 && startIndex < content.length)\n      startIndex = this.readNext(content, startIndex);\n  }\n\n  readNext(content: string, startIndex: number): number {\n    const nextStartIndex = content.indexOf('$$$$', startIndex);\n    if (nextStartIndex === -1) {\n      return -1;\n    } else {\n      this.parse(content, startIndex, nextStartIndex,\n        (name: string, val: string): void => {\n          this.dataColls[name].push(val);\n        });\n    }\n\n    if (nextStartIndex > -1)\n      return nextStartIndex + 5;\n\n\n    return nextStartIndex;\n  }\n\n  parse(content: string, start: number, end: number, handler: (name: string, val: string) => void): void {\n    const molEnd = +content.indexOf('M  END\\n', start) + 7;\n    let localEnd = start;\n    this.dataColls['molecule'].push(content.substring(start, molEnd));\n\n    start = molEnd;\n    while (localEnd < end) {\n      start = content.indexOf('> <', localEnd);\n      if (start === -1)\n        return;\n\n\n      start += 3;\n      localEnd = content.indexOf('>\\n', start);\n      if (localEnd === -1)\n        return;\n\n\n      
const propertyName = content.substring(start, localEnd);\n      start = localEnd + 2;\n\n      localEnd = content.indexOf('\\n', start);\n      if (localEnd === -1)\n        localEnd = end;\n      else if (content[localEnd + 1] != '\\n')\n        localEnd = content.indexOf('\\n', localEnd + 1);\n\n      handler(propertyName, content.substring(start, localEnd));\n      localEnd += 2;\n    }\n  }\n}\n"]}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
export declare const HELM_CORE_LIB_FILENAME = "/data/HELMCoreLibrary.json";
|
|
3
|
+
export declare function encodeMonomers(col: DG.Column): DG.Column | null;
|
|
4
|
+
export declare function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null;
|
|
5
|
+
export declare function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null;
|
|
6
|
+
export declare function createMomomersMolDict(lib: any[]): {
|
|
7
|
+
[key: string]: string | any;
|
|
8
|
+
};
|
|
9
|
+
export declare function createJsonMonomerLibFromSdf(table: DG.DataFrame): any;
|
|
10
|
+
//# sourceMappingURL=monomer-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"monomer-utils.d.ts","sourceRoot":"","sources":["monomer-utils.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAUtC,eAAO,MAAM,sBAAsB,+BAA+B,CAAC;AAEnE,wBAAgB,cAAc,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,GAAG,EAAE,CAAC,MAAM,GAAG,IAAI,CAwB/D;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,GAAG,EAAE,EAAE,GAAG,IAAI,CAuB3F;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,GAAG,EAAE,EAAE,GAAG,IAAI,CAoBhG;AAED,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG;IAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,GAAG,CAAA;CAAE,CAYjF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,EAAE,CAAC,SAAS,GAAG,GAAG,CA8BpE"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
// import {WebLogo, SplitterFunc} from '../../src/viewers/web-logo';
|
|
5
|
+
import { HELM_CORE_FIELDS, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, SDF_MONOMER_NAME } from './const';
|
|
6
|
+
// import {UnitsHandler} from './units-handler';
|
|
7
|
+
import * as bio from '../../index';
|
|
8
|
+
export const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';
|
|
9
|
+
/** Encodes every sequence of the column as a string with one character per monomer.
 * Each distinct monomer gets its own code point, assigned in order of first appearance
 * starting from MONOMER_ENCODE_MIN.
 * @param {DG.Column} col Column with sequences; its units and separator tags select the splitter
 * @return {DG.Column | null} String column 'encodedMolecules', or null when the column holds
 *   more distinct monomers than there are code points up to MONOMER_ENCODE_MAX
 */
export function encodeMonomers(col) {
  let encodeSymbol = MONOMER_ENCODE_MIN;
  // A Map keeps the lookup independent of Object.prototype keys and of falsy code points.
  const monomerSymbolDict = new Map();
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag("separator" /* bio.TAGS.separator */);
  const splitterFunc = bio.getSplitter(units, sep);
  const encodedStringArray = [];
  for (let i = 0; i < col.length; ++i) {
    let encodedMonomerStr = '';
    // A plain loop (not forEach) so that `return null` really leaves the function.
    for (const m of splitterFunc(col.get(i))) {
      let code = monomerSymbolDict.get(m);
      if (code === undefined) {
        if (encodeSymbol > MONOMER_ENCODE_MAX) {
          grok.shell.error(`Not enough symbols to encode monomers`);
          return null;
        }
        code = encodeSymbol;
        monomerSymbolDict.set(m, code);
        encodeSymbol++;
      }
      encodedMonomerStr += String.fromCodePoint(code);
    }
    encodedStringArray.push(encodedMonomerStr);
  }
  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
}
|
|
34
|
+
/** Collects library entries for the monomers of every sequence in the column.
 * @param {DG.Column} col Column with sequences; its units and 'separator' tags select the splitter
 * @param {any[]} monomersLibObject Monomer library objects passed to createMomomersMolDict
 * @return {any[][] | null} For each row, copies of the library entries of its non-empty
 *   monomers; null (after a shell warning) as soon as a monomer is missing from the library
 */
export function getMolfilesFromSeq(col, monomersLibObject) {
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag('separator');
  const splitSequence = bio.getSplitter(units, sep);
  const libraryDict = createMomomersMolDict(monomersLibObject);
  const perRowEntries = [];
  for (let row = 0; row < col.length; ++row) {
    const rowEntries = [];
    for (const monomer of splitSequence(col.get(row))) {
      if (!monomer)
        continue; // empty positions contribute nothing
      const libraryEntry = libraryDict[monomer];
      if (!libraryEntry) {
        grok.shell.warning(`Monomer ${monomer} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      // JSON round trip: each sequence receives its own copy of the entry
      rowEntries.push(JSON.parse(JSON.stringify(libraryEntry)));
    }
    perRowEntries.push(rowEntries);
  }
  return perRowEntries;
}
|
|
58
|
+
/** Collects library entries for the monomers of the sequence held by a single cell.
 * @param {DG.Cell} cell Cell with the sequence; the units and 'separator' tags of its column select the splitter
 * @param {any[]} monomersLibObject Monomer library objects passed to createMomomersMolDict
 * @return {any[][] | null} One-element array holding copies of the library entries of the
 *   non-empty monomers; null (after a shell warning) when a monomer is missing from the library
 */
export function getMolfilesFromSingleSeq(cell, monomersLibObject) {
  const units = cell.column.tags[DG.TAGS.UNITS];
  const sep = cell.column.getTag('separator');
  const splitSequence = bio.getSplitter(units, sep);
  const libraryDict = createMomomersMolDict(monomersLibObject);
  const sequenceEntries = [];
  for (const monomer of splitSequence(cell.value)) {
    if (!monomer)
      continue; // empty positions contribute nothing
    const libraryEntry = libraryDict[monomer];
    if (!libraryEntry) {
      grok.shell.warning(`Monomer ${monomer} is missing in HELM library. Structure cannot be created`);
      return null;
    }
    // JSON round trip: the result receives its own copy of the entry
    sequenceEntries.push(JSON.parse(JSON.stringify(libraryEntry)));
  }
  return [sequenceEntries];
}
|
|
79
|
+
/** Builds a symbol -> monomer dictionary from the PEPTIDE entries of a monomer library.
 * @param {any[]} lib Monomer library objects
 * @return {Object<string, any>} For each entry whose 'polymerType' is 'PEPTIDE', an object
 *   holding its HELM_CORE_FIELDS values, keyed by the entry's 'symbol'
 */
export function createMomomersMolDict(lib) {
  const dict = {};
  lib
    .filter((it) => it['polymerType'] === 'PEPTIDE')
    .forEach((it) => {
      // keep only the core fields of the entry
      dict[it["symbol" /* HELM_FIELDS.SYMBOL */]] = HELM_CORE_FIELDS.reduce((monomerObject, field) => {
        monomerObject[field] = it[field];
        return monomerObject;
      }, {});
    });
  return dict;
}
|
|
92
|
+
/** Converts a table read from an SDF monomer library into an array of JSON monomer objects.
 * Keys of `jsonSdfMonomerLibDict` become the keys of each monomer; the mapped value names
 * the table column the data is read from.
 * @param {DG.DataFrame} table Table with one row per monomer
 * @return {any} Array of monomer objects, one per table row
 */
export function createJsonMonomerLibFromSdf(table) {
  const resultLib = [];
  for (let i = 0; i < table.rowCount; i++) {
    const monomer = {};
    Object.keys(jsonSdfMonomerLibDict).forEach((key) => {
      const sdfField = jsonSdfMonomerLibDict[key];
      if (key === "symbol" /* HELM_FIELDS.SYMBOL */) {
        // A '.' symbol means "no symbol": the monomer name is used instead.
        const monomerSymbol = table.get(sdfField, i);
        monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;
      }
      else if (key === "rgroups" /* HELM_FIELDS.RGROUPS */) {
        const rawRgroups = table.get(sdfField, i);
        // A non-string cell (e.g. a missing value) yields no R-groups instead of a crash.
        const rgroupLines = typeof rawRgroups === 'string' ? rawRgroups.split('\n') : [];
        const jsonRgroups = [];
        for (const line of rgroupLines) {
          // trim() drops a trailing '\r', so that 'H\r' is still recognized as a hydrogen cap
          const g = line.trim();
          const radicalMatch = g.match(/\[R(\d+)\]/);
          if (radicalMatch === null)
            continue; // blank or malformed line: no '[R<n>]' label to build an R-group from
          const altAtom = g.substring(g.lastIndexOf(']') + 1);
          const radicalNum = radicalMatch[1];
          // Any cap other than 'H' is treated as a hydroxyl cap.
          const isHydrogen = altAtom === 'H';
          const rgroup = {};
          rgroup["capGroupSmiles" /* RGROUP_FIELDS.CAP_GROUP_SMILES */] = isHydrogen ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;
          rgroup["alternateId" /* RGROUP_FIELDS.ALTER_ID */] = isHydrogen ? `R${radicalNum}-H` : `R${radicalNum}-OH`;
          rgroup["capGroupName" /* RGROUP_FIELDS.CAP_GROUP_NAME */] = isHydrogen ? `H` : `OH`;
          rgroup["label" /* RGROUP_FIELDS.LABEL */] = `R${radicalNum}`;
          jsonRgroups.push(rgroup);
        }
        monomer[key] = jsonRgroups;
      }
      else if (sdfField) {
        // keys without a mapped column are left out of the monomer
        monomer[key] = table.get(sdfField, i);
      }
    });
    resultLib.push(monomer);
  }
  return resultLib;
}
|
|
125
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"monomer-utils.js","sourceRoot":"","sources":["monomer-utils.ts"],"names":[],"mappings":"AAAA,yCAAyC;AACzC,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,KAAK,IAAI,MAAM,mBAAmB,CAAC;AAE1C,oEAAoE;AACpE,OAAO,EAAc,gBAAgB,EAAiB,qBAAqB,EACzE,kBAAkB,EAAE,kBAAkB,EAAE,gBAAgB,EAAC,MAAM,SAAS,CAAC;AAC3E,gDAAgD;AAEhD,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAEnC,MAAM,CAAC,MAAM,sBAAsB,GAAG,4BAA4B,CAAC;AAEnE,MAAM,UAAU,cAAc,CAAC,GAAc;IAC3C,IAAI,YAAY,GAAG,kBAAkB,CAAC;IACtC,MAAM,iBAAiB,GAA8B,EAAE,CAAC;IACxD,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,sCAAoB,CAAC;IAC3C,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,kBAAkB,GAAG,EAAE,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACnC,IAAI,iBAAiB,GAAG,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YACrB,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE;gBACzB,IAAI,YAAY,GAAG,kBAAkB,EAAE;oBACrC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBAC1D,OAAO,IAAI,CAAC;iBACb;gBACD,iBAAiB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC;gBACpC,YAAY,EAAE,CAAC;aAChB;YACD,iBAAiB,IAAI,MAAM,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;QACH,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;KAC5C;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC;AACvE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAc,EAAE,iBAAwB;IACzE,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IACpC,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,YAAY,GAAG,qBAAqB,CAAC,iBAAiB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACnC,MAAM,aAAa,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;QAC7C,MAAM,cAAc,GAAG,EAAE,CAAC;QAC1B,KAAK,I
AAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YACxC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE;gBACf,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;oBAC9B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,QAAQ,CAAC,CAAC,CAAC,0DAA0D,CAAC,CAAC;oBACrG,OAAO,IAAI,CAAC;iBACb;gBACD,2CAA2C;gBAC3C,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5E;SACF;QACD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;KAC/B;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,IAAa,EAAE,iBAAwB;IAC9E,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAO,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,YAAY,GAAG,qBAAqB,CAAC,iBAAiB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC;IACjC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;IAC7C,MAAM,cAAc,GAAG,EAAE,CAAC;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACxC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE;YACf,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;gBAC9B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,QAAQ,CAAC,CAAC,CAAC,0DAA0D,CAAC,CAAC;gBACrG,OAAO,IAAI,CAAC;aACb;YACD,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;SAC5E;KACF;IACD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC9B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,GAAU;IAC9C,MAAM,IAAI,GAAoC,EAAE,CAAC;IACjD,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;QACjB,IAAI,EAAE,CAAC,aAAa,CAAC,KAAK,SAAS,EAAE;YACnC,MAAM,aAAa,GAA2B,EAAE,CAAC;YACjD,gBAAgB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACjC,aAAa,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;YACH,IAAI,CAAC,EAAE,mCAAoB,CAAC,GAAG,aAAa,CAAC;SAC9C;IACH,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,2BAA2B,CAAC,KAAmB;IAC7D,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE;QACv
C,MAAM,OAAO,GAAoC,EAAE,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;YACjD,IAAI,GAAG,sCAAuB,EAAE;gBAC9B,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,GAAG,CAAC,GAAG,aAAa,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;aACvF;iBAAM,IAAI,GAAG,wCAAwB,EAAE;gBACtC,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrE,MAAM,WAAW,GAAU,EAAE,CAAC;gBAC9B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAS,EAAE,EAAE;oBAC5B,MAAM,MAAM,GAAoC,EAAE,CAAC;oBACnD,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;oBACpD,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,YAAY,CAAE,CAAC,CAAC,CAAC,CAAC;oBAC7C,MAAM,uDAAgC,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,UAAU,MAAM,CAAC,CAAC,CAAC,OAAO,UAAU,GAAG,CAAC;oBACzG,MAAM,4CAAwB,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,UAAU,IAAI,CAAC,CAAC,CAAC,IAAI,UAAU,KAAK,CAAC;oBAC5F,MAAM,mDAA8B,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;oBACpE,MAAM,mCAAqB,GAAG,IAAI,UAAU,EAAE,CAAC;oBAC/C,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC3B,CAAC,CAAC,CAAC;gBACH,OAAO,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC;aAC5B;iBAAM;gBACL,IAAK,qBAAyD,CAAC,GAAG,CAAC;oBACjE,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAE,qBAAyD,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;aAChG;QACH,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;KACzB;IACD,OAAO,SAAS,CAAC;AACnB,CAAC","sourcesContent":["// import * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport * as grok from 'datagrok-api/grok';\n\n// import {WebLogo, SplitterFunc} from '../../src/viewers/web-logo';\nimport {HELM_FIELDS, HELM_CORE_FIELDS, RGROUP_FIELDS, jsonSdfMonomerLibDict,\n  MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, SDF_MONOMER_NAME} from './const';\n// import {UnitsHandler} from './units-handler';\n\nimport * as bio from '../../index';\n\nexport const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';\n\nexport function 
encodeMonomers(col: DG.Column): DG.Column | null {\n  let encodeSymbol = MONOMER_ENCODE_MIN;\n  const monomerSymbolDict: { [key: string]: number } = {};\n  const units = col.tags[DG.TAGS.UNITS];\n  const sep = col.getTag(bio.TAGS.separator);\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const encodedStringArray = [];\n  for (let i = 0; i < col.length; ++i) {\n    let encodedMonomerStr = '';\n    const monomers = splitterFunc(col.get(i));\n    monomers.forEach((m) => {\n      if (!monomerSymbolDict[m]) {\n        if (encodeSymbol > MONOMER_ENCODE_MAX) {\n          grok.shell.error(`Not enough symbols to encode monomers`);\n          return null;\n        }\n        monomerSymbolDict[m] = encodeSymbol;\n        encodeSymbol++;\n      }\n      encodedMonomerStr += String.fromCodePoint(monomerSymbolDict[m]);\n    });\n    encodedStringArray.push(encodedMonomerStr);\n  }\n  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);\n}\n\nexport function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {\n  const units = col.tags[DG.TAGS.UNITS];\n  const sep = col.getTag('separator');\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const monomersDict = createMomomersMolDict(monomersLibObject);\n  const molFiles = [];\n  for (let i = 0; i < col.length; ++i) {\n    const macroMolecule = col.get(i);\n    const monomers = splitterFunc(macroMolecule);\n    const molFilesForSeq = [];\n    for (let j = 0; j < monomers.length; ++j) {\n      if (monomers[j]) {\n        if (!monomersDict[monomers[j]]) {\n          grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. 
Structure cannot be created`);\n          return null;\n        }\n        // what is the reason of double conversion?\n        molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));\n      }\n    }\n    molFiles.push(molFilesForSeq);\n  }\n  return molFiles;\n}\n\nexport function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null {\n  const units = cell.column.tags[DG.TAGS.UNITS];\n  const sep = cell.column!.getTag('separator');\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const monomersDict = createMomomersMolDict(monomersLibObject);\n  const molFiles = [];\n  const macroMolecule = cell.value;\n  const monomers = splitterFunc(macroMolecule);\n  const molFilesForSeq = [];\n  for (let j = 0; j < monomers.length; ++j) {\n    if (monomers[j]) {\n      if (!monomersDict[monomers[j]]) {\n        grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. Structure cannot be created`);\n        return null;\n      }\n      molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));\n    }\n  }\n  molFiles.push(molFilesForSeq);\n  return molFiles;\n}\n\nexport function createMomomersMolDict(lib: any[]): { [key: string]: string | any } {\n  const dict: { [key: string]: string | any } = {};\n  lib.forEach((it) => {\n    if (it['polymerType'] === 'PEPTIDE') {\n      const monomerObject: { [key: string]: any } = {};\n      HELM_CORE_FIELDS.forEach((field) => {\n        monomerObject[field] = it[field];\n      });\n      dict[it[HELM_FIELDS.SYMBOL]] = monomerObject;\n    }\n  });\n  return dict;\n}\n\nexport function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {\n  const resultLib = [];\n  for (let i = 0; i < table.rowCount; i++) {\n    const monomer: { [key: string]: string | any } = {};\n    Object.keys(jsonSdfMonomerLibDict).forEach((key) => {\n      if (key === HELM_FIELDS.SYMBOL) {\n        const monomerSymbol = table.get(jsonSdfMonomerLibDict[key], i);\n 
       monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;\n      } else if (key === HELM_FIELDS.RGROUPS) {\n        const rgroups = table.get(jsonSdfMonomerLibDict[key], i).split('\\n');\n        const jsonRgroups: any[] = [];\n        rgroups.forEach((g: string) => {\n          const rgroup: { [key: string]: string | any } = {};\n          const altAtom = g.substring(g.lastIndexOf(']') + 1);\n          const radicalNum = g.match(/\\[R(\\d+)\\]/)![1];\n          rgroup[RGROUP_FIELDS.CAP_GROUP_SMILES] = altAtom === 'H' ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;\n          rgroup[RGROUP_FIELDS.ALTER_ID] = altAtom === 'H' ? `R${radicalNum}-H` : `R${radicalNum}-OH`;\n          rgroup[RGROUP_FIELDS.CAP_GROUP_NAME] = altAtom === 'H' ? `H` : `OH`;\n          rgroup[RGROUP_FIELDS.LABEL] = `R${radicalNum}`;\n          jsonRgroups.push(rgroup);\n        });\n        monomer[key] = jsonRgroups;\n      } else {\n        if ((jsonSdfMonomerLibDict as { [key: string]: string | any })[key])\n          monomer[key] = table.get((jsonSdfMonomerLibDict as { [key: string]: string | any })[key], i);\n      }\n    });\n    resultLib.push(monomer);\n  }\n  return resultLib;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAoB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACpC,MAAM;IAOT;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA6B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAwDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAmBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|