msa-parsers 5.0.4 → 5.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/msa/A3mMSA.js +20 -18
- package/dist/msa/A3mMSA.js.map +1 -1
- package/dist/msa/A3mMSA.test.js +108 -2
- package/dist/msa/A3mMSA.test.js.map +1 -1
- package/package.json +1 -1
- package/src/msa/A3mMSA.test.ts +126 -2
- package/src/msa/A3mMSA.ts +20 -18
package/dist/msa/A3mMSA.js
CHANGED
|
@@ -79,32 +79,33 @@ export default class A3mMSA {
|
|
|
79
79
|
if (seqs.length < 2) {
|
|
80
80
|
return false;
|
|
81
81
|
}
|
|
82
|
-
// Check for lowercase and compute lengths in single pass per sequence
|
|
83
|
-
// In A3M,
|
|
82
|
+
// Check for lowercase and compute match column lengths in single pass per sequence
|
|
83
|
+
// In A3M, match columns = uppercase letters + dashes (deletions)
|
|
84
84
|
let hasLowercase = false;
|
|
85
|
-
let
|
|
86
|
-
let
|
|
85
|
+
let firstMatchLen = -1;
|
|
86
|
+
let sameMatchLength = true;
|
|
87
87
|
for (const seq of seqs) {
|
|
88
|
-
let
|
|
88
|
+
let matchLen = 0;
|
|
89
89
|
for (let i = 0; i < seq.length; i++) {
|
|
90
90
|
const code = seq.charCodeAt(i);
|
|
91
91
|
if (isLower(code)) {
|
|
92
92
|
hasLowercase = true;
|
|
93
93
|
}
|
|
94
|
-
else if (code >= CODE_A && code <= CODE_Z) {
|
|
95
|
-
|
|
94
|
+
else if (code >= CODE_A && code <= CODE_Z || code === CODE_DASH) {
|
|
95
|
+
// Uppercase letters and dashes are match columns
|
|
96
|
+
matchLen++;
|
|
96
97
|
}
|
|
97
98
|
}
|
|
98
|
-
if (
|
|
99
|
-
|
|
99
|
+
if (firstMatchLen === -1) {
|
|
100
|
+
firstMatchLen = matchLen;
|
|
100
101
|
}
|
|
101
102
|
else {
|
|
102
|
-
if (
|
|
103
|
-
|
|
103
|
+
if (matchLen !== firstMatchLen) {
|
|
104
|
+
sameMatchLength = false;
|
|
104
105
|
}
|
|
105
106
|
}
|
|
106
107
|
}
|
|
107
|
-
return hasLowercase &&
|
|
108
|
+
return hasLowercase && sameMatchLength;
|
|
108
109
|
}
|
|
109
110
|
/**
|
|
110
111
|
* Expand A3M format to standard aligned format.
|
|
@@ -130,15 +131,16 @@ export default class A3mMSA {
|
|
|
130
131
|
let i = 0;
|
|
131
132
|
while (i < seq.length) {
|
|
132
133
|
const code = seq.charCodeAt(i);
|
|
133
|
-
if (code >= CODE_A && code <= CODE_Z) {
|
|
134
|
-
// Uppercase letter - match column
|
|
134
|
+
if (code >= CODE_A && code <= CODE_Z || code === CODE_DASH) {
|
|
135
|
+
// Uppercase letter or dash - match column (dash = deletion)
|
|
135
136
|
matches.push(seq[i]);
|
|
136
|
-
// Collect following lowercase/
|
|
137
|
+
// Collect following lowercase/dot characters as insert content
|
|
138
|
+
// Note: dash is NOT insert content, it's a match column
|
|
137
139
|
let ins = '';
|
|
138
140
|
let j = i + 1;
|
|
139
141
|
while (j < seq.length) {
|
|
140
142
|
const c = seq.charCodeAt(j);
|
|
141
|
-
if (isLower(c) || c ===
|
|
143
|
+
if (isLower(c) || c === CODE_DOT) {
|
|
142
144
|
ins += seq[j];
|
|
143
145
|
j++;
|
|
144
146
|
}
|
|
@@ -149,8 +151,8 @@ export default class A3mMSA {
|
|
|
149
151
|
inserts.push(ins);
|
|
150
152
|
i = j;
|
|
151
153
|
}
|
|
152
|
-
else if (code ===
|
|
153
|
-
// Leading gap
|
|
154
|
+
else if (code === CODE_DOT) {
|
|
155
|
+
// Leading dot (gap aligned to insert) - skip
|
|
154
156
|
i++;
|
|
155
157
|
}
|
|
156
158
|
else if (isLower(code)) {
|
package/dist/msa/A3mMSA.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"A3mMSA.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sDAAsD;AACtD,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,GAAG,CAAA,CAAC,MAAM;AACzB,MAAM,SAAS,GAAG,EAAE,CAAA,CAAC,MAAM;AAC3B,MAAM,QAAQ,GAAG,EAAE,CAAA,CAAC,MAAM;AAE1B,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,MAAM;IACjB,GAAG,CAAqC;IACxC,YAAY,CAAU;IAE9B,YAAY,IAAY;QACtB,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAE1B,6DAA6D;QAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAA;YAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;YACrC,MAAM,EAAE,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAA;YACjE,IAAI,EAAE,EAAE,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;gBAC/D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,KAAK,CAAA;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAA;IACxD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,MAAM,IAAI,GAAa,EAAE,CAAA;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;YAC7D,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,OAAO,KAAK,CAAA;QACd,CAAC;QAED,
|
|
1
|
+
{"version":3,"file":"A3mMSA.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sDAAsD;AACtD,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,GAAG,CAAA,CAAC,MAAM;AACzB,MAAM,SAAS,GAAG,EAAE,CAAA,CAAC,MAAM;AAC3B,MAAM,QAAQ,GAAG,EAAE,CAAA,CAAC,MAAM;AAE1B,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,MAAM;IACjB,GAAG,CAAqC;IACxC,YAAY,CAAU;IAE9B,YAAY,IAAY;QACtB,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAE1B,6DAA6D;QAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAA;YAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;YACrC,MAAM,EAAE,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAA;YACjE,IAAI,EAAE,EAAE,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;gBAC/D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,KAAK,CAAA;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAA;IACxD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,MAAM,IAAI,GAAa,EAAE,CAAA;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;YAC7D,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,OAAO,KAAK,CAAA;QACd,CAAC;QAED,mFAAmF;QACnF,iEAAiE;QACjE,IAAI,YAAY,GAAG,KAAK,CAAA;QACxB,IAAI,aAAa,GAAG,CAAC,CAAC,CAAA;QACtB,IAAI,eAAe,GAAG,IAAI,CAAA;QAE1B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,QAAQ,GAAG,CAAC,CAAA;YAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBAClB,YAAY,GAAG,IAAI,CAAA;gBACrB,CAAC;qBAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;oBAClE,iDAAiD;oBACjD,QAAQ,EAAE,CAAA;gBACZ,CAAC;YACH,CAAC;YAED,IAAI,aAAa,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzB,aAAa,GAAG,QAAQ,CAAA;YAC1B,CAAC;iBAAM,CAAC;gBACN,IAAI,QAAQ,KAAK,aAAa,EAAE,CAAC;oBAC/B,eAAe,GAAG,KAAK,CAAA;gBACzB,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,YAAY,IAAI,eAAe,CAAA;IACxC,CAAC;IAED;;;;;;;OAOG;IACK,SAAS,CACf,OAAiB,EACjB,KAAe;QAEf,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAA;QAC5B,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;YAClB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,2EAA2E;QAC3E,yEAAyE;QACzE,MAAM,UAAU,GAAe,EAAE,CAAA;QACjC,MAAM,aAAa,GAAe,EAAE,CAAA;QAEpC,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,IAAI,CAAC,GAAG,CAAC,CAAA;YAET,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAE9B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;oBAC3D,4DAA4D;oBAC5D,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAA;oBACrB,+DAA+D;oBAC/D,wDAAwD;oBACxD,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;oBACb,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;wBACtB,MAAM,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;4BACjC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;4BACd,CAAC,EAAE,CAAA;wBACL,CAAC;6BAAM,CAAC;4BACN,MAAK;wBACP,CAAC;oBACH,CAAC;oBACD,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBACjB,CAAC,GAAG,CAAC,CAAA;gBACP,CAAC;qBAAM,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,6CAA6C;oBAC7C,CAAC,EAAE,CAAA;gBACL,CAAC;qBAAM,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,oCAAoC;oBACpC,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACpD,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;wBACd,CAAC,EAAE,CAAA;oBACL,CAAC;oBACD,mCAAmC;oBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;oBAChB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,CAAC;oBACN,CAAC,EAAE,CAAA;gBACL,CAAC;YACH,CAAC;YAED,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACxB,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC7B,CAAC;QAED,oEAAoE;QACpE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAA;QAElE,2EAA2E;QAC3E,MAAM,UAAU,GAAG,IAAI,KAAK,CAAS,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC1D,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC9C,oDAAoD;gBACpD,IAAI,OAAO,GAAG,CAAC,CAAA;gBACf,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC9B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC7B,OAAO,EAAE,CAAA;oBACX,CAAC;gBACH,CAAC;gBACD,IAAI,OAAO,GAAG,UAAU,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC/B,UAAU,CAAC,GAAG,CAAC,GAAG,OAAO,CAAA;gBAC3B,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,MAAM,QAAQ,GAA2B,EAAE,CAAA;QAE3C,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAE,CAAA;YACnC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,MAAM,MAAM,GAAa,EAAE,CAAA;YAE3B,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;gBAE/B,IAAI,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;oBACzB,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAE,CAAA;oBAC/B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;oBAErC,wCAAwC;oBACxC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,CAAA;oBAE7B,yBAAyB;oBACzB,IAAI,SAAS,GAAG,EAAE,CAAA;oBAClB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC7B,SAAS,IAAI,CAAC,CAAC,WAAW,EAAE,CAAA;wBAC9B,CAAC;oBACH,CAAC;oBAED,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;oBAEtB,2CAA2C;oBAC3C,MAAM,OAAO,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,CAAA;oBACzC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;wBAChB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAA;oBAClC,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAChB,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;wBACf,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAA;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAC5C,CAAC;QAED,OAAO,QAAQ,CAAA;IACjB,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,GAAG,CAAA;IACjB,CAAC;IAED,UAAU;QACR,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,YAAY,CAAA;IAC1B,CAAC;IAED,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IACrC,CAAC;IAED,QAAQ;QACN,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,aAAa;QACX,OAAO,EAAE,CAAA;IACX,CAAC;IAED,IAAI,cAAc;QAChB,OAAO,EAAE,CAAA;IACX,CAAC;IAED,SAAS;QACP,OAAO,EAAE,CAAA;IACX,CAAC;IAED,OAAO;QACL,OAAO;YACL,EAAE,EAAE,MAAM;YACV,IAAI,EAAE,MAAM;YACZ,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrC,EAAE,EAAE,IAAI;gBACR,QAAQ,EAAE,EAAE;gBACZ,IAAI;aACL,CAAC,CAAC;SACJ,CAAA;IACH,CAAC;IAED,IAAI,YAAY;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,2BAA2B;QAC7B,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM;QACR,OAAO,EAAE,CAAA;IACX,CAAC;CACF"}
|
package/dist/msa/A3mMSA.test.js
CHANGED
|
@@ -25,6 +25,24 @@ ACDEF---KLMNPQ`;
|
|
|
25
25
|
ACDEFghiKLMNPQ`;
|
|
26
26
|
expect(A3mMSA.sniff(a3m)).toBe(false);
|
|
27
27
|
});
|
|
28
|
+
test('counts dashes as match columns in sniff', () => {
|
|
29
|
+
// Both sequences have 6 match columns (uppercase + dashes)
|
|
30
|
+
const a3m = `>seq1
|
|
31
|
+
ACDEFghi
|
|
32
|
+
>seq2
|
|
33
|
+
--DEF---`;
|
|
34
|
+
expect(A3mMSA.sniff(a3m)).toBe(true);
|
|
35
|
+
});
|
|
36
|
+
test('returns false when match column counts differ', () => {
|
|
37
|
+
// seq1: 5 match columns (ACDEF)
|
|
38
|
+
// seq2: 8 match columns (ACDEF---)
|
|
39
|
+
// Without lowercase, this shouldn't be detected as a3m anyway
|
|
40
|
+
const notA3m = `>seq1
|
|
41
|
+
ACDEF
|
|
42
|
+
>seq2
|
|
43
|
+
ACDEF---`;
|
|
44
|
+
expect(A3mMSA.sniff(notA3m)).toBe(false);
|
|
45
|
+
});
|
|
28
46
|
});
|
|
29
47
|
describe('parsing', () => {
|
|
30
48
|
test('parses simple A3M', () => {
|
|
@@ -40,25 +58,113 @@ ACDEF---KLMNPQ`;
|
|
|
40
58
|
expect(seq1).toContain('GHI');
|
|
41
59
|
});
|
|
42
60
|
test('expands lowercase insertions', () => {
|
|
61
|
+
// seq1 has 2 match columns (A, C) with 3 lowercase inserts (abc)
|
|
62
|
+
// seq2 has 2 match columns (A, C) with no inserts
|
|
43
63
|
const a3m = `>seq1
|
|
44
64
|
ACabc
|
|
45
65
|
>seq2
|
|
46
|
-
AC
|
|
66
|
+
AC`;
|
|
47
67
|
const msa = new A3mMSA(a3m);
|
|
48
68
|
const seq1 = msa.getRow('seq1');
|
|
49
69
|
const seq2 = msa.getRow('seq2');
|
|
70
|
+
// seq1: A + C + ABC (inserts uppercased) = ACABC
|
|
71
|
+
// seq2: A + C + ... (gaps for missing inserts) = AC...
|
|
50
72
|
expect(seq1).toBe('ACABC');
|
|
51
73
|
expect(seq2).toBe('AC...');
|
|
52
74
|
});
|
|
53
75
|
test('handles multiple insertions', () => {
|
|
76
|
+
// seq1: 3 match columns (A, D, G) with inserts after A and D
|
|
77
|
+
// seq2: 3 match columns (A, D, G) with no inserts
|
|
54
78
|
const a3m = `>seq1
|
|
55
79
|
AabcDdefG
|
|
56
80
|
>seq2
|
|
57
|
-
|
|
81
|
+
ADG`;
|
|
58
82
|
const msa = new A3mMSA(a3m);
|
|
59
83
|
const seq1 = msa.getRow('seq1');
|
|
60
84
|
const seq2 = msa.getRow('seq2');
|
|
61
85
|
expect(seq1.length).toBe(seq2.length);
|
|
86
|
+
// seq1: A + ABC + D + DEF + G = AABCDDEFG
|
|
87
|
+
// seq2: A + ... + D + ... + G = A...D...G
|
|
88
|
+
expect(seq1).toBe('AABCDDEFG');
|
|
89
|
+
expect(seq2).toBe('A...D...G');
|
|
90
|
+
});
|
|
91
|
+
test('treats dashes as match columns (deletions)', () => {
|
|
92
|
+
// query: 6 match columns (ACDEFG)
|
|
93
|
+
// seq1: 6 match columns with deletions at positions 3,4 (AC--FG)
|
|
94
|
+
const a3m = `>query
|
|
95
|
+
ACDEFG
|
|
96
|
+
>seq1
|
|
97
|
+
AC--FG`;
|
|
98
|
+
const msa = new A3mMSA(a3m);
|
|
99
|
+
const query = msa.getRow('query');
|
|
100
|
+
const seq1 = msa.getRow('seq1');
|
|
101
|
+
expect(query.length).toBe(seq1.length);
|
|
102
|
+
expect(query).toBe('ACDEFG');
|
|
103
|
+
expect(seq1).toBe('AC--FG');
|
|
104
|
+
});
|
|
105
|
+
test('handles leading dashes correctly', () => {
|
|
106
|
+
// query: 6 match columns
|
|
107
|
+
// seq1: 6 match columns with deletions at start
|
|
108
|
+
const a3m = `>query
|
|
109
|
+
ACDEFG
|
|
110
|
+
>seq1
|
|
111
|
+
--DEFG`;
|
|
112
|
+
const msa = new A3mMSA(a3m);
|
|
113
|
+
const query = msa.getRow('query');
|
|
114
|
+
const seq1 = msa.getRow('seq1');
|
|
115
|
+
expect(query.length).toBe(seq1.length);
|
|
116
|
+
expect(query).toBe('ACDEFG');
|
|
117
|
+
expect(seq1).toBe('--DEFG');
|
|
118
|
+
});
|
|
119
|
+
test('handles trailing dashes correctly', () => {
|
|
120
|
+
const a3m = `>query
|
|
121
|
+
ACDEFG
|
|
122
|
+
>seq1
|
|
123
|
+
ACDE--`;
|
|
124
|
+
const msa = new A3mMSA(a3m);
|
|
125
|
+
const query = msa.getRow('query');
|
|
126
|
+
const seq1 = msa.getRow('seq1');
|
|
127
|
+
expect(query.length).toBe(seq1.length);
|
|
128
|
+
expect(query).toBe('ACDEFG');
|
|
129
|
+
expect(seq1).toBe('ACDE--');
|
|
130
|
+
});
|
|
131
|
+
test('handles dashes with insertions', () => {
|
|
132
|
+
// query: 4 match columns (ACFG) with insert (de) after C
|
|
133
|
+
// seq1: 4 match columns with deletion at position 2
|
|
134
|
+
const a3m = `>query
|
|
135
|
+
ACdeFG
|
|
136
|
+
>seq1
|
|
137
|
+
A-FG`;
|
|
138
|
+
const msa = new A3mMSA(a3m);
|
|
139
|
+
const query = msa.getRow('query');
|
|
140
|
+
const seq1 = msa.getRow('seq1');
|
|
141
|
+
expect(query.length).toBe(seq1.length);
|
|
142
|
+
// query: A + C + DE + F + G = ACDEFG
|
|
143
|
+
// seq1: A + - + .. + F + G = A-..FG
|
|
144
|
+
expect(query).toBe('ACDEFG');
|
|
145
|
+
expect(seq1).toBe('A-..FG');
|
|
146
|
+
});
|
|
147
|
+
test('realistic a3m with insertions and deletions', () => {
|
|
148
|
+
// Based on trRosetta documentation example pattern
|
|
149
|
+
const a3m = `>query
|
|
150
|
+
ACDEFGHIKLMNPQ
|
|
151
|
+
>seq1
|
|
152
|
+
ACDEFghiKLMNPQ
|
|
153
|
+
>seq2
|
|
154
|
+
--DEF---KLMNPQ`;
|
|
155
|
+
const msa = new A3mMSA(a3m);
|
|
156
|
+
const query = msa.getRow('query');
|
|
157
|
+
const seq1 = msa.getRow('seq1');
|
|
158
|
+
const seq2 = msa.getRow('seq2');
|
|
159
|
+
expect(query.length).toBe(seq1.length);
|
|
160
|
+
expect(query.length).toBe(seq2.length);
|
|
161
|
+
// seq1 has insert 'ghi' after F, so all sequences get 3 insert columns there
|
|
162
|
+
// query: ACDEF + ... + GHIKLMNPQ = ACDEF...GHIKLMNPQ
|
|
163
|
+
// seq1: ACDEF + GHI + GHIKLMNPQ = ACDEFGHIGHIKLMNPQ
|
|
164
|
+
// seq2: --DEF + ... + ---KLMNPQ = --DEF...---KLMNPQ
|
|
165
|
+
expect(query).toBe('ACDEF...GHIKLMNPQ');
|
|
166
|
+
expect(seq1).toBe('ACDEFGHIGHIKLMNPQ');
|
|
167
|
+
expect(seq2).toBe('--DEF...---KLMNPQ');
|
|
62
168
|
});
|
|
63
169
|
test('getWidth returns correct width', () => {
|
|
64
170
|
const a3m = `>seq1
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"A3mMSA.test.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,aAAa,CAAA;AAEhC,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC3C,MAAM,KAAK,GAAG;;;qBAGC,CAAA;YACf,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;eACH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC7B,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;YAChD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG;;;
|
|
1
|
+
{"version":3,"file":"A3mMSA.test.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,aAAa,CAAA;AAEhC,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC3C,MAAM,KAAK,GAAG;;;qBAGC,CAAA;YACf,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;eACH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACnD,2DAA2D;YAC3D,MAAM,GAAG,GAAG;;;SAGT,CAAA;YACH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,+CAA+C,EAAE,GAAG,EAAE;YACzD,gCAAgC;YAChC,mCAAmC;YACnC,8DAA8D;YAC9D,MAAM,MAAM,GAAG;;;SAGZ,CAAA;YACH,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC1C,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC7B,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;YAChD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,iEAAiE;YACjE,kDAAkD;YAClD,MAAM,GAAG,GAAG;;;GAGf,CAAA;YACG,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,iDAAiD;YACjD,uDAAuD;YACvD,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC1B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC5B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,6DAA6D;YAC7D,kDAAkD;YAClD,MAAM,GAAG,GAAG;;;IAGd,CAAA;YACE,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,0CAA0C;YAC1C,0CAA0C;YAC1C,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC9B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACtD,kCAAkC;YAClC,iEAAiE;YACjE,MAAM,GAAG,GAAG;;;OAGX,CAAA;YACD,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACtC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YAC5B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,yBAAyB;YACzB,gDAAgD;YAChD,MAAM,GAAG,GAAG;;;OAGX,CAAA;YACD,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACtC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YAC5B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;;;OAGX,CAAA;YACD,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACtC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YAC5B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,yDAAyD;YACzD,oDAAoD;YACpD,MAAM,GAAG,GAAG;;;KAGb,CAAA;YACC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACtC,qCAAqC;YACrC,oCAAoC;YACpC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YAC5B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QAC7B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACvD,mDAAmD;YACnD,MAAM,GAAG,GAAG;;;;;eAKH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YACjC,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACtC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAEtC,6EAA6E;YAC7E,qDAAqD;YACrD,qDAAqD;YACrD,qDAAqD;YACrD,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;YACvC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;YACtC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAClC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,EAAE,CAAA;YAEzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;YAC3C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QAC7C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,EAAE,CAAA;YAE1B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC9B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACnC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACrD,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;QAClD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;YACpC,MAAM,GAAG,GAAG;;;;;MAKZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAA;QAC7D,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACzC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACrC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,aAAa,EAAE,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;YACpD,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAA;QACzD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;YACjC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC9C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACrC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
package/package.json
CHANGED
package/src/msa/A3mMSA.test.ts
CHANGED
|
@@ -30,6 +30,26 @@ ACDEF---KLMNPQ`
|
|
|
30
30
|
ACDEFghiKLMNPQ`
|
|
31
31
|
expect(A3mMSA.sniff(a3m)).toBe(false)
|
|
32
32
|
})
|
|
33
|
+
|
|
34
|
+
test('counts dashes as match columns in sniff', () => {
|
|
35
|
+
// Both sequences have 6 match columns (uppercase + dashes)
|
|
36
|
+
const a3m = `>seq1
|
|
37
|
+
ACDEFghi
|
|
38
|
+
>seq2
|
|
39
|
+
--DEF---`
|
|
40
|
+
expect(A3mMSA.sniff(a3m)).toBe(true)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
test('returns false when match column counts differ', () => {
|
|
44
|
+
// seq1: 5 match columns (ACDEF)
|
|
45
|
+
// seq2: 8 match columns (ACDEF---)
|
|
46
|
+
// Without lowercase, this shouldn't be detected as a3m anyway
|
|
47
|
+
const notA3m = `>seq1
|
|
48
|
+
ACDEF
|
|
49
|
+
>seq2
|
|
50
|
+
ACDEF---`
|
|
51
|
+
expect(A3mMSA.sniff(notA3m)).toBe(false)
|
|
52
|
+
})
|
|
33
53
|
})
|
|
34
54
|
|
|
35
55
|
describe('parsing', () => {
|
|
@@ -49,30 +69,134 @@ ACDEF---KLMNPQ`
|
|
|
49
69
|
})
|
|
50
70
|
|
|
51
71
|
test('expands lowercase insertions', () => {
|
|
72
|
+
// seq1 has 2 match columns (A, C) with 3 lowercase inserts (abc)
|
|
73
|
+
// seq2 has 2 match columns (A, C) with no inserts
|
|
52
74
|
const a3m = `>seq1
|
|
53
75
|
ACabc
|
|
54
76
|
>seq2
|
|
55
|
-
AC
|
|
77
|
+
AC`
|
|
56
78
|
const msa = new A3mMSA(a3m)
|
|
57
79
|
|
|
58
80
|
const seq1 = msa.getRow('seq1')
|
|
59
81
|
const seq2 = msa.getRow('seq2')
|
|
60
82
|
|
|
83
|
+
// seq1: A + C + ABC (inserts uppercased) = ACABC
|
|
84
|
+
// seq2: A + C + ... (gaps for missing inserts) = AC...
|
|
61
85
|
expect(seq1).toBe('ACABC')
|
|
62
86
|
expect(seq2).toBe('AC...')
|
|
63
87
|
})
|
|
64
88
|
|
|
65
89
|
test('handles multiple insertions', () => {
|
|
90
|
+
// seq1: 3 match columns (A, D, G) with inserts after A and D
|
|
91
|
+
// seq2: 3 match columns (A, D, G) with no inserts
|
|
66
92
|
const a3m = `>seq1
|
|
67
93
|
AabcDdefG
|
|
68
94
|
>seq2
|
|
69
|
-
|
|
95
|
+
ADG`
|
|
70
96
|
const msa = new A3mMSA(a3m)
|
|
71
97
|
|
|
72
98
|
const seq1 = msa.getRow('seq1')
|
|
73
99
|
const seq2 = msa.getRow('seq2')
|
|
74
100
|
|
|
75
101
|
expect(seq1.length).toBe(seq2.length)
|
|
102
|
+
// seq1: A + ABC + D + DEF + G = AABCDDEFG
|
|
103
|
+
// seq2: A + ... + D + ... + G = A...D...G
|
|
104
|
+
expect(seq1).toBe('AABCDDEFG')
|
|
105
|
+
expect(seq2).toBe('A...D...G')
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
test('treats dashes as match columns (deletions)', () => {
|
|
109
|
+
// query: 6 match columns (ACDEFG)
|
|
110
|
+
// seq1: 6 match columns with deletions at positions 3,4 (AC--FG)
|
|
111
|
+
const a3m = `>query
|
|
112
|
+
ACDEFG
|
|
113
|
+
>seq1
|
|
114
|
+
AC--FG`
|
|
115
|
+
const msa = new A3mMSA(a3m)
|
|
116
|
+
|
|
117
|
+
const query = msa.getRow('query')
|
|
118
|
+
const seq1 = msa.getRow('seq1')
|
|
119
|
+
|
|
120
|
+
expect(query.length).toBe(seq1.length)
|
|
121
|
+
expect(query).toBe('ACDEFG')
|
|
122
|
+
expect(seq1).toBe('AC--FG')
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
test('handles leading dashes correctly', () => {
|
|
126
|
+
// query: 6 match columns
|
|
127
|
+
// seq1: 6 match columns with deletions at start
|
|
128
|
+
const a3m = `>query
|
|
129
|
+
ACDEFG
|
|
130
|
+
>seq1
|
|
131
|
+
--DEFG`
|
|
132
|
+
const msa = new A3mMSA(a3m)
|
|
133
|
+
|
|
134
|
+
const query = msa.getRow('query')
|
|
135
|
+
const seq1 = msa.getRow('seq1')
|
|
136
|
+
|
|
137
|
+
expect(query.length).toBe(seq1.length)
|
|
138
|
+
expect(query).toBe('ACDEFG')
|
|
139
|
+
expect(seq1).toBe('--DEFG')
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
test('handles trailing dashes correctly', () => {
|
|
143
|
+
const a3m = `>query
|
|
144
|
+
ACDEFG
|
|
145
|
+
>seq1
|
|
146
|
+
ACDE--`
|
|
147
|
+
const msa = new A3mMSA(a3m)
|
|
148
|
+
|
|
149
|
+
const query = msa.getRow('query')
|
|
150
|
+
const seq1 = msa.getRow('seq1')
|
|
151
|
+
|
|
152
|
+
expect(query.length).toBe(seq1.length)
|
|
153
|
+
expect(query).toBe('ACDEFG')
|
|
154
|
+
expect(seq1).toBe('ACDE--')
|
|
155
|
+
})
|
|
156
|
+
|
|
157
|
+
test('handles dashes with insertions', () => {
|
|
158
|
+
// query: 4 match columns (ACFG) with insert (de) after C
|
|
159
|
+
// seq1: 4 match columns with deletion at position 2
|
|
160
|
+
const a3m = `>query
|
|
161
|
+
ACdeFG
|
|
162
|
+
>seq1
|
|
163
|
+
A-FG`
|
|
164
|
+
const msa = new A3mMSA(a3m)
|
|
165
|
+
|
|
166
|
+
const query = msa.getRow('query')
|
|
167
|
+
const seq1 = msa.getRow('seq1')
|
|
168
|
+
|
|
169
|
+
expect(query.length).toBe(seq1.length)
|
|
170
|
+
// query: A + C + DE + F + G = ACDEFG
|
|
171
|
+
// seq1: A + - + .. + F + G = A-..FG
|
|
172
|
+
expect(query).toBe('ACDEFG')
|
|
173
|
+
expect(seq1).toBe('A-..FG')
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
test('realistic a3m with insertions and deletions', () => {
|
|
177
|
+
// Based on trRosetta documentation example pattern
|
|
178
|
+
const a3m = `>query
|
|
179
|
+
ACDEFGHIKLMNPQ
|
|
180
|
+
>seq1
|
|
181
|
+
ACDEFghiKLMNPQ
|
|
182
|
+
>seq2
|
|
183
|
+
--DEF---KLMNPQ`
|
|
184
|
+
const msa = new A3mMSA(a3m)
|
|
185
|
+
|
|
186
|
+
const query = msa.getRow('query')
|
|
187
|
+
const seq1 = msa.getRow('seq1')
|
|
188
|
+
const seq2 = msa.getRow('seq2')
|
|
189
|
+
|
|
190
|
+
expect(query.length).toBe(seq1.length)
|
|
191
|
+
expect(query.length).toBe(seq2.length)
|
|
192
|
+
|
|
193
|
+
// seq1 has insert 'ghi' after F, so all sequences get 3 insert columns there
|
|
194
|
+
// query: ACDEF + ... + GHIKLMNPQ = ACDEF...GHIKLMNPQ
|
|
195
|
+
// seq1: ACDEF + GHI + GHIKLMNPQ = ACDEFGHIGHIKLMNPQ
|
|
196
|
+
// seq2: --DEF + ... + ---KLMNPQ = --DEF...---KLMNPQ
|
|
197
|
+
expect(query).toBe('ACDEF...GHIKLMNPQ')
|
|
198
|
+
expect(seq1).toBe('ACDEFGHIGHIKLMNPQ')
|
|
199
|
+
expect(seq2).toBe('--DEF...---KLMNPQ')
|
|
76
200
|
})
|
|
77
201
|
|
|
78
202
|
test('getWidth returns correct width', () => {
|
package/src/msa/A3mMSA.ts
CHANGED
|
@@ -91,33 +91,34 @@ export default class A3mMSA {
|
|
|
91
91
|
return false
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
// Check for lowercase and compute lengths in single pass per sequence
|
|
95
|
-
// In A3M,
|
|
94
|
+
// Check for lowercase and compute match column lengths in single pass per sequence
|
|
95
|
+
// In A3M, match columns = uppercase letters + dashes (deletions)
|
|
96
96
|
let hasLowercase = false
|
|
97
|
-
let
|
|
98
|
-
let
|
|
97
|
+
let firstMatchLen = -1
|
|
98
|
+
let sameMatchLength = true
|
|
99
99
|
|
|
100
100
|
for (const seq of seqs) {
|
|
101
|
-
let
|
|
101
|
+
let matchLen = 0
|
|
102
102
|
for (let i = 0; i < seq.length; i++) {
|
|
103
103
|
const code = seq.charCodeAt(i)
|
|
104
104
|
if (isLower(code)) {
|
|
105
105
|
hasLowercase = true
|
|
106
|
-
} else if (code >= CODE_A && code <= CODE_Z) {
|
|
107
|
-
|
|
106
|
+
} else if (code >= CODE_A && code <= CODE_Z || code === CODE_DASH) {
|
|
107
|
+
// Uppercase letters and dashes are match columns
|
|
108
|
+
matchLen++
|
|
108
109
|
}
|
|
109
110
|
}
|
|
110
111
|
|
|
111
|
-
if (
|
|
112
|
-
|
|
112
|
+
if (firstMatchLen === -1) {
|
|
113
|
+
firstMatchLen = matchLen
|
|
113
114
|
} else {
|
|
114
|
-
if (
|
|
115
|
-
|
|
115
|
+
if (matchLen !== firstMatchLen) {
|
|
116
|
+
sameMatchLength = false
|
|
116
117
|
}
|
|
117
118
|
}
|
|
118
119
|
}
|
|
119
120
|
|
|
120
|
-
return hasLowercase &&
|
|
121
|
+
return hasLowercase && sameMatchLength
|
|
121
122
|
}
|
|
122
123
|
|
|
123
124
|
/**
|
|
@@ -151,15 +152,16 @@ export default class A3mMSA {
|
|
|
151
152
|
while (i < seq.length) {
|
|
152
153
|
const code = seq.charCodeAt(i)
|
|
153
154
|
|
|
154
|
-
if (code >= CODE_A && code <= CODE_Z) {
|
|
155
|
-
// Uppercase letter - match column
|
|
155
|
+
if (code >= CODE_A && code <= CODE_Z || code === CODE_DASH) {
|
|
156
|
+
// Uppercase letter or dash - match column (dash = deletion)
|
|
156
157
|
matches.push(seq[i]!)
|
|
157
|
-
// Collect following lowercase/
|
|
158
|
+
// Collect following lowercase/dot characters as insert content
|
|
159
|
+
// Note: dash is NOT insert content, it's a match column
|
|
158
160
|
let ins = ''
|
|
159
161
|
let j = i + 1
|
|
160
162
|
while (j < seq.length) {
|
|
161
163
|
const c = seq.charCodeAt(j)
|
|
162
|
-
if (isLower(c) || c ===
|
|
164
|
+
if (isLower(c) || c === CODE_DOT) {
|
|
163
165
|
ins += seq[j]!
|
|
164
166
|
j++
|
|
165
167
|
} else {
|
|
@@ -168,8 +170,8 @@ export default class A3mMSA {
|
|
|
168
170
|
}
|
|
169
171
|
inserts.push(ins)
|
|
170
172
|
i = j
|
|
171
|
-
} else if (code ===
|
|
172
|
-
// Leading gap
|
|
173
|
+
} else if (code === CODE_DOT) {
|
|
174
|
+
// Leading dot (gap aligned to insert) - skip
|
|
173
175
|
i++
|
|
174
176
|
} else if (isLower(code)) {
|
|
175
177
|
// Leading insert before first match
|