@teselagen/sequence-utils 0.3.10 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bioData.d.ts +1 -1
- package/filterSequenceString.d.ts +2 -2
- package/index.js +8 -5
- package/index.mjs +8 -5
- package/index.umd.js +8 -5
- package/package.json +1 -1
- package/src/bioData.js +2 -2
- package/src/filterSequenceString.js +6 -6
- package/src/filterSequenceString.test.js +23 -5
- package/src/insertSequenceDataAtPositionOrRange.test.js +2 -2
- package/src/tidyUpSequenceData.test.js +27 -30
package/bioData.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
|
|
2
2
|
export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
|
|
3
|
-
export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO
|
|
3
|
+
export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
4
4
|
export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
|
|
5
5
|
export const unambiguous_dna_letters: "GATC";
|
|
6
6
|
export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
|
package/filterSequenceString.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna
|
|
1
|
+
export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
2
2
|
additionalValidChars?: string | undefined;
|
|
3
3
|
isOligo: any;
|
|
4
4
|
name: any;
|
|
@@ -6,7 +6,7 @@ export default function filterSequenceString(sequenceString: any, { additionalVa
|
|
|
6
6
|
isRna: any;
|
|
7
7
|
isMixedRnaAndDna: any;
|
|
8
8
|
}): (string | string[])[];
|
|
9
|
-
export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna
|
|
9
|
+
export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
10
10
|
isOligo: any;
|
|
11
11
|
isProtein: any;
|
|
12
12
|
isRna: any;
|
package/index.js
CHANGED
|
@@ -6000,7 +6000,7 @@ lodash.exports;
|
|
|
6000
6000
|
var lodashExports = lodash.exports;
|
|
6001
6001
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6002
6002
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6003
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO
|
|
6003
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6004
6004
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6005
6005
|
const unambiguous_dna_letters = "GATC";
|
|
6006
6006
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6056,7 +6056,7 @@ const extended_protein_values = {
|
|
|
6056
6056
|
Y: "Y",
|
|
6057
6057
|
Z: "QE",
|
|
6058
6058
|
"*": "\\*\\.",
|
|
6059
|
-
".": "
|
|
6059
|
+
".": "\\.",
|
|
6060
6060
|
"-": "\\-"
|
|
6061
6061
|
};
|
|
6062
6062
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12399,9 +12399,12 @@ function getReplaceChars({
|
|
|
12399
12399
|
isRna,
|
|
12400
12400
|
isMixedRnaAndDna
|
|
12401
12401
|
} = {}) {
|
|
12402
|
-
return isProtein ? {} :
|
|
12403
|
-
//
|
|
12404
|
-
{}
|
|
12402
|
+
return isProtein ? {} : (
|
|
12403
|
+
// {".": "*"}
|
|
12404
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12405
|
+
//just plain old dna
|
|
12406
|
+
{}
|
|
12407
|
+
)
|
|
12405
12408
|
);
|
|
12406
12409
|
}
|
|
12407
12410
|
__name(getReplaceChars, "getReplaceChars");
|
package/index.mjs
CHANGED
|
@@ -5998,7 +5998,7 @@ lodash.exports;
|
|
|
5998
5998
|
var lodashExports = lodash.exports;
|
|
5999
5999
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6000
6000
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6001
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO
|
|
6001
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6002
6002
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6003
6003
|
const unambiguous_dna_letters = "GATC";
|
|
6004
6004
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6054,7 +6054,7 @@ const extended_protein_values = {
|
|
|
6054
6054
|
Y: "Y",
|
|
6055
6055
|
Z: "QE",
|
|
6056
6056
|
"*": "\\*\\.",
|
|
6057
|
-
".": "
|
|
6057
|
+
".": "\\.",
|
|
6058
6058
|
"-": "\\-"
|
|
6059
6059
|
};
|
|
6060
6060
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12397,9 +12397,12 @@ function getReplaceChars({
|
|
|
12397
12397
|
isRna,
|
|
12398
12398
|
isMixedRnaAndDna
|
|
12399
12399
|
} = {}) {
|
|
12400
|
-
return isProtein ? {} :
|
|
12401
|
-
//
|
|
12402
|
-
{}
|
|
12400
|
+
return isProtein ? {} : (
|
|
12401
|
+
// {".": "*"}
|
|
12402
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12403
|
+
//just plain old dna
|
|
12404
|
+
{}
|
|
12405
|
+
)
|
|
12403
12406
|
);
|
|
12404
12407
|
}
|
|
12405
12408
|
__name(getReplaceChars, "getReplaceChars");
|
package/index.umd.js
CHANGED
|
@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6002
6002
|
var lodashExports = lodash.exports;
|
|
6003
6003
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6004
6004
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6005
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO
|
|
6005
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6006
6006
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6007
6007
|
const unambiguous_dna_letters = "GATC";
|
|
6008
6008
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6058,7 +6058,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6058
6058
|
Y: "Y",
|
|
6059
6059
|
Z: "QE",
|
|
6060
6060
|
"*": "\\*\\.",
|
|
6061
|
-
".": "
|
|
6061
|
+
".": "\\.",
|
|
6062
6062
|
"-": "\\-"
|
|
6063
6063
|
};
|
|
6064
6064
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12401,9 +12401,12 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12401
12401
|
isRna,
|
|
12402
12402
|
isMixedRnaAndDna
|
|
12403
12403
|
} = {}) {
|
|
12404
|
-
return isProtein ? {} :
|
|
12405
|
-
//
|
|
12406
|
-
{}
|
|
12404
|
+
return isProtein ? {} : (
|
|
12405
|
+
// {".": "*"}
|
|
12406
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12407
|
+
//just plain old dna
|
|
12408
|
+
{}
|
|
12409
|
+
)
|
|
12407
12410
|
);
|
|
12408
12411
|
}
|
|
12409
12412
|
__name(getReplaceChars, "getReplaceChars");
|
package/package.json
CHANGED
package/src/bioData.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
4
4
|
export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
5
|
-
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO
|
|
5
|
+
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6
6
|
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
7
7
|
export const unambiguous_dna_letters = "GATC";
|
|
8
8
|
export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -60,6 +60,6 @@ export const extended_protein_values = {
|
|
|
60
60
|
Y: "Y",
|
|
61
61
|
Z: "QE",
|
|
62
62
|
"*": "\\*\\.",
|
|
63
|
-
".": "
|
|
63
|
+
".": "\\.",
|
|
64
64
|
"-": "\\-"
|
|
65
65
|
};
|
package/src/filterSequenceString.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
ambiguous_dna_letters,
|
|
3
3
|
ambiguous_rna_letters,
|
|
4
|
-
extended_protein_letters
|
|
4
|
+
extended_protein_letters
|
|
5
5
|
} from "./bioData";
|
|
6
6
|
|
|
7
7
|
export default function filterSequenceString(
|
|
@@ -12,14 +12,14 @@ export default function filterSequenceString(
|
|
|
12
12
|
name,
|
|
13
13
|
isProtein,
|
|
14
14
|
isRna,
|
|
15
|
-
isMixedRnaAndDna
|
|
15
|
+
isMixedRnaAndDna
|
|
16
16
|
} = {}
|
|
17
17
|
) {
|
|
18
18
|
const acceptedChars = getAcceptedChars({
|
|
19
19
|
isOligo,
|
|
20
20
|
isProtein,
|
|
21
21
|
isRna,
|
|
22
|
-
isMixedRnaAndDna
|
|
22
|
+
isMixedRnaAndDna
|
|
23
23
|
});
|
|
24
24
|
const replaceChars = getReplaceChars({
|
|
25
25
|
isOligo,
|
|
@@ -80,7 +80,7 @@ export function getAcceptedChars({
|
|
|
80
80
|
isOligo,
|
|
81
81
|
isProtein,
|
|
82
82
|
isRna,
|
|
83
|
-
isMixedRnaAndDna
|
|
83
|
+
isMixedRnaAndDna
|
|
84
84
|
} = {}) {
|
|
85
85
|
return isProtein
|
|
86
86
|
? `${extended_protein_letters.toLowerCase()}}`
|
|
@@ -101,8 +101,8 @@ export function getReplaceChars({
|
|
|
101
101
|
} = {}) {
|
|
102
102
|
return isProtein
|
|
103
103
|
? {}
|
|
104
|
-
// {".": "*"}
|
|
105
|
-
|
|
104
|
+
: // {".": "*"}
|
|
105
|
+
isOligo
|
|
106
106
|
? {}
|
|
107
107
|
: isRna
|
|
108
108
|
? { t: "u" }
|
package/src/filterSequenceString.test.js
CHANGED
|
@@ -50,7 +50,9 @@ describe("filterSequenceString", () => {
|
|
|
50
50
|
}
|
|
51
51
|
);
|
|
52
52
|
// expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
|
|
53
|
-
expect(warnings[0]).toBe(
|
|
53
|
+
expect(warnings[0]).toBe(
|
|
54
|
+
'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / '
|
|
55
|
+
);
|
|
54
56
|
expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
|
|
55
57
|
});
|
|
56
58
|
it("when isProtein: true, should handle upper case letters", () => {
|
|
@@ -60,11 +62,27 @@ describe("filterSequenceString", () => {
|
|
|
60
62
|
expect(warnings.length).toBe(0);
|
|
61
63
|
expect(str).toBe("xtgalmfWKQEspvicyhrnd");
|
|
62
64
|
});
|
|
65
|
+
|
|
66
|
+
it("when isProtein: true it should not filter this aa seq", () => {
|
|
67
|
+
const [str] = filterSequenceString(
|
|
68
|
+
"mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq",
|
|
69
|
+
{
|
|
70
|
+
isProtein: true
|
|
71
|
+
}
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
expect(str).toBe(
|
|
75
|
+
`mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq`
|
|
76
|
+
);
|
|
77
|
+
});
|
|
63
78
|
it("when isProtein: true, it should convert . to *", () => {
|
|
64
|
-
const [str] = filterSequenceString(
|
|
65
|
-
|
|
66
|
-
|
|
79
|
+
const [str] = filterSequenceString(
|
|
80
|
+
'BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../',
|
|
81
|
+
{
|
|
82
|
+
isProtein: true
|
|
83
|
+
}
|
|
84
|
+
);
|
|
67
85
|
|
|
68
|
-
expect(str).toBe("
|
|
86
|
+
expect(str).toBe("BXZJUObbbxtgalbmfwkqespvicyhrnd");
|
|
69
87
|
});
|
|
70
88
|
});
|
package/src/insertSequenceDataAtPositionOrRange.test.js
CHANGED
|
@@ -135,7 +135,7 @@ describe("insertSequenceData", () => {
|
|
|
135
135
|
});
|
|
136
136
|
it("inserts characters at correct origin spanning range with {maintainOriginSplit: true} option", () => {
|
|
137
137
|
const sequenceToInsert = {
|
|
138
|
-
sequence: "
|
|
138
|
+
sequence: "crrrrry",
|
|
139
139
|
// fffffff
|
|
140
140
|
features: [{ name: "feat1", start: 0, end: 6 }]
|
|
141
141
|
};
|
|
@@ -154,7 +154,7 @@ describe("insertSequenceData", () => {
|
|
|
154
154
|
maintainOriginSplit: true
|
|
155
155
|
}
|
|
156
156
|
);
|
|
157
|
-
postInsertSeq.sequence.should.equal("
|
|
157
|
+
postInsertSeq.sequence.should.equal("rrrryagagacr");
|
|
158
158
|
// fffff fff ff
|
|
159
159
|
postInsertSeq.features.should.containSubset([
|
|
160
160
|
{ name: "feat1", start: 10, end: 4 },
|
package/src/tidyUpSequenceData.test.js
CHANGED
|
@@ -6,28 +6,25 @@ chai.use(chaiSubset);
|
|
|
6
6
|
chai.should();
|
|
7
7
|
describe("tidyUpSequenceData", () => {
|
|
8
8
|
it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
|
|
9
|
-
const res = tidyUpSequenceData(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
},
|
|
29
|
-
|
|
30
|
-
);
|
|
9
|
+
const res = tidyUpSequenceData({
|
|
10
|
+
sequence: "http://localhost:3344/Standalone",
|
|
11
|
+
features: [
|
|
12
|
+
{
|
|
13
|
+
start: 3,
|
|
14
|
+
end: 20,
|
|
15
|
+
locations: [
|
|
16
|
+
{
|
|
17
|
+
start: "3", //this should be converted to an int :)
|
|
18
|
+
end: 5
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
start: 10,
|
|
22
|
+
end: 20
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
});
|
|
31
28
|
res.should.containSubset({
|
|
32
29
|
sequence: "httcahstStandan",
|
|
33
30
|
circular: false,
|
|
@@ -64,22 +61,22 @@ describe("tidyUpSequenceData", () => {
|
|
|
64
61
|
},
|
|
65
62
|
{ convertAnnotationsFromAAIndices: true }
|
|
66
63
|
);
|
|
67
|
-
|
|
64
|
+
|
|
68
65
|
res.should.containSubset({
|
|
69
66
|
aminoAcidDataForEachBaseOfDNA: [],
|
|
70
67
|
isProtein: true,
|
|
71
|
-
size:
|
|
72
|
-
proteinSize:
|
|
73
|
-
sequence: "
|
|
74
|
-
proteinSequence: "gagiuhwgagalasjglj
|
|
68
|
+
size: 54, //size should refer to the DNA length
|
|
69
|
+
proteinSize: 18, //proteinSize should refer to the amino acid length
|
|
70
|
+
sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtn", //degenerate sequence
|
|
71
|
+
proteinSequence: "gagiuhwgagalasjglj",
|
|
75
72
|
circular: false,
|
|
76
73
|
features: [
|
|
77
74
|
{ start: 9, end: 32, forward: true },
|
|
78
|
-
{ start: 30, end:
|
|
75
|
+
{ start: 30, end: 53, forward: true },
|
|
79
76
|
{
|
|
80
77
|
name: "iDon'tFit",
|
|
81
|
-
start:
|
|
82
|
-
end:
|
|
78
|
+
start: 51,
|
|
79
|
+
end: 53,
|
|
83
80
|
forward: true
|
|
84
81
|
}
|
|
85
82
|
]
|