encodingscore-js 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,90 @@
1
+ # EncodingScore.js
2
+
3
+ This library offers an `EncodingScorer` class which provides a score normalized between 0 and 1, representing how closely an encoded string aligns with a provided regular expression.
4
+
5
+ ## Supported Encodings
6
+
7
+ The following encodings are supported for scoring:
8
+
9
+ - Base64
10
+ - Hexadecimal
11
+ - Octal
12
+ - Decimal
13
+
14
+ ## How it works
15
+
16
+ Hexadecimal and base64 work differently from octal and decimal, since base64 and hexadecimal have fixed block sizes while decimal and octal are token-based, with varying lengths to each token.
17
+
18
+ ### Computing encoded n-grams
19
+
20
+ A sliding window is moved across hexadecimal and base64 strings, evaluating an n-gram (a bigram for hexadecimal, a tetragram for base64) at each possible alignment. This catches scenarios where the correctly encoded text has been rotated, or prepended with an invalid character to obfuscate the text.
21
+
22
+ Octal and decimal strings are split by a space delimiter. Because of this, no sliding occurs.
23
+
24
+ ### Scoring
25
+
26
+ Scoring is done based on a ratio of "hits" (n-grams that pass the regex check) to total evaluated n-grams or tokens.
27
+
28
+ For hexadecimal and base64, an "offset" is applied by removing the final *n - 1* (n=2 for hexadecimal, n=4 for base64) hit/miss results to account for the fact that the final *n - 1* hit results in the array are not going to be long enough and will therefore always miss. This is not necessary for octal and decimal, since it is a token-based system.
29
+
30
+ ## Examples
31
+
32
+ Import the encoding scorer:
33
+ ```js
34
+ import { EncodingScorer } from "./src/encodingScorer.js";
35
+ ```
36
+
37
+ ### Hexadecimal usage
38
+
39
+ ```js
40
+ // Example usage for hexadecimal
41
+ const HEX_STRING = "68 65 6c 6c 6f 2e 20 74 68 69 73 20 69 73 20 61 20 6c 6f 6e 67 20 6d 65 73 73 61 67 65 20 63 6f 6e 74 61 69 6e 69 6e 67 20 61 20 6c 6f 74 20 6f 66 20 64 61 74 61 2e 20 53 6d 61 6c 6c 65 72 20 64 61 74 61 20 69 73 20 6c 65 73 73 20 70 72 65 64 69 63 74 61 62 6c 65 2e 20 50 6c 65 61 73 65 20 63 6f 6e 73 69 64 65 72 20 75 73 69 6e 67 20 6c 6f 6e 67 65 72 20 73 74 72 65 61 6d 73 20 6f 66 20 64 61 74 61 20 66 6f 72 20 61 20 68 69 67 68 65 72 20 64 65 67 72 65 65 20 6f 66 20 61 63 63 75 72 61 63 79 2e 20 54 68 61 6e 6b 73"
42
+
43
+ const NON_MATCHING_HEX = "ff ff ff ff ff ff ff ff 6d ff 6d ff"
44
+
45
+ const hexScorer = new EncodingScorer("hexadecimal");
46
+ console.log(hexScorer.score(HEX_STRING)); // 0.5899419729206963
47
+ console.log(hexScorer.score(NON_MATCHING_HEX)); // 0.12903225806451613
48
+ ```
49
+ ### Base64 Usage
50
+
51
+ ```js
52
+ const BASE_64_STRING = "aGVsbG8uIHRoaXMgaXMgYSBsb25nIG1lc3NhZ2UgY29udGFpbmluZyBhIGxvdCBvZiBkYXRhLiBTbWFsbGVyIGRhdGEgaXMgbGVzcyBwcmVkaWN0YWJsZS4gUGxlYXNlIGNvbnNpZGVyIHVzaW5nIGxvbmdlciBzdHJlYW1zIG9mIGRhdGEgZm9yIGEgaGlnaGVyIGRlZ3JlZSBvZiBhY2N1cmFjeS4gVGhhbmtz"
53
+
54
+ const base64Scorer = new EncodingScorer("base64");
55
+ console.log(base64Scorer.score(BASE_64_STRING)); // 0.5263157894736842
56
+ ```
57
+
58
+ ### Octal Usage
59
+
60
+ ```js
61
+
62
+ const OCTAL_STRING = "150 145 154 154 157 56 40 164 150 151 163 40 151 163 40 141 40 154 157 156 147 40 155 145 163 163 141 147 145 40 143 157 156 164 141 151 156 151 156 147 40 141 40 154 157 164 40 157 146 40 144 141 164 141 56 40 123 155 141 154 154 145 162 40 144 141 164 141 40 151 163 40 154 145 163 163 40 160 162 145 144 151 143 164 141 142 154 145 56 40 120 154 145 141 163 145 40 143 157 156 163 151 144 145 162 40 165 163 151 156 147 40 154 157 156 147 145 162 40 163 164 162 145 141 155 163 40 157 146 40 144 141 164 141 40 146 157 162 40 141 40 150 151 147 150 145 162 40 144 145 147 162 145 145 40 157 146 40 141 143 143 165 162 141 143 171 56 40 124 150 141 156 153 163"
63
+
64
+ const octalScorer = new EncodingScorer("octal");
65
+ console.log(octalScorer.score(OCTAL_STRING)); // 0.8333333333333334
66
+ ```
67
+
68
+ ### Decimal Usage
69
+
70
+ ```js
71
+ const DECIMAL_STRING = "104 101 108 108 111 46 32 116 104 105 115 32 105 115 32 97 32 108 111 110 103 32 109 101 115 115 97 103 101 32 99 111 110 116 97 105 110 105 110 103 32 97 32 108 111 116 32 111 102 32 100 97 116 97 46 32 83 109 97 108 108 101 114 32 100 97 116 97 32 105 115 32 108 101 115 115 32 112 114 101 100 105 99 116 97 98 108 101 46 32 80 108 101 97 115 101 32 99 111 110 115 105 100 101 114 32 117 115 105 110 103 32 108 111 110 103 101 114 32 115 116 114 101 97 109 115 32 111 102 32 100 97 116 97 32 102 111 114 32 97 32 104 105 103 104 101 114 32 100 101 103 114 101 101 32 111 102 32 97 99 99 117 114 97 99 121 46 32 84 104 97 110 107 115"
72
+
73
+ const decimalScorer = new EncodingScorer("decimal");
74
+ console.log(decimalScorer.score(DECIMAL_STRING)); // 0.8333333333333334
75
+ ```
76
+
77
+ ### Custom Regex Pattern
78
+
79
+ ```js
80
+ const DECIMAL_STRING = "84 72 73 83 32 73 83 32 65 32 84 69 83 84 33 32 58 41" // THIS IS A TEST! :)
81
+ const PATTERN = "[A-Z]" // Only allow uppercase alphabet.
82
+ const PATTERN2 = "[A-Z:)!\\s]" // Allow all of the characters.
83
+ // NOTE: Must use `\\s` for space here.
84
+
85
+ const decimalScorer = new EncodingScorer("decimal", PATTERN);
86
+ console.log(decimalScorer.score(DECIMAL_STRING)); // 0.6111111111111112
87
+
88
+ const decimalScorer2 = new EncodingScorer("decimal", PATTERN2);
89
+ console.log(decimalScorer2.score(DECIMAL_STRING)); // 1
90
+ ```
package/package.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "name": "encodingscore-js",
3
+ "version": "1.0.0",
4
+ "description": "A library for scoring encoded text against a provided regex.",
5
+ "author": "irebased <rebased.again@gmail.com>"
6
+ }
package/src/base64.js ADDED
@@ -0,0 +1,30 @@
1
// A candidate window must be exactly four characters from the base64 alphabet
// (padding `=` is deliberately not accepted).
const BASE64_QUAD_RE = /^[A-Za-z0-9+/]{4}$/;

/**
 * Slides a four-character window over `s` and records which characters belong
 * to at least one window that decodes to three accepted bytes.
 *
 * @param {string} s - Candidate base64 text.
 * @param {(char: string, re: RegExp) => boolean} validByte - Predicate applied
 *   to each decoded character together with `re`.
 * @param {RegExp} re - Forwarded unchanged to `validByte`.
 * @returns {boolean[]} One entry per character of `s`; `true` where the
 *   character is covered by an accepted window.
 */
export function scanBase64(s, validByte, re) {
  const hits = new Array(s.length).fill(false);
  const lastStart = s.length - 4;

  for (let start = 0; start <= lastStart; start += 1) {
    const stop = start + 4;
    const window = s.slice(start, stop);

    if (B64_WINDOW_OK(window)) {
      try {
        const decoded = atob(window);
        if (decoded.length === 3) {
          // `every` stops at the first rejected character, like an early break.
          const accepted = Array.from(decoded).every((ch) => validByte(ch, re));
          if (accepted) {
            hits.fill(true, start, stop);
          }
        }
      } catch {}
    }
  }

  return hits;
}

// Tests a window against the base64 alphabet pattern.
function B64_WINDOW_OK(window) {
  return BASE64_QUAD_RE.test(window);
}
package/src/decimal.js ADDED
@@ -0,0 +1,7 @@
1
+ import { scanNumericTokens } from "./numeric.js";
2
+
3
// A decimal token consists solely of the digits 0-9.
const DEC_RE = /^[0-9]+$/;

/**
 * Scans whitespace-separated decimal tokens by delegating to
 * `scanNumericTokens` with the decimal token pattern and radix 10.
 *
 * @param {string} s - The full input string the tokens were taken from.
 * @param {string[]} tokens - The tokens to evaluate.
 * @param {(char: string, re: RegExp) => boolean} validByte - Predicate
 *   forwarded to `scanNumericTokens`.
 * @param {RegExp} validationRe - Forwarded to `scanNumericTokens`.
 * @returns {*} Whatever `scanNumericTokens` returns for these arguments.
 */
export function scanDecimal(s, tokens, validByte, validationRe) {
  const radix = 10;
  const result = scanNumericTokens(s, tokens, validByte, DEC_RE, radix, validationRe);
  return result;
}
@@ -0,0 +1,77 @@
1
+ import { scanHex } from './hex.js';
2
+ import { scanBase64 } from './base64.js';
3
+ import { scanDecimal } from './decimal.js';
4
+ import { scanOctal } from './octal.js';
5
+
6
// Encoding names accepted by the EncodingScorer constructor.
const supportedEncodings = [
  "base64",
  "hexadecimal",
  "octal",
  "decimal",
];

/**
 * Scores how much of an encoded string decodes to characters accepted by a
 * regular expression. Scores are ratios of hits to evaluated items.
 */
export class EncodingScorer {
  #encoding;
  #characterSetRegex;
  #regexp;

  /**
   * @param {string} encoding - One of "base64", "hexadecimal", "octal",
   *   "decimal".
   * @param {string} [characterSetRegex] - Regex source tested against each
   *   decoded character. The backslash in the default is doubled so the
   *   compiled pattern really contains `\s`; a single backslash inside a
   *   plain string literal is dropped and whitespace would never match.
   * @throws {Error} If the encoding is not supported or the pattern cannot
   *   be compiled (the original SyntaxError is attached as `cause`).
   */
  constructor(encoding, characterSetRegex = "[A-Za-z0-9\\s.,!?]") {
    this.#encoding = encoding;
    this.#characterSetRegex = characterSetRegex;

    if (!this.isValidEncoding()) {
      throw new Error(`Invalid encoding provided: ${encoding}. Must be one of: ${supportedEncodings}`);
    }

    try {
      this.#regexp = new RegExp(this.#characterSetRegex);
    } catch (e) {
      if (e instanceof SyntaxError) {
        throw new Error(
          `The provided characterSet ${characterSetRegex} could not be parsed as a regex pattern.`,
          { cause: e },
        );
      }
      // Anything else is unexpected; do not leave the scorer half-built.
      throw e;
    }
  }

  /**
   * @returns {boolean} Whether the configured encoding is supported.
   */
  isValidEncoding() {
    return supportedEncodings.includes(this.#encoding);
  }

  /**
   * Tests one decoded character against a regex. This method is handed to the
   * scanner functions unbound, so it must not rely on `this`.
   *
   * @param {string} byte - The decoded character.
   * @param {RegExp} re - The pattern to test with.
   * @returns {boolean} `true` on a match; `false` on a miss or if the test
   *   itself throws.
   */
  validByte(byte, re) {
    try {
      // A global/sticky regex keeps state between calls; always start fresh.
      re.lastIndex = 0;
      const matches = re.test(byte);
      if (!matches) {
        console.log(`Failed validation: ${byte}`)
      }
      return matches;
    } catch {
      return false;
    }
  }

  /**
   * Scores `s` according to the encoding chosen at construction time.
   *
   * @param {string} s - The encoded text.
   * @returns {number} Ratio of hits to evaluated positions or tokens.
   * @throws {Error} If the configured encoding has no scoring branch.
   */
  score(s) {
    switch (this.#encoding) {
      case "base64":
        return this.#scoreCounter(s, scanBase64, 4);
      case "hexadecimal":
        return this.#scoreCounter(s, scanHex, 4);
      case "decimal":
        return this.#numericScoreCounter(s, scanDecimal);
      case "octal":
        return this.#numericScoreCounter(s, scanOctal);
      default:
        throw new Error(`Unsupported encoding: ${this.#encoding}.`);
    }
  }

  /**
   * Scores a per-position hit array, ignoring its final `offset` entries.
   *
   * @param {string} s - The encoded text.
   * @param {Function} scoreFn - Scanner returning an array of booleans.
   * @param {number} [offset=0] - Number of trailing entries to discard.
   * @returns {number} Share of the remaining entries that are `true`, or 0
   *   when nothing remains to be evaluated.
   */
  #scoreCounter(s, scoreFn, offset = 0) {
    if (s.length <= offset) return 0;

    const hits = scoreFn(s, this.validByte, this.#regexp);
    // The scanner may return fewer entries than `s` has characters; guard so
    // the slice end never goes negative and the division never yields NaN.
    const relevantLength = hits.length - offset;
    if (relevantLength <= 0) return 0;

    const relevantHits = hits.slice(0, relevantLength);
    const count = relevantHits.filter((hit) => hit === true).length;
    return count / relevantHits.length;
  }

  /**
   * Scores whitespace-separated numeric tokens.
   *
   * @param {string} s - The encoded text.
   * @param {Function} scoreFn - Scanner returning one entry per accepted token.
   * @returns {number} Accepted tokens divided by total tokens, or 0 when `s`
   *   contains no tokens.
   */
  #numericScoreCounter(s, scoreFn) {
    // Leading/trailing whitespace makes split() emit empty strings; they are
    // not tokens and must not count against the score.
    const tokens = s.split(/\s+/).filter((tok) => tok.length > 0);
    if (tokens.length === 0) return 0;

    const hits = scoreFn(s, tokens, this.validByte, this.#regexp);
    return hits.length / tokens.length;
  }
}
package/src/hex.js ADDED
@@ -0,0 +1,24 @@
1
+
2
// A candidate window must be exactly two hexadecimal digits.
const HEX_RE = /^[0-9a-fA-F]{2}$/;
export const HEX_BYTE_CHAR_WIDTH = 2;

/**
 * Slides a two-character window over the space-stripped form of `s` and
 * records which characters belong to at least one pair that decodes to an
 * accepted byte.
 *
 * The result is indexed by position in the space-stripped string, so it has
 * one entry per remaining character. Sizing it from the raw input would leave
 * one permanently `false` entry per removed space.
 *
 * @param {string} s - Candidate hexadecimal text; space characters are
 *   removed before scanning.
 * @param {(char: string, re: RegExp) => boolean} validByte - Predicate applied
 *   to each decoded character together with `validationRe`.
 * @param {RegExp} validationRe - Forwarded unchanged to `validByte`.
 * @returns {boolean[]} One entry per character of the space-stripped input;
 *   `true` where the character is covered by an accepted pair.
 */
export function scanHex(s, validByte, validationRe) {
  const cleanedString = s.replaceAll(" ", "");
  const hits = new Array(cleanedString.length).fill(false);
  const lastStart = cleanedString.length - HEX_BYTE_CHAR_WIDTH;

  for (let i = 0; i <= lastStart; i++) {
    const pair = cleanedString.slice(i, i + HEX_BYTE_CHAR_WIDTH);
    if (!HEX_RE.test(pair)) continue;

    try {
      const byte = Number.parseInt(pair, 16);
      if (validByte(String.fromCharCode(byte), validationRe)) {
        hits.fill(true, i, i + HEX_BYTE_CHAR_WIDTH);
      }
    } catch {
      // Best-effort scan: a throwing validator counts as a miss.
    }
  }

  return hits;
}
package/src/numeric.js ADDED
@@ -0,0 +1,26 @@
1
+
2
// Not referenced in this module; callers pass their own token pattern.
const OCT_RE = /^[0-7]+$/;
const DEC_RE = /^[0-9]+$/;

/**
 * Walks `tokens` in order, locating each one in `s`, and collects the
 * [start, end) character span of every token that matches `re`, parses in
 * `base` to a value in 0..255, and whose decoded character passes `validByte`.
 *
 * @param {string} s - The string the tokens are searched in.
 * @param {Iterable<string>} tokens - Tokens to evaluate, in order.
 * @param {(char: string, re: RegExp) => boolean} validByte - Predicate applied
 *   to the decoded character together with `validationRe`.
 * @param {RegExp} re - Pattern a token must match to be parsed.
 * @param {number} base - Radix handed to `parseInt`.
 * @param {RegExp} validationRe - Forwarded unchanged to `validByte`.
 * @returns {Array<[number, number]>} Spans of the accepted tokens.
 */
export function scanNumericTokens(s, tokens, validByte, re, base, validationRe) {
  const accepted = [];
  let searchFrom = 0;

  for (const token of tokens) {
    const begin = s.indexOf(token, searchFrom);
    const finish = begin + token.length;
    // The next token is searched for after the end of this one.
    searchFrom = finish;

    try {
      if (re.test(token)) {
        const code = parseInt(token, base);
        const fitsInByte = code >= 0 && code <= 255;
        if (fitsInByte && validByte(String.fromCharCode(code), validationRe)) {
          accepted.push([begin, finish]);
        }
      }
    } catch {}
  }

  return accepted;
}
package/src/octal.js ADDED
@@ -0,0 +1,7 @@
1
+ import { scanNumericTokens } from "./numeric.js";
2
+
3
// An octal token consists solely of the digits 0-7.
const OCT_RE = /^[0-7]+$/;

/**
 * Scans whitespace-separated octal tokens by delegating to
 * `scanNumericTokens` with the octal token pattern and radix 8.
 *
 * @param {string} s - The full input string the tokens were taken from.
 * @param {string[]} tokens - The tokens to evaluate.
 * @param {(char: string, re: RegExp) => boolean} validByte - Predicate
 *   forwarded to `scanNumericTokens`.
 * @param {RegExp} validationRe - Forwarded to `scanNumericTokens`.
 * @returns {*} Whatever `scanNumericTokens` returns for these arguments.
 */
export function scanOctal(s, tokens, validByte, validationRe) {
  const radix = 8;
  const result = scanNumericTokens(s, tokens, validByte, OCT_RE, radix, validationRe);
  return result;
}