encodingscore-js 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # EncodingScore.js
1
+ # EncodingScore.py
2
2
 
3
3
  This library offers an `EncodingScorer` class which provides a score normalized between 0 and 1, representing how much an encoded string aligns with a provided regular expression.
4
4
 
@@ -25,8 +25,6 @@ Octal and decimal strings are split by a space delimiter. Because of this, no sl
25
25
 
26
26
  Scoring is done based on a ratio of "hits" (n-grams that pass the regex check) to total evaluated n-grams or tokens.
27
27
 
28
- For hexadecimal and base64, an "offset" is applied by removing the final *n - 1* (n=2 for hexadecimal, n=4 for base64) hit/miss results to account for the fact that the final *n - 1* hit results in the array are not going to be long enough and will therefore always miss. This is not necessary for octal and decimal, since it is a token-based system.
29
-
30
28
  ## Examples
31
29
 
32
30
  Import the encoding scorer:
@@ -43,8 +41,8 @@ const HEX_STRING = "68 65 6c 6c 6f 2e 20 74 68 69 73 20 69 73 20 61 20 6c 6f 6e
43
41
  const NON_MATCHING_HEX = "ff ff ff ff ff ff ff ff 6d ff 6d ff"
44
42
 
45
43
  const hexScorer = new EncodingScorer("hexadecimal");
46
- console.log(hexScorer.score(HEX_STRING)); // 0.5899419729206963
47
- console.log(hexScorer.score(NON_MATCHING_HEX)); // 0.12903225806451613
44
+ console.log(hexScorer.score(HEX_STRING)); // 0.6679462571976967
45
+ console.log(hexScorer.score(NON_MATCHING_HEX)); // 0.11428571428571428
48
46
  ```
49
47
  ### Base64 Usage
50
48
 
@@ -52,7 +50,7 @@ console.log(hexScorer.score(NON_MATCHING_HEX)); // 0.12903225806451613
52
50
  const BASE_64_STRING = "aGVsbG8uIHRoaXMgaXMgYSBsb25nIG1lc3NhZ2UgY29udGFpbmluZyBhIGxvdCBvZiBkYXRhLiBTbWFsbGVyIGRhdGEgaXMgbGVzcyBwcmVkaWN0YWJsZS4gUGxlYXNlIGNvbnNpZGVyIHVzaW5nIGxvbmdlciBzdHJlYW1zIG9mIGRhdGEgZm9yIGEgaGlnaGVyIGRlZ3JlZSBvZiBhY2N1cmFjeS4gVGhhbmtz"
53
51
 
54
52
  const base64Scorer = new EncodingScorer("base64");
55
- console.log(base64Scorer.score(BASE_64_STRING)); // 0.5263157894736842
53
+ console.log(base64Scorer.score(BASE_64_STRING)); // 1
56
54
  ```
57
55
 
58
56
  ### Octal Usage
@@ -62,7 +60,7 @@ console.log(base64Scorer.score(BASE_64_STRING)); // 0.5263157894736842
62
60
  const OCTAL_STRING = "150 145 154 154 157 56 40 164 150 151 163 40 151 163 40 141 40 154 157 156 147 40 155 145 163 163 141 147 145 40 143 157 156 164 141 151 156 151 156 147 40 141 40 154 157 164 40 157 146 40 144 141 164 141 56 40 123 155 141 154 154 145 162 40 144 141 164 141 40 151 163 40 154 145 163 163 40 160 162 145 144 151 143 164 141 142 154 145 56 40 120 154 145 141 163 145 40 143 157 156 163 151 144 145 162 40 165 163 151 156 147 40 154 157 156 147 145 162 40 163 164 162 145 141 155 163 40 157 146 40 144 141 164 141 40 146 157 162 40 141 40 150 151 147 150 145 162 40 144 145 147 162 145 145 40 157 146 40 141 143 143 165 162 141 143 171 56 40 124 150 141 156 153 163"
63
61
 
64
62
  const octalScorer = new EncodingScorer("octal");
65
- console.log(octalScorer.score(OCTAL_STRING)); // 0.8333333333333334
63
+ console.log(octalScorer.score(OCTAL_STRING)); // 1
66
64
  ```
67
65
 
68
66
  ### Decimal Usage
@@ -71,7 +69,7 @@ console.log(octalScorer.score(OCTAL_STRING)); // 0.8333333333333334
71
69
  const DECIMAL_STRING = "104 101 108 108 111 46 32 116 104 105 115 32 105 115 32 97 32 108 111 110 103 32 109 101 115 115 97 103 101 32 99 111 110 116 97 105 110 105 110 103 32 97 32 108 111 116 32 111 102 32 100 97 116 97 46 32 83 109 97 108 108 101 114 32 100 97 116 97 32 105 115 32 108 101 115 115 32 112 114 101 100 105 99 116 97 98 108 101 46 32 80 108 101 97 115 101 32 99 111 110 115 105 100 101 114 32 117 115 105 110 103 32 108 111 110 103 101 114 32 115 116 114 101 97 109 115 32 111 102 32 100 97 116 97 32 102 111 114 32 97 32 104 105 103 104 101 114 32 100 101 103 114 101 101 32 111 102 32 97 99 99 117 114 97 99 121 46 32 84 104 97 110 107 115"
72
70
 
73
71
  const decimalScorer = new EncodingScorer("decimal");
74
- console.log(decimalScorer.score(DECIMAL_STRING)); // 0.8333333333333334
72
+ console.log(decimalScorer.score(DECIMAL_STRING)); // 1
75
73
  ```
76
74
 
77
75
  ### Custom Regex Pattern
@@ -79,7 +77,7 @@ console.log(decimalScorer.score(DECIMAL_STRING)); // 0.8333333333333334
79
77
  ```js
80
78
  const DECIMAL_STRING = "84 72 73 83 32 73 83 32 65 32 84 69 83 84 33 32 58 41" // THIS IS A TEST! :)
81
79
  const PATTERN = "[A-Z]" // Only allow uppercase alphabet.
82
- const PATTERN2 = "[A-Z:)!\\s]" // ALlow all of the characters.
80
+ const PATTERN2 = "[A-Z:)!\\s]" // Allow all of the characters.
83
81
  // NOTE: Must use `\\s` for space here.
84
82
 
85
83
  const decimalScorer = new EncodingScorer("decimal", PATTERN);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "encodingscore-js",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "A library for scoring encoded text against a provided regex.",
5
5
  "author": "irebased <rebased.again@gmail.com>"
6
6
  }
package/src/base64.js CHANGED
@@ -25,6 +25,5 @@ export function scanBase64(s, validByte, re) {
25
25
  }
26
26
  } catch {}
27
27
  }
28
-
29
- return hits;
28
+ return hits.filter(hit => hit === true);
30
29
  }
@@ -15,7 +15,7 @@ export class EncodingScorer {
15
15
  #characterSetRegex;
16
16
  #regexp;
17
17
 
18
- constructor(encoding, characterSetRegex="[A-Za-z0-9\s.,!?]") {
18
+ constructor(encoding, characterSetRegex="[A-Za-z0-9\\s.,!?]") {
19
19
  this.#encoding = encoding;
20
20
  this.#characterSetRegex = characterSetRegex;
21
21
 
@@ -45,9 +45,9 @@ export class EncodingScorer {
45
45
  score(s) {
46
46
  switch (this.#encoding) {
47
47
  case "base64":
48
- return this.#scoreCounter(s, scanBase64, 4);
48
+ return this.#scoreCounter(s, scanBase64);
49
49
  case "hexadecimal":
50
- return this.#scoreCounter(s, scanHex, 4);
50
+ return this.#scoreCounter(s, scanHex);
51
51
  case "decimal":
52
52
  return this.#numericScoreCounter(s, scanDecimal);
53
53
  case "octal":
@@ -57,13 +57,9 @@ export class EncodingScorer {
57
57
  }
58
58
  }
59
59
 
60
- #scoreCounter(s, scoreFn, offset=0) {
61
- if (s.length <= offset) return 0;
62
-
60
+ #scoreCounter(s, scoreFn) {
63
61
  let hits = scoreFn(s, this.validByte, this.#regexp);
64
- const relevantHits = hits.slice(0, hits.length - offset);
65
- const count = relevantHits.filter(e => e === true).length;
66
- return count / (relevantHits.length);
62
+ return hits.length / (s.length);
67
63
  }
68
64
 
69
65
  #numericScoreCounter(s, scoreFn) {
package/src/hex.js CHANGED
@@ -20,5 +20,5 @@ export function scanHex(s, validByte, validationRe) {
20
20
  }
21
21
  }
22
22
 
23
- return hits;
23
+ return hits.filter(hit => hit === true);
24
24
  }