@gabrielrufino/cerebrum 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,8 +13,8 @@ jobs:
13
13
  name: Node package CD
14
14
  runs-on: ubuntu-latest
15
15
  steps:
16
- - uses: gabrielrufino/check-ci@main
16
+ - uses: actalog/check-ci@main
17
17
  - uses: actions/checkout@v4
18
- - uses: gabrielrufino/node-pkg-cd@v1
18
+ - uses: actalog/node-pkg-cd@v1
19
19
  with:
20
20
  node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }}
@@ -11,7 +11,7 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  steps:
13
13
  - uses: actions/checkout@v4
14
- - uses: gabrielrufino/node-ci@v3
14
+ - uses: actalog/node-ci@v3
15
15
 
16
16
  concurrency:
17
17
  group: ${{ github.workflow }}-${{ github.sha }}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Tokenizer = void 0;
4
/**
 * Splits a text into word tokens.
 *
 * A small set of punctuation characters (. , ; : ! ? ") is removed, the text
 * is split on runs of whitespace, and any token found in the ignore list is
 * dropped. Ignore-list matching is case-insensitive; returned tokens keep
 * their original casing.
 */
class Tokenizer {
    /**
     * @param text   Text to tokenize (defaults to the empty string).
     * @param ignore Words to leave out of the result (defaults to none).
     */
    constructor(text = '', ignore = []) {
        this.text = text;
        this.ignore = ignore;
    }
    /** Replaces the text to tokenize and returns this instance for chaining. */
    setText(text) {
        this.text = text;
        return this;
    }
    /** Replaces the ignore list and returns this instance for chaining. */
    setIgnore(ignore) {
        this.ignore = ignore;
        return this;
    }
    /**
     * Tokenizes the current text.
     *
     * @returns The tokens in order of appearance; an empty array for empty or
     *          whitespace-only text.
     */
    execute() {
        // toLowerCase() is used instead of toLocaleLowerCase() so that matching
        // does not change with the host locale (e.g. Turkish dotless-i rules).
        // A Set gives constant-time membership checks per token.
        const ignored = new Set(this.ignore.map(word => word.toLowerCase()));
        const punctuation = /[.,;:!?"]/g;
        return this.text
            .replace(punctuation, '')
            .split(/\s+/)
            // split() yields empty strings for leading/trailing whitespace.
            .filter(token => token !== '' && !ignored.has(token.toLowerCase()));
    }
}
26
+ exports.Tokenizer = Tokenizer;
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const Tokenizer_1 = require("./Tokenizer");
5
// Behavioural tests for the NLP Tokenizer (compiled output of Tokenizer.spec.ts).
(0, vitest_1.describe)('Tokenizer', () => {
    (0, vitest_1.it)('should tokenize a simple text', () => {
        const tokens = new Tokenizer_1.Tokenizer()
            .setText("Hello world")
            .execute();
        (0, vitest_1.expect)(tokens).toEqual(['Hello', 'world']);
    });
    (0, vitest_1.it)('should remove punctuation', () => {
        const tokens = new Tokenizer_1.Tokenizer()
            .setText("Hello, world!")
            .execute();
        (0, vitest_1.expect)(tokens).toEqual(['Hello', 'world']);
    });
    (0, vitest_1.it)('should handle multiple spaces between words', () => {
        // The input must contain a run of spaces, otherwise this case only
        // repeats the simple-text test above.
        const tokens = new Tokenizer_1.Tokenizer()
            .setText("Hello    world")
            .execute();
        (0, vitest_1.expect)(tokens).toEqual(['Hello', 'world']);
    });
    (0, vitest_1.it)('should ignore specified words', () => {
        const tokens = new Tokenizer_1.Tokenizer()
            .setText("Hello, world! Let's tokenize this text with attention to hyphenated-words.")
            .setIgnore(['this', 'with', 'to'])
            .execute();
        // Apostrophes and hyphens are not in the stripped punctuation set.
        (0, vitest_1.expect)(tokens).toEqual(['Hello', 'world', "Let's", 'tokenize', 'text', 'attention', 'hyphenated-words']);
    });
    (0, vitest_1.it)('should return an empty array for empty text', () => {
        const tokens = new Tokenizer_1.Tokenizer()
            .execute();
        (0, vitest_1.expect)(tokens).toEqual([]);
    });
    (0, vitest_1.it)('should allow setting text and ignore list after instantiation', () => {
        const tokenizer = new Tokenizer_1.Tokenizer();
        tokenizer.setText("Testing tokenization").setIgnore(['Testing']);
        const tokens = tokenizer.execute();
        (0, vitest_1.expect)(tokens).toEqual(['tokenization']);
    });
    (0, vitest_1.it)('should be case insensitive when filtering ignored words', () => {
        const tokens = new Tokenizer_1.Tokenizer()
            .setText("Tokenize and ignore some Words")
            .setIgnore(['and', 'some', 'words'])
            .execute();
        (0, vitest_1.expect)(tokens).toEqual(['Tokenize', 'ignore']);
    });
});
@@ -0,0 +1,17 @@
1
// Compiled CommonJS barrel for the NLP folder: re-exports everything that
// ./Tokenizer exports. The two helpers below implement `export * from`.
+ "use strict";
2
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2
// defaults to k). An already-installed helper on `this` is reused if present.
// When Object.create exists, the property is defined through a descriptor;
// otherwise it is copied by plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Fall back to an enumerable getter that reads m[k] on each access when there
// is no own descriptor, or when the existing one fails the check below.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __exportStar(m, exports): binds every enumerable key of m onto exports,
// skipping "default" and any name exports already owns.
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
// Re-export the Tokenizer module's exports from this barrel.
+ __exportStar(require("./Tokenizer"), exports);
package/dist/index.js CHANGED
@@ -10,13 +10,37 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
10
10
  if (k2 === undefined) k2 = k;
11
11
  o[k2] = m[k];
12
12
  }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
13
18
  var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
19
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
20
  };
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
16
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.NLP = void 0;
17
40
  __exportStar(require("./Math/Fibonacci"), exports);
18
41
  __exportStar(require("./Math/LinearRegression"), exports);
19
42
  __exportStar(require("./Math/TwoSum"), exports);
43
+ exports.NLP = __importStar(require("./NLP"));
20
44
  __exportStar(require("./Search/BinarySearch"), exports);
21
45
  __exportStar(require("./Search/LinearSearch"), exports);
22
46
  __exportStar(require("./Sort/BubbleSort"), exports);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gabrielrufino/cerebrum",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "Algorithms made in TypeScript",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
@@ -15,14 +15,14 @@
15
15
  "author": "Gabriel Rufino <contato@gabrielrufino.com>",
16
16
  "license": "UNLICENSED",
17
17
  "devDependencies": {
18
- "@commitlint/cli": "^19.5.0",
19
- "@commitlint/config-conventional": "^19.5.0",
18
+ "@commitlint/cli": "^19.6.0",
19
+ "@commitlint/config-conventional": "^19.6.0",
20
20
  "@faker-js/faker": "^8.4.1",
21
21
  "@gabrielrufino/eslint-config": "^1.6.0",
22
- "@vitest/coverage-v8": "^2.1.4",
22
+ "@vitest/coverage-v8": "^2.1.5",
23
23
  "eslint": "^8.57.1",
24
- "husky": "^9.1.6",
25
- "typescript": "^5.6.3",
24
+ "husky": "^9.1.7",
25
+ "typescript": "^5.7.2",
26
26
  "vitest": "^2.1.0"
27
27
  },
28
28
  "funding": [
@@ -0,0 +1,60 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { Tokenizer } from './Tokenizer'
3
+
4
// Behavioural tests for the NLP Tokenizer.
describe('Tokenizer', () => {
  it('should tokenize a simple text', () => {
    const tokens = new Tokenizer()
      .setText("Hello world")
      .execute()

    expect(tokens).toEqual(['Hello', 'world'])
  })

  it('should remove punctuation', () => {
    const tokens = new Tokenizer()
      .setText("Hello, world!")
      .execute()

    expect(tokens).toEqual(['Hello', 'world'])
  })

  it('should handle multiple spaces between words', () => {
    // The input must contain a run of spaces, otherwise this case only
    // repeats the simple-text test above.
    const tokens = new Tokenizer()
      .setText("Hello    world")
      .execute()

    expect(tokens).toEqual(['Hello', 'world'])
  })

  it('should ignore specified words', () => {
    const tokens = new Tokenizer()
      .setText("Hello, world! Let's tokenize this text with attention to hyphenated-words.")
      .setIgnore(['this', 'with', 'to'])
      .execute()

    // Apostrophes and hyphens are not in the stripped punctuation set.
    expect(tokens).toEqual(['Hello', 'world', "Let's", 'tokenize', 'text', 'attention', 'hyphenated-words'])
  })

  it('should return an empty array for empty text', () => {
    const tokens = new Tokenizer()
      .execute()

    expect(tokens).toEqual([])
  })

  it('should allow setting text and ignore list after instantiation', () => {
    const tokenizer = new Tokenizer()
    tokenizer.setText("Testing tokenization").setIgnore(['Testing'])
    const tokens = tokenizer.execute()
    expect(tokens).toEqual(['tokenization'])
  })

  it('should be case insensitive when filtering ignored words', () => {
    const tokens = new Tokenizer()
      .setText("Tokenize and ignore some Words")
      .setIgnore(['and', 'some', 'words'])
      .execute()

    expect(tokens).toEqual(['Tokenize', 'ignore'])
  })
})
@@ -0,0 +1,26 @@
1
/**
 * Splits a text into word tokens.
 *
 * A small set of punctuation characters (`. , ; : ! ? "`) is removed, the text
 * is split on runs of whitespace, and any token found in the ignore list is
 * dropped. Ignore-list matching is case-insensitive; returned tokens keep
 * their original casing.
 *
 * @example
 * new Tokenizer('Hello, world!').setIgnore(['hello']).execute() // ['world']
 */
export class Tokenizer {
  /**
   * @param text Text to tokenize.
   * @param ignore Words to leave out of the result.
   */
  constructor (
    private text: string = '',
    private ignore: string[] = []
  ) {}

  /** Replaces the text to tokenize and returns this instance for chaining. */
  public setText(text: string): this {
    this.text = text
    return this
  }

  /** Replaces the ignore list and returns this instance for chaining. */
  public setIgnore(ignore: string[]): this {
    this.ignore = ignore
    return this
  }

  /**
   * Tokenizes the current text.
   *
   * @returns The tokens in order of appearance; an empty array for empty or
   * whitespace-only text.
   */
  public execute(): string[] {
    // toLowerCase() is used instead of toLocaleLowerCase() so that matching
    // does not change with the host locale (e.g. Turkish dotless-i rules).
    // A Set gives constant-time membership checks per token.
    const ignored = new Set(this.ignore.map(word => word.toLowerCase()))
    const punctuation = /[.,;:!?"]/g

    return this.text
      .replace(punctuation, '')
      .split(/\s+/)
      // split() yields empty strings for leading/trailing whitespace.
      .filter(token => token !== '' && !ignored.has(token.toLowerCase()))
  }
}
@@ -0,0 +1 @@
1
+ export * from './Tokenizer'
package/src/index.ts CHANGED
@@ -2,6 +2,8 @@ export * from './Math/Fibonacci';
2
2
  export * from './Math/LinearRegression';
3
3
  export * from './Math/TwoSum';
4
4
 
5
+ export * as NLP from './NLP';
6
+
5
7
  export * from './Search/BinarySearch';
6
8
  export * from './Search/LinearSearch';
7
9