@crashbytes/semantic-text-toolkit 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +24 -0
- package/.github/dependabot.yml +50 -0
- package/.github/workflows/ci.yml +42 -0
- package/.github/workflows/release.yml +50 -0
- package/README.md +1 -1
- package/jest.config.js +66 -0
- package/package.json +1 -1
- package/src/__tests__/setup.ts +43 -0
- package/src/__tests__/types.test.ts +128 -0
- package/src/engine/__tests__/SemanticEngine.test.ts +398 -0
- package/src/search/__tests__/SemanticSearch.test.ts +582 -0
- package/src/utils/__tests__/vector.test.ts +354 -0
package/.eslintrc.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"root": true,
|
|
3
|
+
"parser": "@typescript-eslint/parser",
|
|
4
|
+
"plugins": ["@typescript-eslint"],
|
|
5
|
+
"extends": [
|
|
6
|
+
"eslint:recommended",
|
|
7
|
+
"plugin:@typescript-eslint/recommended"
|
|
8
|
+
],
|
|
9
|
+
"env": {
|
|
10
|
+
"node": true,
|
|
11
|
+
"es2020": true,
|
|
12
|
+
"jest": true
|
|
13
|
+
},
|
|
14
|
+
"parserOptions": {
|
|
15
|
+
"ecmaVersion": 2020,
|
|
16
|
+
"sourceType": "module"
|
|
17
|
+
},
|
|
18
|
+
"rules": {
|
|
19
|
+
"@typescript-eslint/no-explicit-any": "warn",
|
|
20
|
+
"@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }],
|
|
21
|
+
"@typescript-eslint/no-namespace": "off"
|
|
22
|
+
},
|
|
23
|
+
"ignorePatterns": ["dist", "node_modules", "coverage"]
|
|
24
|
+
}
|
|
package/.github/dependabot.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: "npm"
|
|
4
|
+
directory: "/"
|
|
5
|
+
schedule:
|
|
6
|
+
interval: "weekly"
|
|
7
|
+
day: "monday"
|
|
8
|
+
time: "09:00"
|
|
9
|
+
open-pull-requests-limit: 10
|
|
10
|
+
groups:
|
|
11
|
+
# Group all production dependencies
|
|
12
|
+
production-dependencies:
|
|
13
|
+
applies-to: version-updates
|
|
14
|
+
dependency-type: "production"
|
|
15
|
+
update-types:
|
|
16
|
+
- "minor"
|
|
17
|
+
- "patch"
|
|
18
|
+
|
|
19
|
+
# Group development dependencies
|
|
20
|
+
development-dependencies:
|
|
21
|
+
applies-to: version-updates
|
|
22
|
+
dependency-type: "development"
|
|
23
|
+
update-types:
|
|
24
|
+
- "minor"
|
|
25
|
+
- "patch"
|
|
26
|
+
|
|
27
|
+
# Auto-assign PRs
|
|
28
|
+
assignees:
|
|
29
|
+
- "MichaelEakins"
|
|
30
|
+
|
|
31
|
+
# Labels for PRs
|
|
32
|
+
labels:
|
|
33
|
+
- "dependencies"
|
|
34
|
+
- "automated"
|
|
35
|
+
|
|
36
|
+
commit-message:
|
|
37
|
+
prefix: "chore(deps)"
|
|
38
|
+
include: "scope"
|
|
39
|
+
|
|
40
|
+
- package-ecosystem: "github-actions"
|
|
41
|
+
directory: "/"
|
|
42
|
+
schedule:
|
|
43
|
+
interval: "weekly"
|
|
44
|
+
day: "monday"
|
|
45
|
+
time: "09:00"
|
|
46
|
+
labels:
|
|
47
|
+
- "dependencies"
|
|
48
|
+
- "github-actions"
|
|
49
|
+
commit-message:
|
|
50
|
+
prefix: "chore(actions)"
|
|
package/.github/workflows/ci.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
node-version: [18, 20, 22]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Setup Node.js ${{ matrix.node-version }}
|
|
21
|
+
uses: actions/setup-node@v4
|
|
22
|
+
with:
|
|
23
|
+
node-version: ${{ matrix.node-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: npm install
|
|
27
|
+
|
|
28
|
+
- name: Lint
|
|
29
|
+
run: npm run lint
|
|
30
|
+
|
|
31
|
+
- name: Build
|
|
32
|
+
run: npm run build
|
|
33
|
+
|
|
34
|
+
- name: Test with coverage
|
|
35
|
+
run: npm test -- --coverage
|
|
36
|
+
|
|
37
|
+
- name: Upload coverage reports
|
|
38
|
+
if: matrix.node-version == 20
|
|
39
|
+
uses: codecov/codecov-action@v4
|
|
40
|
+
with:
|
|
41
|
+
file: ./coverage/lcov.info
|
|
42
|
+
fail_ci_if_error: false
|
|
package/.github/workflows/release.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
# CRITICAL: Required for npm Trusted Publishing (OIDC authentication)
|
|
9
|
+
permissions:
|
|
10
|
+
contents: write # Needed to create GitHub releases
|
|
11
|
+
id-token: write # Needed for npm provenance/trusted publishing
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
release:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
with:
|
|
19
|
+
fetch-depth: 0
|
|
20
|
+
|
|
21
|
+
- name: Setup Node.js
|
|
22
|
+
uses: actions/setup-node@v4
|
|
23
|
+
with:
|
|
24
|
+
node-version: '20'
|
|
25
|
+
registry-url: 'https://registry.npmjs.org'
|
|
26
|
+
|
|
27
|
+
- name: Install latest npm
|
|
28
|
+
run: npm install -g npm@latest
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: npm install
|
|
32
|
+
|
|
33
|
+
- name: Build
|
|
34
|
+
run: npm run build
|
|
35
|
+
|
|
36
|
+
- name: Test
|
|
37
|
+
run: npm test
|
|
38
|
+
|
|
39
|
+
- name: Publish to npm with Provenance
|
|
40
|
+
run: npm publish --provenance --access public
|
|
41
|
+
|
|
42
|
+
- name: Create GitHub Release
|
|
43
|
+
uses: softprops/action-gh-release@v2
|
|
44
|
+
with:
|
|
45
|
+
tag_name: ${{ github.ref }}
|
|
46
|
+
name: Release ${{ github.ref_name }}
|
|
47
|
+
body: |
|
|
48
|
+
See [CHANGELOG.md](https://github.com/CrashBytes/contentful-semantic-text-toolkit/blob/main/CHANGELOG.md) for details.
|
|
49
|
+
draft: false
|
|
50
|
+
prerelease: false
|
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ Production-grade semantic text analysis with embeddings, similarity computation,
|
|
|
7
7
|
[](https://www.npmjs.com/package/@crashbytes/semantic-text-toolkit)
|
|
8
8
|
[](https://www.npmjs.com/package/@crashbytes/semantic-text-toolkit)
|
|
9
9
|
[Bundle size](https://bundlephobia.com/package/@crashbytes/semantic-text-toolkit)
|
|
10
|
-
[License](./LICENSE)
|
|
11
11
|
|
|
12
12
|
---
|
|
13
13
|
|
package/jest.config.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Jest Configuration
|
|
3
|
+
*
|
|
4
|
+
* Architectural Principles:
|
|
5
|
+
* - Comprehensive coverage tracking
|
|
6
|
+
* - Reasonable timeout for ML model operations
|
|
7
|
+
* - Clear test organization patterns
|
|
8
|
+
* - Production-grade quality thresholds
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
module.exports = {
|
|
12
|
+
preset: 'ts-jest',
|
|
13
|
+
testEnvironment: 'node',
|
|
14
|
+
|
|
15
|
+
// Test discovery patterns
|
|
16
|
+
roots: ['<rootDir>/src'],
|
|
17
|
+
testMatch: [
|
|
18
|
+
'**/__tests__/**/*.test.ts',
|
|
19
|
+
'**/?(*.)+(spec|test).ts'
|
|
20
|
+
],
|
|
21
|
+
|
|
22
|
+
// Coverage configuration
|
|
23
|
+
collectCoverageFrom: [
|
|
24
|
+
'src/**/*.ts',
|
|
25
|
+
'!src/**/*.d.ts',
|
|
26
|
+
'!src/**/__tests__/**',
|
|
27
|
+
'!src/index.ts', // Entry point excluded from coverage
|
|
28
|
+
],
|
|
29
|
+
|
|
30
|
+
// Quality thresholds - enforce production standards
|
|
31
|
+
coverageThreshold: {
|
|
32
|
+
global: {
|
|
33
|
+
branches: 90,
|
|
34
|
+
functions: 90,
|
|
35
|
+
lines: 90,
|
|
36
|
+
statements: 90,
|
|
37
|
+
},
|
|
38
|
+
},
|
|
39
|
+
|
|
40
|
+
// Coverage reporting formats
|
|
41
|
+
coverageReporters: [
|
|
42
|
+
'text', // Console output
|
|
43
|
+
'text-summary', // Summary statistics
|
|
44
|
+
'lcov', // CI integration
|
|
45
|
+
'html', // Visual browser report
|
|
46
|
+
],
|
|
47
|
+
|
|
48
|
+
// Timeout configuration - ML operations require patience
|
|
49
|
+
testTimeout: 60000, // 60 seconds for model loading
|
|
50
|
+
|
|
51
|
+
// Module resolution
|
|
52
|
+
moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
|
|
53
|
+
|
|
54
|
+
// Transform configuration
|
|
55
|
+
transform: {
|
|
56
|
+
'^.+\\.tsx?$': ['ts-jest', {
|
|
57
|
+
tsconfig: {
|
|
58
|
+
esModuleInterop: true,
|
|
59
|
+
allowSyntheticDefaultImports: true,
|
|
60
|
+
},
|
|
61
|
+
}],
|
|
62
|
+
},
|
|
63
|
+
|
|
64
|
+
// Setup files
|
|
65
|
+
setupFilesAfterEnv: ['<rootDir>/src/__tests__/setup.ts'],
|
|
66
|
+
};
|
package/package.json
CHANGED
|
package/src/__tests__/setup.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Jest Test Setup
|
|
3
|
+
*
|
|
4
|
+
* Global configuration and utilities for test environment.
|
|
5
|
+
* Executed before each test file runs (registered via setupFilesAfterEnv in jest.config.js).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Register the custom toBeWithinRange matcher (inclusive floor/ceiling bounds)
|
|
9
|
+
expect.extend({
|
|
10
|
+
toBeWithinRange(received: number, floor: number, ceiling: number) {
|
|
11
|
+
const pass = received >= floor && received <= ceiling;
|
|
12
|
+
if (pass) {
|
|
13
|
+
return {
|
|
14
|
+
message: () =>
|
|
15
|
+
`expected ${received} not to be within range ${floor} - ${ceiling}`,
|
|
16
|
+
pass: true,
|
|
17
|
+
};
|
|
18
|
+
} else {
|
|
19
|
+
return {
|
|
20
|
+
message: () =>
|
|
21
|
+
`expected ${received} to be within range ${floor} - ${ceiling}`,
|
|
22
|
+
pass: false,
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
// Custom type definitions for extended matchers
|
|
29
|
+
declare global {
|
|
30
|
+
namespace jest {
|
|
31
|
+
interface Matchers<R> {
|
|
32
|
+
toBeWithinRange(floor: number, ceiling: number): R;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Global test configuration
|
|
38
|
+
process.env.NODE_ENV = 'test';
|
|
39
|
+
|
|
40
|
+
// Increase timeout for CI environments
|
|
41
|
+
if (process.env.CI) {
|
|
42
|
+
jest.setTimeout(120000); // 2 minutes for CI
|
|
43
|
+
}
|
|
package/src/__tests__/types.test.ts
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types Test Suite
|
|
3
|
+
*
|
|
4
|
+
* Validation of type definitions and SemanticError class behavior.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { SemanticError, SemanticErrorCode } from '../types';
|
|
8
|
+
|
|
9
|
+
describe('SemanticError', () => {
|
|
10
|
+
describe('constructor', () => {
|
|
11
|
+
it('creates error with code and message', () => {
|
|
12
|
+
const error = new SemanticError(
|
|
13
|
+
SemanticErrorCode.INVALID_INPUT,
|
|
14
|
+
'Invalid input provided'
|
|
15
|
+
);
|
|
16
|
+
|
|
17
|
+
expect(error).toBeInstanceOf(Error);
|
|
18
|
+
expect(error).toBeInstanceOf(SemanticError);
|
|
19
|
+
expect(error.code).toBe(SemanticErrorCode.INVALID_INPUT);
|
|
20
|
+
expect(error.message).toBe('Invalid input provided');
|
|
21
|
+
expect(error.name).toBe('SemanticError');
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('creates error with details', () => {
|
|
25
|
+
const error = new SemanticError(
|
|
26
|
+
SemanticErrorCode.DIMENSION_MISMATCH,
|
|
27
|
+
'Dimensions do not match',
|
|
28
|
+
{ expected: 384, actual: 256 }
|
|
29
|
+
);
|
|
30
|
+
|
|
31
|
+
expect(error.details).toEqual({ expected: 384, actual: 256 });
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it('creates error without details', () => {
|
|
35
|
+
const error = new SemanticError(
|
|
36
|
+
SemanticErrorCode.MODEL_NOT_LOADED,
|
|
37
|
+
'Model not loaded'
|
|
38
|
+
);
|
|
39
|
+
|
|
40
|
+
expect(error.details).toBeUndefined();
|
|
41
|
+
});
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
describe('error codes', () => {
|
|
45
|
+
it('has MODEL_NOT_LOADED code', () => {
|
|
46
|
+
expect(SemanticErrorCode.MODEL_NOT_LOADED).toBe('MODEL_NOT_LOADED');
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('has INVALID_INPUT code', () => {
|
|
50
|
+
expect(SemanticErrorCode.INVALID_INPUT).toBe('INVALID_INPUT');
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it('has EMBEDDING_FAILED code', () => {
|
|
54
|
+
expect(SemanticErrorCode.EMBEDDING_FAILED).toBe('EMBEDDING_FAILED');
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it('has COMPUTATION_FAILED code', () => {
|
|
58
|
+
expect(SemanticErrorCode.COMPUTATION_FAILED).toBe('COMPUTATION_FAILED');
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it('has DIMENSION_MISMATCH code', () => {
|
|
62
|
+
expect(SemanticErrorCode.DIMENSION_MISMATCH).toBe('DIMENSION_MISMATCH');
|
|
63
|
+
});
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
describe('error behavior', () => {
|
|
67
|
+
it('can be thrown and caught', () => {
|
|
68
|
+
const throwError = () => {
|
|
69
|
+
throw new SemanticError(
|
|
70
|
+
SemanticErrorCode.INVALID_INPUT,
|
|
71
|
+
'Test error'
|
|
72
|
+
);
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
expect(throwError).toThrow(SemanticError);
|
|
76
|
+
expect(throwError).toThrow('Test error');
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
it('preserves stack trace', () => {
|
|
80
|
+
const error = new SemanticError(
|
|
81
|
+
SemanticErrorCode.INVALID_INPUT,
|
|
82
|
+
'Test error'
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
expect(error.stack).toBeDefined();
|
|
86
|
+
expect(error.stack).toContain('SemanticError');
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it('can be checked with instanceof', () => {
|
|
90
|
+
try {
|
|
91
|
+
throw new SemanticError(
|
|
92
|
+
SemanticErrorCode.MODEL_NOT_LOADED,
|
|
93
|
+
'Model not loaded'
|
|
94
|
+
);
|
|
95
|
+
} catch (error) {
|
|
96
|
+
expect(error instanceof SemanticError).toBe(true);
|
|
97
|
+
expect(error instanceof Error).toBe(true);
|
|
98
|
+
}
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
it('code can be used for error handling', () => {
|
|
102
|
+
const handleError = (error: SemanticError): string => {
|
|
103
|
+
switch (error.code) {
|
|
104
|
+
case SemanticErrorCode.MODEL_NOT_LOADED:
|
|
105
|
+
return 'Please initialize the model first';
|
|
106
|
+
case SemanticErrorCode.INVALID_INPUT:
|
|
107
|
+
return 'Please check your input';
|
|
108
|
+
case SemanticErrorCode.DIMENSION_MISMATCH:
|
|
109
|
+
return 'Vector dimensions must match';
|
|
110
|
+
default:
|
|
111
|
+
return 'An error occurred';
|
|
112
|
+
}
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
const error1 = new SemanticError(
|
|
116
|
+
SemanticErrorCode.MODEL_NOT_LOADED,
|
|
117
|
+
'Not loaded'
|
|
118
|
+
);
|
|
119
|
+
const error2 = new SemanticError(
|
|
120
|
+
SemanticErrorCode.INVALID_INPUT,
|
|
121
|
+
'Bad input'
|
|
122
|
+
);
|
|
123
|
+
|
|
124
|
+
expect(handleError(error1)).toBe('Please initialize the model first');
|
|
125
|
+
expect(handleError(error2)).toBe('Please check your input');
|
|
126
|
+
});
|
|
127
|
+
});
|
|
128
|
+
});
|