@aj-archipelago/cortex 1.3.10 → 1.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +15 -0
- package/helper-apps/cortex-file-handler/.env.test +7 -0
- package/helper-apps/cortex-file-handler/.env.test.azure +6 -0
- package/helper-apps/cortex-file-handler/.env.test.gcs +9 -0
- package/helper-apps/cortex-file-handler/blobHandler.js +263 -179
- package/helper-apps/cortex-file-handler/constants.js +107 -0
- package/helper-apps/cortex-file-handler/docHelper.js +4 -1
- package/helper-apps/cortex-file-handler/fileChunker.js +171 -109
- package/helper-apps/cortex-file-handler/helper.js +39 -17
- package/helper-apps/cortex-file-handler/index.js +230 -138
- package/helper-apps/cortex-file-handler/localFileHandler.js +21 -3
- package/helper-apps/cortex-file-handler/package-lock.json +2622 -51
- package/helper-apps/cortex-file-handler/package.json +24 -4
- package/helper-apps/cortex-file-handler/redis.js +9 -18
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +22 -0
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +49 -0
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +34 -0
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +49 -0
- package/helper-apps/cortex-file-handler/start.js +26 -4
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +322 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +928 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx +57 -9
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +35 -22
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +65 -14
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +10 -10
- package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts +2 -1
- package/package.json +1 -1
- package/pathways/system/entity/sys_entity_continue.js +1 -1
- package/pathways/system/entity/sys_entity_start.js +1 -0
- package/pathways/system/entity/sys_generator_reasoning.js +1 -1
- package/pathways/system/entity/sys_generator_video_vision.js +2 -1
- package/pathways/system/entity/sys_router_tool.js +6 -4
- package/pathways/system/rest_streaming/sys_openai_chat_o1.js +19 -0
- package/pathways/system/rest_streaming/sys_openai_chat_o1_mini.js +19 -0
- package/server/plugins/openAiReasoningPlugin.js +11 -2
- package/server/plugins/openAiWhisperPlugin.js +9 -13
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "@aj-archipelago/
|
|
3
|
-
"version": "1.0.
|
|
4
|
-
"description": "",
|
|
2
|
+
"name": "@aj-archipelago/cortex-file-handler",
|
|
3
|
+
"version": "1.0.15",
|
|
4
|
+
"description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"scripts": {
|
|
7
7
|
"start": "node start.js",
|
|
8
8
|
"dev": "node -r dotenv/config start.js",
|
|
9
|
-
"test": "
|
|
9
|
+
"test": "DOTENV_CONFIG_PATH=.env.test NODE_ENV=test node -r dotenv/config node_modules/ava/entrypoints/cli.mjs",
|
|
10
|
+
"test:azure": "DOTENV_CONFIG_PATH=.env.test.azure NODE_ENV=test ./scripts/test-azure.sh",
|
|
11
|
+
"test:watch": "DOTENV_CONFIG_PATH=.env.test NODE_ENV=test node -r dotenv/config node_modules/ava/entrypoints/cli.mjs --watch",
|
|
12
|
+
"test:gcs": "DOTENV_CONFIG_PATH=.env.test.gcs NODE_ENV=test ./scripts/test-gcs.sh"
|
|
10
13
|
},
|
|
11
14
|
"dependencies": {
|
|
12
15
|
"@azure/storage-blob": "^12.13.0",
|
|
@@ -24,5 +27,22 @@
|
|
|
24
27
|
"public-ip": "^6.0.1",
|
|
25
28
|
"uuid": "^9.0.0",
|
|
26
29
|
"xlsx": "^0.18.5"
|
|
30
|
+
},
|
|
31
|
+
"devDependencies": {
|
|
32
|
+
"ava": "^5.3.1",
|
|
33
|
+
"dotenv": "^16.3.1",
|
|
34
|
+
"nock": "^13.3.0"
|
|
35
|
+
},
|
|
36
|
+
"ava": {
|
|
37
|
+
"files": [
|
|
38
|
+
"tests/**/*",
|
|
39
|
+
"!tests/test-files/**/*",
|
|
40
|
+
"!tests/test-docs/**/*",
|
|
41
|
+
"!tests/mocks/**/*"
|
|
42
|
+
],
|
|
43
|
+
"timeout": "1m",
|
|
44
|
+
"nodeArguments": [
|
|
45
|
+
"--experimental-modules"
|
|
46
|
+
]
|
|
27
47
|
}
|
|
28
48
|
}
|
|
@@ -96,34 +96,25 @@ const removeFromFileStoreMap = async (key) => {
|
|
|
96
96
|
const cleanupRedisFileStoreMap = async (nDays=1) => {
|
|
97
97
|
let cleaned = [];
|
|
98
98
|
try {
|
|
99
|
-
|
|
100
|
-
const
|
|
99
|
+
const map = await getAllFileStoreMap();
|
|
100
|
+
const nDaysAgo = new Date(Date.now() - nDays * 24 * 60 * 60 * 1000);
|
|
101
101
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
// Iterate over each key-value pair in the fileStoreMap
|
|
108
|
-
for (const [key, value] of Object.entries(fileStoreMap)) {
|
|
109
|
-
//check timestamp of each value compare to nDays and remove if older
|
|
110
|
-
const timestamp = new Date(value.timestamp);
|
|
111
|
-
const now = new Date();
|
|
112
|
-
const diffTime = Math.abs(now - timestamp);
|
|
113
|
-
const diffDays = Math.ceil(diffTime / (1000 * 60 * 60 * 24));
|
|
114
|
-
if (diffDays > nDays) {
|
|
102
|
+
for(const key in map){
|
|
103
|
+
const value = map[key];
|
|
104
|
+
const timestamp = value?.timestamp ? new Date(value.timestamp) : null;
|
|
105
|
+
if(!timestamp || timestamp.getTime() < nDaysAgo.getTime()){
|
|
115
106
|
// Remove the key from the "FileStoreMap" hash map
|
|
116
107
|
await removeFromFileStoreMap(key);
|
|
117
108
|
console.log(`Removed key ${key} from FileStoreMap`);
|
|
118
109
|
cleaned.push(Object.assign({hash:key}, value));
|
|
119
110
|
}
|
|
120
|
-
|
|
121
111
|
}
|
|
122
112
|
} catch (error) {
|
|
123
113
|
console.error(`Error cleaning FileStoreMap: ${error}`);
|
|
124
|
-
}finally{
|
|
125
|
-
|
|
114
|
+
} finally {
|
|
115
|
+
// Cleanup code if needed
|
|
126
116
|
}
|
|
117
|
+
return cleaned;
|
|
127
118
|
};
|
|
128
119
|
|
|
129
120
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { BlobServiceClient } from '@azure/storage-blob';
|
|
2
|
+
|
|
3
|
+
async function createContainer() {
|
|
4
|
+
try {
|
|
5
|
+
const blobServiceClient = BlobServiceClient.fromConnectionString("UseDevelopmentStorage=true");
|
|
6
|
+
const containerClient = blobServiceClient.getContainerClient("test-container");
|
|
7
|
+
|
|
8
|
+
console.log("Creating container...");
|
|
9
|
+
await containerClient.create();
|
|
10
|
+
console.log("Container created successfully");
|
|
11
|
+
} catch (error) {
|
|
12
|
+
// Ignore if container already exists
|
|
13
|
+
if (error.statusCode === 409) {
|
|
14
|
+
console.log("Container already exists");
|
|
15
|
+
} else {
|
|
16
|
+
console.error("Error creating container:", error);
|
|
17
|
+
process.exit(1);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
createContainer();
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { BlobServiceClient } from '@azure/storage-blob';
|
|
2
|
+
import { Storage } from '@google-cloud/storage';
|
|
3
|
+
|
|
4
|
+
async function createAzureContainer() {
|
|
5
|
+
try {
|
|
6
|
+
const blobServiceClient = BlobServiceClient.fromConnectionString("UseDevelopmentStorage=true");
|
|
7
|
+
const containerClient = blobServiceClient.getContainerClient("test-container");
|
|
8
|
+
|
|
9
|
+
console.log("Creating Azure container...");
|
|
10
|
+
await containerClient.create();
|
|
11
|
+
console.log("Azure container created successfully");
|
|
12
|
+
} catch (error) {
|
|
13
|
+
// Ignore if container already exists
|
|
14
|
+
if (error.statusCode === 409) {
|
|
15
|
+
console.log("Azure container already exists");
|
|
16
|
+
} else {
|
|
17
|
+
console.error("Error creating Azure container:", error);
|
|
18
|
+
process.exit(1);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
async function createGCSBucket() {
|
|
24
|
+
try {
|
|
25
|
+
const storage = new Storage({
|
|
26
|
+
projectId: "test-project",
|
|
27
|
+
apiEndpoint: "http://localhost:4443",
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
console.log("Creating GCS bucket...");
|
|
31
|
+
await storage.createBucket("cortextempfiles");
|
|
32
|
+
console.log("GCS bucket created successfully");
|
|
33
|
+
} catch (error) {
|
|
34
|
+
// Ignore if bucket already exists
|
|
35
|
+
if (error.code === 409) {
|
|
36
|
+
console.log("GCS bucket already exists");
|
|
37
|
+
} else {
|
|
38
|
+
console.error("Error creating GCS bucket:", error);
|
|
39
|
+
process.exit(1);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
async function setup() {
|
|
45
|
+
await createAzureContainer();
|
|
46
|
+
await createGCSBucket();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
setup();
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Create temp directory for Azurite
|
|
4
|
+
AZURITE_DIR="/tmp/azurite-test"
|
|
5
|
+
mkdir -p $AZURITE_DIR
|
|
6
|
+
|
|
7
|
+
# Start Azurite in background
|
|
8
|
+
echo "Starting Azurite..."
|
|
9
|
+
azurite --silent --location $AZURITE_DIR &
|
|
10
|
+
AZURITE_PID=$!
|
|
11
|
+
|
|
12
|
+
# Wait for Azurite to start
|
|
13
|
+
sleep 2
|
|
14
|
+
|
|
15
|
+
# Create test container
|
|
16
|
+
echo "Setting up Azure container..."
|
|
17
|
+
node scripts/setup-azure-container.js
|
|
18
|
+
|
|
19
|
+
# Run the tests
|
|
20
|
+
echo "Running tests..."
|
|
21
|
+
node -r dotenv/config node_modules/ava/entrypoints/cli.mjs
|
|
22
|
+
|
|
23
|
+
# Store test result
|
|
24
|
+
TEST_RESULT=$?
|
|
25
|
+
|
|
26
|
+
# Kill Azurite
|
|
27
|
+
echo "Cleaning up..."
|
|
28
|
+
kill $AZURITE_PID
|
|
29
|
+
|
|
30
|
+
# Clean up Azurite directory
|
|
31
|
+
rm -rf $AZURITE_DIR
|
|
32
|
+
|
|
33
|
+
# Exit with test result
|
|
34
|
+
exit $TEST_RESULT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
# Exit on error
|
|
4
|
+
set -e
|
|
5
|
+
|
|
6
|
+
cleanup() {
|
|
7
|
+
echo "Cleaning up..."
|
|
8
|
+
if [ ! -z "$AZURITE_PID" ]; then
|
|
9
|
+
kill $AZURITE_PID 2>/dev/null || true
|
|
10
|
+
fi
|
|
11
|
+
docker stop fake-gcs-server 2>/dev/null || true
|
|
12
|
+
docker rm fake-gcs-server 2>/dev/null || true
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
# Set up cleanup trap
|
|
16
|
+
trap cleanup EXIT
|
|
17
|
+
|
|
18
|
+
echo "Starting test environment..."
|
|
19
|
+
|
|
20
|
+
# Start Azurite if not running
|
|
21
|
+
if ! nc -z localhost 10000; then
|
|
22
|
+
echo "Starting Azurite..."
|
|
23
|
+
azurite --silent --location .azurite --debug .azurite/debug.log &
|
|
24
|
+
AZURITE_PID=$!
|
|
25
|
+
# Wait for Azurite to be ready
|
|
26
|
+
until nc -z localhost 10000; do
|
|
27
|
+
sleep 1
|
|
28
|
+
done
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
# Start fake-gcs-server if not running
|
|
32
|
+
if ! nc -z localhost 4443; then
|
|
33
|
+
echo "Starting fake-gcs-server..."
|
|
34
|
+
docker run -d --name fake-gcs-server \
|
|
35
|
+
-p 4443:4443 \
|
|
36
|
+
fsouza/fake-gcs-server -scheme http
|
|
37
|
+
# Wait for fake-gcs-server to be ready
|
|
38
|
+
until nc -z localhost 4443; do
|
|
39
|
+
sleep 1
|
|
40
|
+
done
|
|
41
|
+
fi
|
|
42
|
+
|
|
43
|
+
# Create containers
|
|
44
|
+
echo "Setting up test containers..."
|
|
45
|
+
node scripts/setup-test-containers.js
|
|
46
|
+
|
|
47
|
+
# Run the tests
|
|
48
|
+
echo "Running tests..."
|
|
49
|
+
node -r dotenv/config node_modules/ava/entrypoints/cli.mjs "$@"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import CortexFileHandler from "./index.js";
|
|
2
2
|
import express from "express";
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
4
|
import { dirname, join } from 'path';
|
|
@@ -15,14 +15,36 @@ app.use(cors());
|
|
|
15
15
|
// Serve static files from the public folder
|
|
16
16
|
app.use('/files', express.static(publicFolder));
|
|
17
17
|
|
|
18
|
+
// New primary endpoint
|
|
19
|
+
app.all('/api/CortexFileHandler', async (req, res) => {
|
|
20
|
+
const context = { req, res, log: console.log }
|
|
21
|
+
try {
|
|
22
|
+
await CortexFileHandler(context, req);
|
|
23
|
+
context.log(context.res);
|
|
24
|
+
res.status(context.res.status || 200).send(context.res.body);
|
|
25
|
+
} catch (error) {
|
|
26
|
+
const status = error.status || 500;
|
|
27
|
+
const message = error.message || 'Internal server error';
|
|
28
|
+
res.status(status).send(message);
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
// Legacy endpoint for compatibility
|
|
18
33
|
app.all('/api/MediaFileChunker', async (req, res) => {
|
|
19
34
|
const context = { req, res, log: console.log }
|
|
20
|
-
|
|
21
|
-
|
|
35
|
+
try {
|
|
36
|
+
await CortexFileHandler(context, req);
|
|
37
|
+
context.log(context.res);
|
|
38
|
+
res.status(context.res.status || 200).send(context.res.body);
|
|
39
|
+
} catch (error) {
|
|
40
|
+
const status = error.status || 500;
|
|
41
|
+
const message = error.message || 'Internal server error';
|
|
42
|
+
res.status(status).send(message);
|
|
43
|
+
}
|
|
22
44
|
});
|
|
23
45
|
|
|
24
46
|
app.listen(port, () => {
|
|
25
|
-
console.log(`
|
|
47
|
+
console.log(`Cortex File Handler running on port ${port} (includes legacy MediaFileChunker endpoint)`);
|
|
26
48
|
});
|
|
27
49
|
|
|
28
50
|
export { port, publicFolder, ipAddress };
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { dirname, join } from 'path';
|
|
4
|
+
import fs from 'fs/promises';
|
|
5
|
+
import { documentToText, easyChunker } from '../docHelper.js';
|
|
6
|
+
|
|
7
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
8
|
+
|
|
9
|
+
// Setup: Create test documents
|
|
10
|
+
test.before(async t => {
|
|
11
|
+
const testDir = join(__dirname, 'test-docs');
|
|
12
|
+
await fs.mkdir(testDir, { recursive: true });
|
|
13
|
+
|
|
14
|
+
// Create various test files
|
|
15
|
+
const textFile = join(testDir, 'test.txt');
|
|
16
|
+
const largeTextFile = join(testDir, 'large.txt');
|
|
17
|
+
const unicodeFile = join(testDir, 'unicode.txt');
|
|
18
|
+
const jsonFile = join(testDir, 'test.json');
|
|
19
|
+
const emptyFile = join(testDir, 'empty.txt');
|
|
20
|
+
|
|
21
|
+
// Regular text content
|
|
22
|
+
await fs.writeFile(textFile, 'This is a test document content.\nIt has multiple lines.\nThird line here.');
|
|
23
|
+
|
|
24
|
+
// Large text content (>100KB)
|
|
25
|
+
const largeContent = 'Lorem ipsum '.repeat(10000);
|
|
26
|
+
await fs.writeFile(largeTextFile, largeContent);
|
|
27
|
+
|
|
28
|
+
// Unicode content
|
|
29
|
+
const unicodeContent = '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
|
|
30
|
+
await fs.writeFile(unicodeFile, unicodeContent);
|
|
31
|
+
|
|
32
|
+
// JSON content
|
|
33
|
+
await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
|
|
34
|
+
|
|
35
|
+
// Empty file
|
|
36
|
+
await fs.writeFile(emptyFile, '');
|
|
37
|
+
|
|
38
|
+
t.context = {
|
|
39
|
+
testDir,
|
|
40
|
+
textFile,
|
|
41
|
+
largeTextFile,
|
|
42
|
+
unicodeFile,
|
|
43
|
+
jsonFile,
|
|
44
|
+
emptyFile
|
|
45
|
+
};
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// Cleanup
|
|
49
|
+
test.after.always(async t => {
|
|
50
|
+
await fs.rm(t.context.testDir, { recursive: true, force: true });
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
// Test basic text file processing
|
|
54
|
+
test('processes text files correctly', async t => {
|
|
55
|
+
const result = await documentToText(t.context.textFile, 'text/plain');
|
|
56
|
+
t.true(typeof result === 'string', 'Result should be a string');
|
|
57
|
+
t.true(result.includes('test document content'), 'Result should contain file content');
|
|
58
|
+
t.true(result.includes('multiple lines'), 'Result should preserve multiple lines');
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
// Test large file handling
|
|
62
|
+
test('handles large text files', async t => {
|
|
63
|
+
const result = await documentToText(t.context.largeTextFile, 'text/plain');
|
|
64
|
+
t.true(result.length > 50000, 'Should handle large files');
|
|
65
|
+
t.true(result.includes('Lorem ipsum'), 'Should contain expected content');
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// Test Unicode handling
|
|
69
|
+
test('handles Unicode content correctly', async t => {
|
|
70
|
+
const result = await documentToText(t.context.unicodeFile, 'text/plain');
|
|
71
|
+
t.true(result.includes('这是中文内容'), 'Should preserve Chinese characters');
|
|
72
|
+
t.true(result.includes('これは日本語です'), 'Should preserve Japanese characters');
|
|
73
|
+
t.true(result.includes('Это русский текст'), 'Should preserve Cyrillic characters');
|
|
74
|
+
t.true(result.includes('🌟'), 'Should preserve emoji');
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// Test JSON file handling
|
|
78
|
+
test('rejects JSON files appropriately', async t => {
|
|
79
|
+
await t.throwsAsync(
|
|
80
|
+
async () => documentToText(t.context.jsonFile, 'application/json'),
|
|
81
|
+
{ message: 'Unsupported file type: json' }
|
|
82
|
+
);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
// Test empty file handling
|
|
86
|
+
test('handles empty files appropriately', async t => {
|
|
87
|
+
const result = await documentToText(t.context.emptyFile, 'text/plain');
|
|
88
|
+
t.is(result, '', 'Empty file should return empty string');
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Test unsupported file types
|
|
92
|
+
test('rejects unsupported file types', async t => {
|
|
93
|
+
const unsupportedFile = join(t.context.testDir, 'unsupported.xyz');
|
|
94
|
+
await fs.writeFile(unsupportedFile, 'test content');
|
|
95
|
+
await t.throwsAsync(
|
|
96
|
+
async () => documentToText(unsupportedFile, 'unsupported/type'),
|
|
97
|
+
{ message: 'Unsupported file type: xyz' }
|
|
98
|
+
);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
// Test text chunking functionality
|
|
102
|
+
test('chunks text correctly with default settings', t => {
|
|
103
|
+
const text = 'This is a test.\nSecond line.\nThird line.\nFourth line.';
|
|
104
|
+
const chunks = easyChunker(text);
|
|
105
|
+
|
|
106
|
+
t.true(Array.isArray(chunks), 'Should return an array of chunks');
|
|
107
|
+
t.true(chunks.length > 0, 'Should create at least one chunk');
|
|
108
|
+
t.true(chunks.every(chunk => typeof chunk === 'string'), 'All chunks should be strings');
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
// Test chunking with very long text
|
|
112
|
+
test('handles chunking of long text', t => {
|
|
113
|
+
const longText = 'Test sentence. '.repeat(1000);
|
|
114
|
+
const chunks = easyChunker(longText);
|
|
115
|
+
|
|
116
|
+
t.true(chunks.length > 1, 'Should split long text into multiple chunks');
|
|
117
|
+
t.true(chunks.every(chunk => chunk.length <= 10000), 'Each chunk should not exceed max length');
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
// Test chunking with various delimiters
|
|
121
|
+
test('respects sentence boundaries in chunking', t => {
|
|
122
|
+
const text = 'First sentence. Second sentence! Third sentence? Fourth sentence.';
|
|
123
|
+
const chunks = easyChunker(text);
|
|
124
|
+
|
|
125
|
+
t.true(chunks.every(chunk =>
|
|
126
|
+
chunk.match(/[.!?](\s|$)/) || chunk === chunks[chunks.length - 1]
|
|
127
|
+
), 'Chunks should end with sentence delimiters when possible');
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
// Test chunking with newlines
|
|
131
|
+
test('handles newlines in chunking', t => {
|
|
132
|
+
const text = 'Line 1\nLine 2\nLine 3\nLine 4';
|
|
133
|
+
const chunks = easyChunker(text);
|
|
134
|
+
|
|
135
|
+
t.true(chunks.some(chunk => chunk.includes('\n')), 'Should preserve newlines');
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
// Test chunking edge cases
|
|
139
|
+
test('handles chunking edge cases', t => {
|
|
140
|
+
// Empty string
|
|
141
|
+
t.deepEqual(easyChunker(''), [''], 'Should handle empty string');
|
|
142
|
+
|
|
143
|
+
// Single character
|
|
144
|
+
t.deepEqual(easyChunker('a'), ['a'], 'Should handle single character');
|
|
145
|
+
|
|
146
|
+
// Only whitespace
|
|
147
|
+
t.deepEqual(easyChunker(' \n '), [' \n '], 'Should handle whitespace');
|
|
148
|
+
});
|