@aj-archipelago/cortex 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/graphql/chunker.js +94 -104
- package/graphql/pathwayResolver.js +1 -1
- package/graphql/plugins/modelPlugin.js +61 -20
- package/graphql/plugins/openAiChatPlugin.js +1 -1
- package/graphql/plugins/openAiCompletionPlugin.js +1 -1
- package/package.json +12 -4
- package/pathways/summary.js +21 -6
- package/tests/chunkfunction.test.js +126 -0
- package/tests/chunking.test.js +19 -18
- package/tests/main.test.js +26 -26
- package/tests/translate.test.js +8 -9
package/graphql/chunker.js
CHANGED
|
@@ -1,22 +1,5 @@
|
|
|
1
1
|
const { encode, decode } = require('gpt-3-encoder')
|
|
2
2
|
|
|
3
|
-
const estimateCharPerToken = (text) => {
|
|
4
|
-
// check text only contains asciish characters
|
|
5
|
-
if (/^[ -~\t\n\r]+$/.test(text)) {
|
|
6
|
-
return 4;
|
|
7
|
-
}
|
|
8
|
-
return 1;
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
const getLastNChar = (text, maxLen) => {
|
|
12
|
-
if (text.length > maxLen) {
|
|
13
|
-
//slice text to avoid maxLen limit but keep the last n characters up to a \n or space to avoid cutting words
|
|
14
|
-
text = text.slice(-maxLen);
|
|
15
|
-
text = text.slice(text.search(/\s/) + 1);
|
|
16
|
-
}
|
|
17
|
-
return text;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
3
|
const getLastNToken = (text, maxTokenLen) => {
|
|
21
4
|
const encoded = encode(text);
|
|
22
5
|
if (encoded.length > maxTokenLen) {
|
|
@@ -35,113 +18,120 @@ const getFirstNToken = (text, maxTokenLen) => {
|
|
|
35
18
|
return text;
|
|
36
19
|
}
|
|
37
20
|
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if (
|
|
56
|
-
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
21
|
+
const getSemanticChunks = (text, chunkSize) => {
|
|
22
|
+
|
|
23
|
+
const breakByRegex = (str, regex, preserveWhitespace = false) => {
|
|
24
|
+
const result = [];
|
|
25
|
+
let match;
|
|
26
|
+
|
|
27
|
+
while ((match = regex.exec(str)) !== null) {
|
|
28
|
+
const value = str.slice(0, match.index);
|
|
29
|
+
result.push(value);
|
|
30
|
+
|
|
31
|
+
if (preserveWhitespace || /\S/.test(match[0])) {
|
|
32
|
+
result.push(match[0]);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
str = str.slice(match.index + match[0].length);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (str) {
|
|
39
|
+
result.push(str);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return result.filter(Boolean);
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
const breakByParagraphs = (str) => breakByRegex(str, /[\r\n]+/, true);
|
|
46
|
+
const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!\?!\n])\s+/, true);
|
|
47
|
+
const breakByWords = (str) => breakByRegex(str, /(\s,;:.+)/);
|
|
48
|
+
|
|
49
|
+
const createChunks = (tokens) => {
|
|
50
|
+
let chunks = [];
|
|
51
|
+
let currentChunk = '';
|
|
52
|
+
|
|
53
|
+
for (const token of tokens) {
|
|
54
|
+
const currentTokenLength = encode(currentChunk + token).length;
|
|
55
|
+
if (currentTokenLength <= chunkSize) {
|
|
56
|
+
currentChunk += token;
|
|
57
|
+
} else {
|
|
58
|
+
if (currentChunk) {
|
|
59
|
+
chunks.push(currentChunk);
|
|
73
60
|
}
|
|
61
|
+
currentChunk = token;
|
|
62
|
+
}
|
|
74
63
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
for (let i = 0; enableLineChunks && i < sentenceChunks.length; i++) {
|
|
79
|
-
if (isBig(sentenceChunks[i])) { // too long, split into lines
|
|
80
|
-
newlineChunks.push(...sentenceChunks[i].split('\n'));
|
|
81
|
-
} else {
|
|
82
|
-
newlineChunks.push(sentenceChunks[i]);
|
|
83
|
-
}
|
|
64
|
+
|
|
65
|
+
if (currentChunk) {
|
|
66
|
+
chunks.push(currentChunk);
|
|
84
67
|
}
|
|
68
|
+
|
|
69
|
+
return chunks;
|
|
70
|
+
};
|
|
85
71
|
|
|
86
|
-
|
|
87
|
-
let
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
chunk += words[k] + ' ';
|
|
99
|
-
}
|
|
100
|
-
if (chunk.length > 0) {
|
|
101
|
-
chunks.push(chunk.trim());
|
|
102
|
-
}
|
|
72
|
+
const combineChunks = (chunks) => {
|
|
73
|
+
let optimizedChunks = [];
|
|
74
|
+
|
|
75
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
76
|
+
if (i < chunks.length - 1) {
|
|
77
|
+
const combinedChunk = chunks[i] + chunks[i + 1];
|
|
78
|
+
const combinedLen = encode(combinedChunk).length;
|
|
79
|
+
|
|
80
|
+
if (combinedLen <= chunkSize) {
|
|
81
|
+
optimizedChunks.push(combinedChunk);
|
|
82
|
+
i += 1;
|
|
103
83
|
} else {
|
|
104
|
-
|
|
84
|
+
optimizedChunks.push(chunks[i]);
|
|
105
85
|
}
|
|
86
|
+
} else {
|
|
87
|
+
optimizedChunks.push(chunks[i]);
|
|
88
|
+
}
|
|
106
89
|
}
|
|
90
|
+
|
|
91
|
+
return optimizedChunks;
|
|
92
|
+
};
|
|
107
93
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return finallyMergeChunks ? mergeChunks({ chunks, maxChunkLength, maxChunkToken }) : chunks;
|
|
111
|
-
}
|
|
94
|
+
const breakText = (str) => {
|
|
95
|
+
const tokenLength = encode(str).length;
|
|
112
96
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return isBigChunk({ text, maxChunkLength, maxChunkToken });
|
|
97
|
+
if (tokenLength <= chunkSize) {
|
|
98
|
+
return [str];
|
|
116
99
|
}
|
|
117
100
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
let
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
101
|
+
const breakers = [breakByParagraphs, breakBySentences, breakByWords];
|
|
102
|
+
|
|
103
|
+
for (let i = 0; i < breakers.length; i++) {
|
|
104
|
+
const tokens = breakers[i](str);
|
|
105
|
+
if (tokens.length > 1) {
|
|
106
|
+
let chunks = createChunks(tokens);
|
|
107
|
+
chunks = combineChunks(chunks);
|
|
108
|
+
const brokenChunks = chunks.flatMap(breakText);
|
|
109
|
+
if (brokenChunks.every(chunk => encode(chunk).length <= chunkSize)) {
|
|
110
|
+
return brokenChunks;
|
|
125
111
|
}
|
|
126
|
-
|
|
127
|
-
}
|
|
128
|
-
if (chunk.length > 0) {
|
|
129
|
-
mergedChunks.push(chunk);
|
|
112
|
+
}
|
|
130
113
|
}
|
|
131
|
-
|
|
114
|
+
|
|
115
|
+
return createChunks([...str]); // Split by characters
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
return breakText(text);
|
|
132
119
|
}
|
|
133
120
|
|
|
134
121
|
|
|
135
122
|
const semanticTruncate = (text, maxLength) => {
|
|
136
|
-
|
|
137
|
-
text = getSemanticChunks({ text, maxChunkLength: maxLength })[0].slice(0, maxLength - 3).trim() + "...";
|
|
138
|
-
}
|
|
123
|
+
if (text.length <= maxLength) {
|
|
139
124
|
return text;
|
|
140
|
-
}
|
|
125
|
+
}
|
|
141
126
|
|
|
127
|
+
const truncatedText = text.slice(0, maxLength - 3).trim();
|
|
128
|
+
const lastSpaceIndex = truncatedText.lastIndexOf(" ");
|
|
142
129
|
|
|
130
|
+
return (lastSpaceIndex !== -1)
|
|
131
|
+
? truncatedText.slice(0, lastSpaceIndex) + "..."
|
|
132
|
+
: truncatedText + "...";
|
|
133
|
+
};
|
|
143
134
|
|
|
144
135
|
module.exports = {
|
|
145
|
-
getSemanticChunks, semanticTruncate,
|
|
146
|
-
getLastNChar, getLastNToken, getFirstNToken, estimateCharPerToken
|
|
136
|
+
getSemanticChunks, semanticTruncate, getLastNToken, getFirstNToken
|
|
147
137
|
}
|
|
package/graphql/pathwayResolver.js
CHANGED
|
@@ -163,7 +163,7 @@ class PathwayResolver {
|
|
|
163
163
|
}
|
|
164
164
|
|
|
165
165
|
// chunk the text and return the chunks with newline separators
|
|
166
|
-
return getSemanticChunks(
|
|
166
|
+
return getSemanticChunks(text, chunkTokenLength);
|
|
167
167
|
}
|
|
168
168
|
|
|
169
169
|
truncate(str, n) {
|
|
package/graphql/plugins/modelPlugin.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
const handlebars = require('handlebars');
|
|
3
3
|
const { request } = require("../../lib/request");
|
|
4
4
|
const { encode } = require("gpt-3-encoder");
|
|
5
|
+
const { getFirstNToken } = require("../chunker");
|
|
5
6
|
|
|
6
7
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
7
8
|
const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
|
|
@@ -38,37 +39,77 @@ class ModelPlugin {
|
|
|
38
39
|
this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
|
|
39
40
|
}
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
truncateMessagesToTargetLength = (messages, targetTokenLength) => {
|
|
43
|
+
// Calculate the token length of each message
|
|
44
|
+
const tokenLengths = messages.map((message) => ({
|
|
45
|
+
message,
|
|
46
|
+
tokenLength: encode(this.messagesToChatML([message], false)).length,
|
|
47
|
+
}));
|
|
45
48
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
// Calculate the total token length of all messages
|
|
50
|
+
let totalTokenLength = tokenLengths.reduce(
|
|
51
|
+
(sum, { tokenLength }) => sum + tokenLength,
|
|
52
|
+
0
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
// If we're already under the target token length, just bail
|
|
56
|
+
if (totalTokenLength <= targetTokenLength) return messages;
|
|
57
|
+
|
|
58
|
+
// Remove and/or truncate messages until the target token length is reached
|
|
59
|
+
let index = 0;
|
|
60
|
+
while (totalTokenLength > targetTokenLength) {
|
|
61
|
+
const message = tokenLengths[index].message;
|
|
62
|
+
|
|
63
|
+
// Skip system messages
|
|
64
|
+
if (message.role === 'system') {
|
|
65
|
+
index++;
|
|
66
|
+
continue;
|
|
54
67
|
}
|
|
55
|
-
|
|
56
|
-
|
|
68
|
+
|
|
69
|
+
const currentTokenLength = tokenLengths[index].tokenLength;
|
|
70
|
+
|
|
71
|
+
if (totalTokenLength - currentTokenLength >= targetTokenLength) {
|
|
72
|
+
// Remove the message entirely if doing so won't go below the target token length
|
|
73
|
+
totalTokenLength -= currentTokenLength;
|
|
74
|
+
tokenLengths.splice(index, 1);
|
|
75
|
+
} else {
|
|
76
|
+
// Truncate the message to fit the remaining target token length
|
|
77
|
+
const emptyContentLength = encode(this.messagesToChatML([{ ...message, content: '' }], false)).length;
|
|
78
|
+
const otherMessageTokens = totalTokenLength - currentTokenLength;
|
|
79
|
+
const tokensToKeep = targetTokenLength - (otherMessageTokens + emptyContentLength);
|
|
80
|
+
|
|
81
|
+
const truncatedContent = getFirstNToken(message.content, tokensToKeep);
|
|
82
|
+
const truncatedMessage = { ...message, content: truncatedContent };
|
|
83
|
+
|
|
84
|
+
tokenLengths[index] = {
|
|
85
|
+
message: truncatedMessage,
|
|
86
|
+
tokenLength: encode(this.messagesToChatML([ truncatedMessage ], false)).length
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// calculate the length again to keep us honest
|
|
90
|
+
totalTokenLength = tokenLengths.reduce(
|
|
91
|
+
(sum, { tokenLength }) => sum + tokenLength,
|
|
92
|
+
0
|
|
93
|
+
);
|
|
57
94
|
}
|
|
58
95
|
}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
96
|
+
|
|
97
|
+
// Return the modified messages array
|
|
98
|
+
return tokenLengths.map(({ message }) => message);
|
|
99
|
+
};
|
|
100
|
+
|
|
62
101
|
//convert a messages array to a simple chatML format
|
|
63
|
-
messagesToChatML =
|
|
102
|
+
messagesToChatML(messages, addAssistant = true) {
|
|
64
103
|
let output = "";
|
|
65
104
|
if (messages && messages.length) {
|
|
66
105
|
for (let message of messages) {
|
|
67
|
-
output += (message.role && message.content) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
|
|
106
|
+
output += (message.role && (message.content || message.content === '')) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
|
|
68
107
|
}
|
|
69
108
|
// you always want the assistant to respond next so add a
|
|
70
109
|
// directive for that
|
|
71
|
-
|
|
110
|
+
if (addAssistant) {
|
|
111
|
+
output += "<|im_start|>assistant\n";
|
|
112
|
+
}
|
|
72
113
|
}
|
|
73
114
|
return output;
|
|
74
115
|
}
|
|
package/graphql/plugins/openAiChatPlugin.js
CHANGED
|
@@ -19,7 +19,7 @@ class OpenAIChatPlugin extends ModelPlugin {
|
|
|
19
19
|
// Check if the token length exceeds the model's max token length
|
|
20
20
|
if (tokenLength > modelMaxTokenLength) {
|
|
21
21
|
// Remove older messages until the token length is within the model's limit
|
|
22
|
-
requestMessages = this.
|
|
22
|
+
requestMessages = this.truncateMessagesToTargetLength(requestMessages, modelMaxTokenLength);
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
const requestParameters = {
|
|
package/graphql/plugins/openAiCompletionPlugin.js
CHANGED
|
@@ -16,7 +16,7 @@ class OpenAICompletionPlugin extends ModelPlugin {
|
|
|
16
16
|
let requestParameters = {};
|
|
17
17
|
|
|
18
18
|
if (modelPromptMessages) {
|
|
19
|
-
const requestMessages = this.
|
|
19
|
+
const requestMessages = this.truncateMessagesToTargetLength(modelPromptMessages, modelMaxTokenLength - 1);
|
|
20
20
|
modelPromptMessagesML = this.messagesToChatML(requestMessages);
|
|
21
21
|
tokenLength = encode(modelPromptMessagesML).length;
|
|
22
22
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "0.0.9",
|
|
3
|
+
"version": "0.0.10",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
"main": "index.js",
|
|
22
22
|
"scripts": {
|
|
23
23
|
"start": "node start.js",
|
|
24
|
-
"test": "
|
|
24
|
+
"test": "ava"
|
|
25
25
|
},
|
|
26
26
|
"author": "",
|
|
27
27
|
"license": "MIT",
|
|
@@ -53,10 +53,18 @@
|
|
|
53
53
|
"ytdl-core": "^4.11.2"
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
|
56
|
-
"
|
|
57
|
-
"
|
|
56
|
+
"ava": "^5.2.0",
|
|
57
|
+
"dotenv": "^16.0.3"
|
|
58
58
|
},
|
|
59
59
|
"publishConfig": {
|
|
60
60
|
"access": "private"
|
|
61
|
+
},
|
|
62
|
+
"ava": {
|
|
63
|
+
"files": [
|
|
64
|
+
"tests/**/*.test.js"
|
|
65
|
+
],
|
|
66
|
+
"require": [
|
|
67
|
+
"dotenv/config"
|
|
68
|
+
]
|
|
61
69
|
}
|
|
62
70
|
}
|
package/pathways/summary.js
CHANGED
|
@@ -8,7 +8,7 @@ const { PathwayResolver } = require('../graphql/pathwayResolver');
|
|
|
8
8
|
|
|
9
9
|
module.exports = {
|
|
10
10
|
// The main prompt function that takes the input text and asks to generate a summary.
|
|
11
|
-
prompt: `{{{text}}}\n\nWrite a summary of the above text:\n\n`,
|
|
11
|
+
prompt: `{{{text}}}\n\nWrite a summary of the above text. If the text is in a language other than english, make sure the summary is written in the same language:\n\n`,
|
|
12
12
|
|
|
13
13
|
// Define input parameters for the prompt, such as the target length of the summary.
|
|
14
14
|
inputParameters: {
|
|
@@ -26,7 +26,7 @@ module.exports = {
|
|
|
26
26
|
return await pathwayResolver.resolve(args);
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
const errorMargin = 0.
|
|
29
|
+
const errorMargin = 0.1;
|
|
30
30
|
const lowTargetLength = originalTargetLength * (1 - errorMargin);
|
|
31
31
|
const targetWords = Math.round(originalTargetLength / 6.6);
|
|
32
32
|
|
|
@@ -39,17 +39,32 @@ module.exports = {
|
|
|
39
39
|
let summary = '';
|
|
40
40
|
let pathwayResolver = new PathwayResolver({ config, pathway, args, requestState });
|
|
41
41
|
|
|
42
|
-
|
|
43
42
|
// Modify the prompt to be words-based instead of characters-based.
|
|
44
|
-
pathwayResolver.pathwayPrompt = `
|
|
43
|
+
pathwayResolver.pathwayPrompt = `Write a summary of all of the text below. If the text is in a language other than english, make sure the summary is written in the same language. Your summary should be ${targetWords} words in length.\n\nText:\n\n{{{text}}}\n\nSummary:\n\n`
|
|
45
44
|
|
|
46
45
|
let i = 0;
|
|
47
|
-
//
|
|
48
|
-
while ((
|
|
46
|
+
// Make sure it's long enough to start
|
|
47
|
+
while ((summary.length < lowTargetLength) && i < MAX_ITERATIONS) {
|
|
49
48
|
summary = await pathwayResolver.resolve(args);
|
|
50
49
|
i++;
|
|
51
50
|
}
|
|
52
51
|
|
|
52
|
+
// If it's too long, it could be because the input text was chunked
|
|
53
|
+
// and now we have all the chunks together. We can summarize that
|
|
54
|
+
// to get a comprehensive summary.
|
|
55
|
+
if (summary.length > originalTargetLength) {
|
|
56
|
+
pathwayResolver.pathwayPrompt = `Write a summary of all of the text below. If the text is in a language other than english, make sure the summary is written in the same language. Your summary should be ${targetWords} words in length.\n\nText:\n\n${summary}\n\nSummary:\n\n`
|
|
57
|
+
summary = await pathwayResolver.resolve(args);
|
|
58
|
+
i++;
|
|
59
|
+
|
|
60
|
+
// Now make sure it's not too long
|
|
61
|
+
while ((summary.length > originalTargetLength) && i < MAX_ITERATIONS) {
|
|
62
|
+
pathwayResolver.pathwayPrompt = `${summary}\n\nIs that less than ${targetWords} words long? If not, try again using a length of no more than ${targetWords} words.\n\n`;
|
|
63
|
+
summary = await pathwayResolver.resolve(args);
|
|
64
|
+
i++;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
53
68
|
// If the summary is still too long, truncate it.
|
|
54
69
|
if (summary.length > originalTargetLength) {
|
|
55
70
|
return semanticTruncate(summary, originalTargetLength);
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
const test = require('ava');
|
|
2
|
+
|
|
3
|
+
const { getSemanticChunks } = require('../graphql/chunker');
|
|
4
|
+
const { encode } = require('gpt-3-encoder')
|
|
5
|
+
|
|
6
|
+
const testText = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
|
|
7
|
+
|
|
8
|
+
Vivamus id pharetra odio. Sed consectetur leo sed tortor dictum venenatis.Donec gravida libero non accumsan suscipit.Donec lectus turpis, ullamcorper eu pulvinar iaculis, ornare ut risus.Phasellus aliquam, turpis quis viverra condimentum, risus est pretium metus, in porta ipsum tortor vitae elit.Pellentesque id finibus erat. In suscipit, sapien non posuere dignissim, augue nisl ultrices tortor, sit amet eleifend nibh elit at risus.
|
|
9
|
+
|
|
10
|
+
Donec diam ligula, sagittis ut nisl tincidunt, porta sodales magna.Vestibulum ut dui arcu.Fusce at dolor ex.Aliquam eu justo non libero volutpat pulvinar at id urna.Donec nec purus sed elit bibendum faucibus.Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas.Vivamus iaculis mattis velit, ut lacinia massa lacinia quis.Phasellus porttitor gravida ex, id aliquet eros rhoncus quis. Ut fringilla, lectus a vehicula luctus, diam odio convallis dolor, sodales pharetra nulla ex dictum justo.Ut faucibus, augue quis dictum iaculis, diam leo maximus sapien, sit amet vulputate eros quam sed sem.Cras malesuada, sapien sit amet iaculis euismod, nunc odio lacinia est, dictum iaculis ante nisi in est.Fusce vehicula lorem tellus.Nullam a tempus nisi .
|
|
11
|
+
|
|
12
|
+
Sed ut lectus nec ligula blandit tempus.Donec faucibus turpis id urna vehicula imperdiet.Duis tempor vitae orci interdum dignissim.Phasellus sed efficitur sem.Nullam accumsan, turpis vitae consectetur ullamcorper, lectus purus tincidunt nisi, in pulvinar leo tortor at sem.Donec at feugiat dui, nec rhoncus nibh.Nam faucibus ultrices nisl at lobortis.Morbi congue, nisl vel fermentum tristique, dui ipsum rhoncus massa, non varius nibh massa in turpis.Vestibulum vulputate, felis quis lacinia porta, nulla ex volutpat lorem, non rhoncus neque erat quis arcu.Morbi massa nisl, hendrerit eget tortor condimentum, lobortis dapibus sem.Aliquam ut dapibus elit.Sed porta dignissim ante.Nullam interdum ligula et massa vehicula, vel gravida diam laoreet.Vivamus et enim eget turpis pellentesque laoreet.Vivamus pellentesque neque et mauris imperdiet pulvinar.
|
|
13
|
+
|
|
14
|
+
Aliquam eget ligula congue, tincidunt magna eu, rutrum urna.Sed consequat orci est, vel laoreet magna tincidunt sit amet.Curabitur eget condimentum odio, vitae condimentum elit.Duis viverra lobortis magna.Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas.Sed facilisis mi eu scelerisque pharetra.Cras et massa odio.Praesent quis nulla vitae mi blandit egestas ac vitae libero.Cras ultricies ex non consequat scelerisque.Nulla et est ac sem placerat convallis ac vitae massa.Phasellus lobortis mauris vel est vehicula lobortis.Curabitur ipsum ipsum, ullamcorper eget placerat sit amet, dapibus iaculis dui.Phasellus facilisis rutrum metus nec euismod.
|
|
15
|
+
|
|
16
|
+
Nam viverra est ac orci rhoncus, mollis mattis mi lobortis.Maecenas lectus ex, pulvinar vel mauris vel, egestas ornare massa.Nam placerat, tellus vel ullamcorper ullamcorper, enim felis egestas tellus, eu dictum augue tortor vel libero.Integer vel nunc felis.Nulla vehicula et enim non luctus.Vestibulum non odio magna.Donec vitae ipsum et nisl vestibulum maximus eu at augue.Morbi ac tristique quam.Suspendisse vestibulum nec dui et consectetur.Aliquam a dapibus dolor, sit amet fringilla eros.Nam id lorem nulla.
|
|
17
|
+
|
|
18
|
+
Proin vulputate risus purus, id tincidunt magna eleifend vel.Pellentesque et commodo leo, sit amet molestie nunc.Nunc purus lectus, interdum ut mauris ac, varius pretium magna.Etiam sollicitudin eros at pretium molestie.Cras fermentum sagittis elit at egestas.Fusce auctor lacinia nisl ac ullamcorper.Interdum et malesuada fames ac ante ipsum primis in faucibus.Fusce commodo pretium urna vel consequat.In finibus tellus vitae magna pharetra, porttitor egestas libero cursus.Donec eget tincidunt dolor, ac tristique diam.Etiam interdum dictum ex suscipit tempus.In hac habitasse platea dictumst.Nulla ornare libero a leo mollis, sed gravida leo finibus.Nunc ornare, dolor ac convallis varius, quam ipsum ultricies dui, non vehicula est eros eget ipsum.Mauris vel rhoncus ligula, non porta metus.
|
|
19
|
+
|
|
20
|
+
Ut non felis pretium leo viverra tincidunt.Vivamus et ligula commodo dolor faucibus gravida.Quisque eu dolor ac metus pretium pharetra.Integer mattis efficitur libero, sed condimentum nulla ultricies eu.Donec turpis orci, fermentum vitae imperdiet nec, luctus quis lectus.Nunc viverra ornare libero.Vestibulum elementum tempus tortor id semper.
|
|
21
|
+
|
|
22
|
+
Aliquam in dapibus risus.Praesent vitae condimentum elit, sodales pellentesque diam.Curabitur luctus pellentesque nunc, ut eleifend urna dictum ac.Aenean rhoncus lacinia quam a suscipit.Proin purus metus, egestas a pretium eu, tempus ut ante.Sed tellus turpis, hendrerit consequat porta id, porttitor non dolor.Proin volutpat massa a dui dictum facilisis a vel eros.Fusce eu efficitur odio.Aliquam interdum metus id ex dapibus dapibus.Nullam porttitor non sapien nec rhoncus.Cras iaculis fringilla cursus.Praesent at leo orci.Sed eget vulputate eros, eget auctor sapien.Nulla auctor, lectus ut tincidunt rhoncus, ante lorem volutpat eros, ac tincidunt enim ipsum at ex.Fusce dolor arcu, pretium eget elementum vel, semper at ipsum.
|
|
23
|
+
|
|
24
|
+
Integer rhoncus fringilla felis ac tincidunt.Phasellus eu ultricies tellus.Sed pharetra, eros sed dignissim mattis, mi lectus blandit elit, vitae euismod ipsum sapien a eros.Aliquam lobortis tellus venenatis, sagittis lorem non, eleifend odio.Duis ultrices urna vel commodo varius.Sed ultricies mauris ut velit dignissim, eu lobortis ex tempor.Praesent vitae iaculis nisl.Vestibulum id convallis tellus.Vivamus eu consectetur erat.Curabitur interdum est non nibh malesuada ullamcorper.Phasellus mollis arcu a pharetra lacinia.Praesent sit amet sem non dui iaculis tincidunt.Aliquam vitae libero viverra metus feugiat volutpat ut eget sem.Nam facilisis pulvinar urna, ut venenatis ligula accumsan finibus.Maecenas nec aliquam nulla.Maecenas mattis magna erat.
|
|
25
|
+
|
|
26
|
+
Nunc a nulla sed ante sollicitudin ultrices a a ante.Sed feugiat scelerisque leo, eget venenatis orci cursus eget.Ut pretium leo et nunc sodales, in luctus erat faucibus.Interdum et malesuada fames ac ante ipsum primis in faucibus.Mauris facilisis lorem quis turpis commodo, id vulputate metus molestie.Fusce id neque vestibulum, pretium elit in, ultrices justo.Praesent turpis dui, ullamcorper in vulputate ut, posuere at sapien.Etiam laoreet ultrices felis, id venenatis purus.Sed nec mauris pharetra, rhoncus sem non, interdum justo.Nulla sed tincidunt nisi.Suspendisse luctus viverra volutpat.Duis arcu nulla, euismod eu scelerisque in, vulputate eget quam.
|
|
27
|
+
|
|
28
|
+
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Fusce at dignissim quam.Suspendisse eget metus nec sem accumsan sagittis.Suspendisse non mollis lacus.Donec ac consectetur ante.Nullam luctus, nibh ac imperdiet porta, sem felis bibendum nibh, ut sollicitudin libero nulla a sapien.Sed tristique odio enim, eget tempor enim cursus vel.Morbi tristique lobortis tortor, nec placerat lorem suscipit ac.Nullam sed sodales diam, sed tincidunt est.Quisque semper velit sed risus dictum pretium.Proin condimentum, nisi a vulputate tristique, tellus erat scelerisque nisi, tincidunt viverra est neque non magna.Quisque nibh augue, interdum non justo et, varius rutrum erat.Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas.
|
|
29
|
+
|
|
30
|
+
Vestibulum et lorem auctor, vestibulum nisl id, elementum metus.Pellentesque quis mi a augue consectetur cursus.Morbi sodales risus et faucibus dictum.Ut in lobortis nisl, et euismod nisl.Donec ornare tellus placerat, blandit justo quis, pharetra nisl.Nulla scelerisque magna at nisi suscipit commodo.Fusce pellentesque in elit et consequat.Phasellus vehicula accumsan enim, vitae pellentesque nulla.Nullam id arcu vitae nunc consectetur mattis.Fusce ac sapien vel mi congue fringilla.Nulla mattis consectetur fringilla.Morbi orci elit, tempor in rhoncus eget, fringilla eget erat.
|
|
31
|
+
|
|
32
|
+
Phasellus nec lorem lectus.Donec in cursus elit.In dictum elementum odio a scelerisque.Phasellus ac sapien eget velit accumsan elementum.Mauris odio eros, vulputate eu aliquet a, bibendum in quam.Integer euismod libero ac massa imperdiet, ullamcorper cursus risus auctor.Nam rutrum erat eget tortor suscipit semper sit amet nec mauris.Pellentesque nec semper neque.Nunc fringilla nisl erat, a sollicitudin tortor accumsan finibus.
|
|
33
|
+
|
|
34
|
+
Integer vulputate ex dui, vitae scelerisque purus viverra vel.Cras ultricies purus in nibh dapibus, non hendrerit nulla aliquam.Fusce vitae gravida urna.Mauris eleifend rutrum ex, at fermentum enim fringilla quis.Suspendisse dignissim est eget tempus condimentum.Fusce scelerisque, felis et malesuada dictum, mauris dolor cursus ex, eget pulvinar sem nulla id diam.Ut volutpat tincidunt efficitur.Nunc vel risus fringilla, lacinia urna vitae, aliquet nulla.Nunc sed pulvinar dolor, eu fermentum velit.Curabitur a pretium quam, ut consectetur neque.Nunc ultricies, ex sed mattis efficitur, nulla nunc convallis odio, sit amet pellentesque orci tortor ut sapien.Vivamus felis orci, ultricies eget lacinia at, blandit vitae quam.In lacinia dui nec tincidunt maximus.Donec feugiat consectetur bibendum.Aenean eget vestibulum lacus.
|
|
35
|
+
|
|
36
|
+
Suspendisse vel molestie magna, et viverra justo.Aenean nec mi felis.Nam lacinia purus et congue facilisis.Pellentesque eget odio sed sem tincidunt imperdiet.Proin finibus ex nec placerat aliquet.Phasellus quis sapien nunc.Mauris eu augue aliquam sem suscipit vehicula a luctus augue.Phasellus ac scelerisque nibh.Nullam eleifend eleifend sapien eget convallis.
|
|
37
|
+
|
|
38
|
+
Nunc vitae metus risus.Ut iaculis dolor accumsan bibendum posuere.Morbi vitae odio sed velit dictum consequat.Aliquam vel erat vitae lacus luctus cursus vel ut risus.Aliquam a nunc eu lorem consequat finibus.Sed non enim vestibulum, ornare dui id, dignissim turpis.Etiam fermentum rutrum porttitor.Maecenas id nisl sodales, ornare turpis placerat, tincidunt dui.
|
|
39
|
+
|
|
40
|
+
Nulla aliquam purus at leo fringilla euismod.Praesent condimentum augue nibh, sed scelerisque mauris bibendum vitae.Vivamus maximus enim non massa commodo gravida.Cras iaculis elit ac est dapibus convallis.Quisque in tortor tincidunt, placerat turpis pulvinar, rhoncus orci.In vel risus et lacus lacinia volutpat.Maecenas facilisis fermentum dictum.Lorem ipsum dolor sit amet, consectetur adipiscing elit.Praesent aliquam pretium pellentesque.In eleifend leo eros, in lobortis eros elementum maximus.Fusce in orci ut massa vehicula mollis vitae non nibh.Sed ac porttitor urna.Nulla ac venenatis sapien, eget vulputate metus.
|
|
41
|
+
|
|
42
|
+
Mauris hendrerit lacus quam, vel mollis ligula porttitor ac.Nulla ornare libero at faucibus dictum.Donec tincidunt viverra sapien nec tincidunt.Donec leo sapien, rutrum quis dui a, auctor sodales nisi.Fusce condimentum eros sit amet ligula viverra, eget ullamcorper erat dapibus.Suspendisse dignissim ligula sed luctus aliquet.Aenean consectetur enim non nibh semper volutpat.
|
|
43
|
+
|
|
44
|
+
Mauris diam dolor, maximus et ultrices sed, semper sed felis.Morbi ac eros tellus.Maecenas eget ex vitae quam lacinia eleifend non nec leo.Donec condimentum consectetur nunc, quis luctus elit commodo eu.Nunc tincidunt condimentum neque, sed porta ligula porttitor et.Suspendisse scelerisque id massa sit amet placerat.Sed eleifend aliquet facilisis.Donec ac purus nec metus vestibulum euismod.Maecenas sollicitudin consequat ornare.Suspendisse pharetra vehicula eros nec malesuada.`;
|
|
45
|
+
|
|
46
|
+
test('should return identical text that chunker was passed, given large chunk size (1812)', async t => {
|
|
47
|
+
const maxChunkToken = 1812;
|
|
48
|
+
const chunks = getSemanticChunks(testText, maxChunkToken);
|
|
49
|
+
t.true(chunks.length > 0); //check chunking
|
|
50
|
+
t.true(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
|
|
51
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
52
|
+
t.is(recomposedText, testText); //check recomposition
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test('should return identical text that chunker was passed, given medium chunk size (500)', async t => {
|
|
56
|
+
const maxChunkToken = 500;
|
|
57
|
+
const chunks = getSemanticChunks(testText, maxChunkToken);
|
|
58
|
+
t.true(chunks.length > 1); //check chunking
|
|
59
|
+
t.true(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
|
|
60
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
61
|
+
t.is(recomposedText, testText); //check recomposition
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test('should return identical text that chunker was passed, given tiny chunk size (1)', async t => {
|
|
65
|
+
const maxChunkToken = 1;
|
|
66
|
+
const chunks = getSemanticChunks(testText, maxChunkToken);
|
|
67
|
+
t.true(chunks.length > 1); //check chunking
|
|
68
|
+
t.true(chunks.every(chunk => encode(chunk).length <= maxChunkToken)); //check chunk size
|
|
69
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
70
|
+
t.is(recomposedText, testText); //check recomposition
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
/*
|
|
74
|
+
it('should return identical text that chunker was passed, given tiny chunk size (1)', () => {
|
|
75
|
+
const maxChunkToken = 1;
|
|
76
|
+
const chunks = getSemanticChunks(testText, maxChunkToken);
|
|
77
|
+
expect(chunks.length).toBeGreaterThan(1); //check chunking
|
|
78
|
+
expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
|
|
79
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
80
|
+
expect(recomposedText).toBe(testText); //check recomposition
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it('should return identical text that chunker was passed, given huge chunk size (32000)', () => {
|
|
84
|
+
const maxChunkToken = 32000;
|
|
85
|
+
const chunks = getSemanticChunks(testText, maxChunkToken);
|
|
86
|
+
expect(chunks.length).toBe(1); //check chunking
|
|
87
|
+
expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
|
|
88
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
89
|
+
expect(recomposedText).toBe(testText); //check recomposition
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
const testTextNoSpaces = `Loremipsumdolorsitamet,consecteturadipiscingelit.Inideratsem.Phasellusacdapibuspurus,infermentumnunc.Maurisquisrutrummagna.Quisquerutrum,auguevelblanditposuere,auguemagnacon vallisturpis,necelementumauguemaurissitametnunc.Aeneansitametleoest.Nuncanteex,blanditetfelisut,iaculislaciniaest.Phasellusdictumorciidliberoullamcorpertempor.Vivamusidpharetraodioq.Sedconsecteturleosedtortordictumvenenatis.Donecgravidaliberononaccumsansuscipit.Doneclectusturpis,ullamcorpereupulvinariaculis,ornareutrisus.Phasellusaliquam,turpisquisviverracondimentum,risusestpretiummetus,inportaips umtortorvita elit.Pellentesqueidfinibuserat.Insuscipit,sapiennonposueredignissim,auguenisl ultricestortor,sitameteleifendnibhelitatrisus.`;
|
|
93
|
+
|
|
94
|
+
it('should return identical text that chunker was passed, given no spaces and small chunks(5)', () => {
|
|
95
|
+
const maxChunkToken = 5;
|
|
96
|
+
const chunks = getSemanticChunks(testTextNoSpaces, maxChunkToken);
|
|
97
|
+
expect(chunks.length).toBeGreaterThan(0); //check chunking
|
|
98
|
+
expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
|
|
99
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
100
|
+
expect(recomposedText).toBe(testTextNoSpaces); //check recomposition
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
const testTextShortWeirdSpaces=`Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc.............................. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit a;lksjdf 098098- -23 eln ;lkn l;kn09 oij[0u ,,,,,,,,,,,,,,,,,,,,, amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
Vivamus id pharetra odio. Sed consectetur leo sed tortor dictum venenatis.Donec gravida libero non accumsan suscipit.Donec lectus turpis, ullamcorper eu pulvinar iaculis, ornare ut risus.Phasellus aliquam, turpis quis viverra condimentum, risus est pretium metus, in porta ipsum tortor vitae elit.Pellentesque id finibus erat. In suscipit, sapien non posuere dignissim, augue nisl ultrices tortor, sit amet eleifend nibh elit at risus.`;
|
|
109
|
+
|
|
110
|
+
it('should return identical text that chunker was passed, given weird spaces and tiny chunks(1)', () => {
|
|
111
|
+
const maxChunkToken = 1;
|
|
112
|
+
const chunks = getSemanticChunks(testTextShortWeirdSpaces, maxChunkToken);
|
|
113
|
+
expect(chunks.length).toBeGreaterThan(0); //check chunking
|
|
114
|
+
expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
|
|
115
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
116
|
+
expect(recomposedText).toBe(testTextShortWeirdSpaces); //check recomposition
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
it('should return identical text that chunker was passed, given weird spaces and small chunks(10)', () => {
|
|
120
|
+
const maxChunkToken = 10;
|
|
121
|
+
const chunks = getSemanticChunks(testTextShortWeirdSpaces, maxChunkToken);
|
|
122
|
+
expect(chunks.length).toBeGreaterThan(0); //check chunking
|
|
123
|
+
expect(chunks.every(chunk => encode(chunk).length <= maxChunkToken)).toBe(true); //check chunk size
|
|
124
|
+
const recomposedText = chunks.reduce((acc, chunk) => acc + chunk, '');
|
|
125
|
+
expect(recomposedText).toBe(testTextShortWeirdSpaces); //check recomposition
|
|
126
|
+
});*/
|
package/tests/chunking.test.js
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
|
+
const test = require('ava');
|
|
1
2
|
const { getTestServer } = require('./main.test');
|
|
2
3
|
|
|
3
|
-
jest.setTimeout(1800000);
|
|
4
|
-
|
|
5
4
|
const testServer = getTestServer();
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
afterAll(async () => {
|
|
6
|
+
// Stop the shared test server once all tests in this file have run;
// the `always` variant runs the hook even when earlier tests failed.
test.after.always(async function stopTestServer() {
    await testServer.stop();
});
|
|
11
9
|
|
|
12
|
-
|
|
10
|
+
test('chunking test of translate endpoint with huge text', async t => {
|
|
11
|
+
t.timeout(180000);
|
|
13
12
|
const response = await testServer.executeOperation({
|
|
14
13
|
query: 'query translate($text: String!) { translate(text: $text) { result } }',
|
|
15
14
|
variables: {
|
|
@@ -54,12 +53,12 @@ Mauris hendrerit lacus quam, vel mollis ligula porttitor ac.Nulla ornare libero
|
|
|
54
53
|
Mauris diam dolor, maximus et ultrices sed, semper sed felis.Morbi ac eros tellus.Maecenas eget ex vitae quam lacinia eleifend non nec leo.Donec condimentum consectetur nunc, quis luctus elit commodo eu.Nunc tincidunt condimentum neque, sed porta ligula porttitor et.Suspendisse scelerisque id massa sit amet placerat.Sed eleifend aliquet facilisis.Donec ac purus nec metus vestibulum euismod.Maecenas sollicitudin consequat ornare.Suspendisse pharetra vehicula eros nec malesuada.` },
|
|
55
54
|
});
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
t.is(response.errors, undefined);
|
|
57
|
+
t.true(response.data?.translate.result.length > 1000);
|
|
59
58
|
});
|
|
60
59
|
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
test('chunking test of translate endpoint with single long text sentence', async t => {
|
|
61
|
+
t.timeout(180000);
|
|
63
62
|
const response = await testServer.executeOperation({
|
|
64
63
|
query: 'query translate($text: String!) { translate(text: $text) { result } }',
|
|
65
64
|
variables: {
|
|
@@ -67,11 +66,12 @@ it('chunking test of translate endpoint with single long text sentence', async (
|
|
|
67
66
|
}
|
|
68
67
|
});
|
|
69
68
|
|
|
70
|
-
|
|
71
|
-
|
|
69
|
+
t.is(response.errors, undefined);
|
|
70
|
+
t.true(response.data?.translate.result.length > 200);
|
|
72
71
|
});
|
|
73
72
|
|
|
74
|
-
|
|
73
|
+
test('chunking test of translate endpoint with two long text sentence', async t => {
|
|
74
|
+
t.timeout(180000);
|
|
75
75
|
const response = await testServer.executeOperation({
|
|
76
76
|
query: 'query translate($text: String!) { translate(text: $text) { result } }',
|
|
77
77
|
variables: {
|
|
@@ -79,11 +79,12 @@ it('chunking test of translate endpoint with two long text sentence', async () =
|
|
|
79
79
|
}
|
|
80
80
|
});
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
t.is(response.errors, undefined);
|
|
83
|
+
t.true(response.data?.translate.result.length > 500);
|
|
84
84
|
});
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
test('chunking test...', async t => {
|
|
87
|
+
t.timeout(180000);
|
|
87
88
|
const response = await testServer.executeOperation({
|
|
88
89
|
query: 'query translate($text: String!) { translate(text: $text) { result } }',
|
|
89
90
|
variables: {
|
|
@@ -144,6 +145,6 @@ it('chunking test...', async () => {
|
|
|
144
145
|
}
|
|
145
146
|
});
|
|
146
147
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
});
|
|
148
|
+
t.is(response.errors, undefined);
|
|
149
|
+
t.true(response.data?.translate.result.length > 500);
|
|
150
|
+
});
|
package/tests/main.test.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
const test = require('ava');
|
|
2
|
+
|
|
1
3
|
const { ApolloServer } = require('apollo-server');
|
|
2
4
|
const { config } = require('../config');
|
|
3
5
|
const { typeDefs, resolvers } = require('../index')();
|
|
4
6
|
|
|
5
|
-
jest.setTimeout(60000);
|
|
6
|
-
|
|
7
7
|
const getTestServer = () => {
|
|
8
8
|
return new ApolloServer({
|
|
9
9
|
typeDefs,
|
|
@@ -15,82 +15,82 @@ const getTestServer = () => {
|
|
|
15
15
|
const testServer = getTestServer();
|
|
16
16
|
|
|
17
17
|
//stop server after all tests
|
|
18
|
-
|
|
18
|
+
test.after.always('cleanup', async function stopTestServer() {
    // `always` runs this hook even when earlier tests failed, so the server is released either way.
    await testServer.stop();
});
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
// Runs the `bias` query against the in-process test server and checks that it
// succeeds with a result mentioning "yes", "no" or "bias" (case-insensitive).
test('validates bias endpoint', async (t) => {
    const query = 'query bias($text: String!) { bias(text: $text) { result } }';
    const variables = { text: 'hello there my dear world!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.regex(data?.bias?.result, /(yes|no|bias)/i);
});
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
// Runs the `complete` query against the in-process test server and checks that
// it succeeds with a non-empty result.
test('validates completion endpoint', async (t) => {
    const query = 'query complete($text: String!) { complete(text: $text) { result } }';
    const variables = { text: 'hello there my dear world!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.true(data?.complete?.result.length > 0);
});
|
|
41
41
|
|
|
42
|
-
|
|
42
|
+
// Runs the `entities` query with `count: 3` and checks that it succeeds, that
// exactly three entities come back, and that each one has a name and a
// definition.
test('validates entities endpoint with given num of count return', async (t) => {
    const response = await testServer.executeOperation({
        query: 'query entities($text: String!, $count: Int) { entities(text: $text, count: $count){ result { name, definition } } }',
        variables: { text: 'hello there my dear world!', count: 3 },
    });

    t.is(response.errors, undefined);

    // The list lives at data.entities.result (see the query above). The
    // previous loop read data.result.entities, which is always undefined, so
    // the per-entity assertions were silently skipped.
    // Falling back to [] makes a missing payload fail the length assertion
    // instead of throwing a TypeError.
    const entities = response.data?.entities?.result ?? [];
    t.is(entities.length, 3);

    for (const entity of entities) {
        t.truthy(entity.name);
        t.truthy(entity.definition);
    }
});
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
// Runs the `paraphrase` query against the in-process test server and checks
// that it succeeds with a truthy result.
test('validates paraphrase endpoint', async (t) => {
    const query = 'query paraphrase($text: String!) { paraphrase(text: $text) { result } }';
    const variables = { text: 'hello there my dear world!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.truthy(data?.paraphrase?.result);
});
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
// Runs the `sentiment` query against the in-process test server and checks
// that it succeeds with a truthy result.
test('validates sentiment endpoint', async (t) => {
    const query = 'query sentiment($text: String!) { sentiment(text: $text) { result } }';
    const variables = { text: 'hello there my dear world!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.truthy(data?.sentiment.result);
});
|
|
75
75
|
|
|
76
|
-
|
|
76
|
+
// Sends deliberately misspelled text through the `edit` query and checks that
// the result matches /hello.*world/i.
test('validates edit endpoint', async (t) => {
    const query = 'query edit($text: String!) { edit(text: $text) { result } }';
    const variables = { text: 'helo there my dear worldd!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.regex(data?.edit.result, /hello.*world/i);
});
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
// Runs the `summary` query against the in-process test server and checks that
// it succeeds with a truthy result.
test('validates summary endpoint', async (t) => {
    const query = 'query summary($text: String!) { summary(text: $text) { result } }';
    const variables = { text: 'hello there my dear world!' };

    const { errors, data } = await testServer.executeOperation({ query, variables });

    t.is(errors, undefined);
    t.truthy(data?.summary.result);
});
|
|
95
95
|
|
|
96
96
|
module.exports = {
|
package/tests/translate.test.js
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
|
+
const test = require('ava');
|
|
1
2
|
const { getTestServer } = require('./main.test');
|
|
2
3
|
|
|
3
|
-
jest.setTimeout(1800000);
|
|
4
|
-
|
|
5
4
|
const testServer = getTestServer();
|
|
6
5
|
|
|
7
|
-
//stop server after all tests
|
|
8
|
-
|
|
6
|
+
// stop server after all tests
|
|
7
|
+
test.after.always(async function stopTestServer() {
    // `always` runs this hook even when earlier tests failed, so the server is released either way.
    await testServer.stop();
});
|
|
11
10
|
|
|
12
|
-
|
|
11
|
+
test('test translate endpoint with huge arabic text english translation and check return non-arabic/english', async t => {
|
|
13
12
|
const response = await testServer.executeOperation({
|
|
14
13
|
query: 'query translate($text: String!, $to:String) { translate(text: $text, to:$to) { result } }',
|
|
15
14
|
variables: {
|
|
@@ -115,9 +114,9 @@ it('test translate endpoint with huge arabic text english translation and check
|
|
|
115
114
|
` },
|
|
116
115
|
});
|
|
117
116
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
//check return only contains non-Arabic characters
|
|
121
|
-
|
|
117
|
+
t.falsy(response.errors);
|
|
118
|
+
t.true(response.data?.translate.result.length > 1000); // check return length huge
|
|
119
|
+
// check return only contains non-Arabic characters
|
|
120
|
+
t.notRegex(response.data?.translate.result, /[ء-ي]/);
|
|
122
121
|
});
|
|
123
122
|
|