@mikugg/guidance 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib/template.d.ts
CHANGED
@@ -1,5 +1,5 @@
-import { AbstractTokenizer } from "./tokenizer";
-import { AbstractTokenGenerator } from "./token-generator";
+import { AbstractTokenizer } from './tokenizer';
+import { AbstractTokenGenerator } from './token-generator';
 export declare enum TEMPLATE_METHODS {
     SEL = "SEL",
     GEN = "GEN"
@@ -7,6 +7,7 @@ export declare enum TEMPLATE_METHODS {
 export declare class TemplateProcessor<TRequestOptions = undefined> {
     private tokenizer;
     private generator;
+    private isnemo;
     constructor(tokenizer: AbstractTokenizer, generator: AbstractTokenGenerator<TRequestOptions>);
     setTokenizer(tokenizer: AbstractTokenizer): void;
     setGenerator(generator: AbstractTokenGenerator<TRequestOptions>): void;

package/dist/lib/template.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"template.d.ts","sourceRoot":"","sources":["../../src/lib/template.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAG3D,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;CACZ;AAED,qBAAa,iBAAiB,CAAC,eAAe,GAAG,SAAS;IACxD,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,SAAS,CAA0C;
+{"version":3,"file":"template.d.ts","sourceRoot":"","sources":["../../src/lib/template.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAG3D,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;CACZ;AAED,qBAAa,iBAAiB,CAAC,eAAe,GAAG,SAAS;IACxD,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,MAAM,CAAU;gBAEZ,SAAS,EAAE,iBAAiB,EAAE,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAMrF,YAAY,CAAC,SAAS,EAAE,iBAAiB;IAIzC,YAAY,CAAC,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAIzD,eAAe,CAC1B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASjB,qBAAqB,CACjC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,IAAI,CAAC;IAuI5C,OAAO,CAAC,cAAc;CAQvB"}
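For context, TemplateProcessor consumes guidance-style templates: plain {{var}} placeholders are substituted from a variables map, while {{GEN ...}} and {{SEL ...}} placeholders mark completion points handled by the generator (see template.js below). A minimal sketch of the input format in TypeScript; `max_tokens` is an assumed generator argument, not something this declaration file fixes:

// Hypothetical template input. The GEN/SEL placeholder syntax follows the
// parser regex in templateParser.js below; `max_tokens` is an assumed argument.
const template = [
  'You are {{char}}.',
  'Mood: {{SEL mood options=moods}}',
  'Reply: {{GEN reply max_tokens=40}}',
].join('\n');

// Plain {{var}} placeholders are substituted from this map before parsing;
// SEL reads its candidate list from the variable named by `options`.
const variables = new Map<string, string | string[]>([
  ['char', 'Miku'],
  ['moods', ['happy', 'sad', 'angry']],
]);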
package/dist/lib/template.js
CHANGED
@@ -43,6 +43,7 @@ class TemplateProcessor {
     constructor(tokenizer, generator) {
         this.tokenizer = tokenizer;
         this.generator = generator;
+        this.isnemo = tokenizer.name === 'nemo';
     }
     setTokenizer(tokenizer) {
         this.tokenizer = tokenizer;
@@ -78,22 +79,19 @@ class TemplateProcessor {
         const result = new Map();
         // Replace {{val}} in template with variables[val]
         variables.forEach((value, key) => {
-            template = template.replace(new RegExp(`{{${key}}}`, "g"), value.toString());
+            template = template.replace(new RegExp(`{{${key}}}`, 'g'), value.toString());
         });
         // Replace {{method variableName methodArg1=methodArg1Value methodArg2=methodArg2Value}} in template
-        const indexes = [
-            ...this.findAllIndexes(template, "{{GEN"),
-            ...this.findAllIndexes(template, "{{SEL"),
-        ].sort((a, b) => a - b);
+        const indexes = [...this.findAllIndexes(template, '{{GEN'), ...this.findAllIndexes(template, '{{SEL')].sort((a, b) => a - b);
         let nextTemplateIndexForPrompt = 0;
-        let prompt = "";
+        let prompt = '';
         for (let i = 0; i < indexes.length; i++) {
             prompt += template.substring(nextTemplateIndexForPrompt, indexes[i]);
             const start = indexes[i];
-            const end = template.substring(start).indexOf("}}") + 2 + start;
+            const end = template.substring(start).indexOf('}}') + 2 + start;
             const content = template.substring(start, end);
-            const { type: method, name: variableName, params: methodArgs
-            let completion = "";
+            const { type: method, name: variableName, params: methodArgs } = (0, templateParser_1.default)(content);
+            let completion = '';
             switch (method) {
                 case TEMPLATE_METHODS.GEN:
                     const stream = this.generator.generateString(prompt, methodArgs, reqOptions);
@@ -118,26 +116,35 @@ class TemplateProcessor {
                 case TEMPLATE_METHODS.SEL:
                     const trie = new _trie_1.default();
                     // Get options from variables
-                    const options = variables.get(String(methodArgs["options"]));
+                    const options = variables.get(String(methodArgs['options']));
                     if (!options) {
-                        throw new Error(`${methodArgs["options"]} variable not found`);
+                        throw new Error(`${methodArgs['options']} variable not found`);
+                    }
+                    if (this.isnemo) {
+                        options.forEach((option) => {
+                            const prefix = this.tokenizer.encodeString(option);
+                            trie.addPrefix(prefix);
+                        });
+                    }
+                    else {
+                        prompt = this.tokenizer.decodeString(this.tokenizer.encodeString(prompt));
+                        // Add all options to trie
+                        options.forEach((option) => {
+                            const prefix = this.tokenizer.encodeString(prompt + option);
+                            trie.addPrefix(prefix);
+                        });
                     }
-                    prompt = this.tokenizer.decodeString(this.tokenizer.encodeString(prompt));
-                    // Add all options to trie
-                    options.forEach((option) => {
-                        const prefix = this.tokenizer.encodeString(prompt + option);
-                        trie.addPrefix(prefix);
-                    });
                     let currentPrefixPrompt = prompt;
                     do {
-                        const currentPrefix = trie.getNextPrefix(this.tokenizer.encodeString(currentPrefixPrompt));
-                        currentPrefixPrompt = this.tokenizer.decodeString(currentPrefix);
+                        const currentPrefix = trie.getNextPrefix(this.isnemo ? [1] : this.tokenizer.encodeString(currentPrefixPrompt));
+                        currentPrefixPrompt = this.isnemo ? currentPrefixPrompt : this.tokenizer.decodeString(currentPrefix);
                         const nextChildren = trie.getNextChildren(currentPrefix);
                         if (nextChildren.length < 2) {
                             // If there is only one child, we complete
-                            completion = this.tokenizer
-                                .decodeString(trie.getWord(currentPrefix))
-                                .substring(prompt.length);
+                            completion = this.tokenizer.decodeString(trie.getWord(currentPrefix));
+                            if (!this.isnemo) {
+                                completion = completion.substring(prompt.length);
+                            }
                             break;
                         }
                         else {
@@ -149,19 +156,27 @@ class TemplateProcessor {
                             const top_logprobs = yield __await(this.generator.generateTokenLogProgs(currentPrefixPrompt, logit_bias, reqOptions));
                             // get max top_logpobs that is in logit_bias
                             let max = -Infinity;
-                            let max_key = "";
+                            let max_key = '';
                             for (const key in top_logprobs) {
-
-
-
-
-
-
-
+                                if (this.isnemo) {
+                                    const keyTokens = this.tokenizer.encodeString(key);
+                                    keyTokens.shift();
+                                    if (top_logprobs[key] > max && keyTokens[0] && keyTokens[0] in logit_bias) {
+                                        max = top_logprobs[key];
+                                        max_key = key;
+                                    }
+                                }
+                                else {
+                                    const completedPrefix = this.tokenizer.encodeString(currentPrefixPrompt + key);
+                                    const completionTokens = completedPrefix.slice(currentPrefix.length);
+                                    if (top_logprobs[key] > max && completionTokens[0] && completionTokens[0] in logit_bias) {
+                                        max = top_logprobs[key];
+                                        max_key = key;
+                                    }
                                 }
                             }
                             // if no key in logit_bias, get max top_logprobs
-                            if (max_key === "") {
+                            if (max_key === '') {
                                 // no key in logit_bias
                                 max = -Infinity;
                                 for (const key in top_logprobs) {
@@ -171,7 +186,13 @@ class TemplateProcessor {
                                     }
                                 }
                             }
-                            currentPrefixPrompt = currentPrefixPrompt + max_key;
+                            if (this.isnemo) {
+                                completion = this.tokenizer.decodeString(trie.getWord(this.tokenizer.encodeString(max_key)));
+                                break;
+                            }
+                            else {
+                                currentPrefixPrompt = currentPrefixPrompt + max_key;
+                            }
                         }
                     } while (!completion);
                     result.set(variableName, completion);
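The substance of 0.17.1 is the isnemo branch in the SEL flow above. By default the trie is seeded with encode(prompt + option) and walked from the encoded prompt; for a tokenizer named 'nemo', each option is encoded standalone and the walk starts from the root token [1] (presumably BOS). A rough sketch of the two seeding strategies, with a toy word-level tokenizer and a stand-in trie whose API only mirrors the addPrefix/getNextChildren calls above (this is not the package's actual _trie_1 implementation):

// Illustrative stand-in for the token trie used by the SEL branch.
class TokenTrie {
  private children = new Map<number, TokenTrie>();

  // Insert a full token sequence, creating nodes as needed.
  addPrefix(tokens: number[]): void {
    let node: TokenTrie = this;
    for (const t of tokens) {
      if (!node.children.has(t)) node.children.set(t, new TokenTrie());
      node = node.children.get(t)!;
    }
  }

  // Token ids that can follow the given prefix.
  getNextChildren(prefix: number[]): number[] {
    let node: TokenTrie | undefined = this;
    for (const t of prefix) node = node?.children.get(t);
    return node ? [...node.children.keys()] : [];
  }
}

// Toy word-level tokenizer standing in for AbstractTokenizer.encodeString.
const vocab = new Map<string, number>();
const encode = (s: string): number[] => [
  1, // 1 ~ BOS, matching the `[1]` root used by the isnemo branch
  ...s.trim().split(/\s+/).map((w) => {
    if (!vocab.has(w)) vocab.set(w, vocab.size + 2);
    return vocab.get(w)!;
  }),
];

const prompt = 'The weather is';
const options = [' very sunny', ' very rainy'];
const trie = new TokenTrie();
const isnemo = false; // flip to compare the two strategies

if (isnemo) {
  // nemo path: each option is encoded on its own, so candidate branches
  // hang directly off the BOS root and no prompt re-encoding happens.
  options.forEach((o) => trie.addPrefix(encode(o)));
  console.log(trie.getNextChildren([1]));
} else {
  // default path: prompt + option is encoded together, which also covers
  // options whose first token merges with the prompt tail; the prompt
  // prefix is stripped from the completion afterwards.
  options.forEach((o) => trie.addPrefix(encode(prompt + o)));
  console.log(trie.getNextChildren(encode(prompt)));
}

The apparent motivation, judging from the branch shape alone: the default path assumes decode(encode(prompt)) round-trips cleanly and that option tokenization depends on the prompt tail, while the nemo path presumably avoids both assumptions by matching options purely on their own token sequences.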
package/dist/lib/templateParser.js
CHANGED
@@ -4,7 +4,7 @@ function templateParser(template) {
     const patternRegex = /{{(GEN|SEL)\s+(\w+)\s+([^}]+)}}/;
     const match = template.match(patternRegex);
     if (!match) {
-        return { type: "", name: "", params: {} };
+        return { type: '', name: '', params: {} };
     }
     const type = match[1];
     const name = match[2];
@@ -16,7 +16,7 @@ function templateParser(template) {
     while ((paramMatch = paramsRegex.exec(paramsString)) !== null) {
         const key = paramMatch[1];
         const value = paramMatch[2].trim();
-        if (type === "SEL" && key === "options") {
+        if (type === 'SEL' && key === 'options') {
             params[key] = value;
         }
         else if (!isNaN(Number(value))) {
@@ -27,11 +27,12 @@ function templateParser(template) {
                 params[key] = JSON.parse(value);
             }
             catch (e) {
-                console.error("Error parsing JSON:", e);
+                console.error('Error parsing JSON:', e);
+                console.error(value);
             }
         }
         else {
-            params[key] = value.replace(/^"|"$/g, "");
+            params[key] = value.replace(/^"|"$/g, '');
         }
     }
     return {
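For reference, the observable behaviour of templateParser as implied by the regex and branches above; the paramsRegex definition is outside this diff, and the numeric branch body is assumed to coerce with Number():

import templateParser from './templateParser'; // path assumed from templateParser_1

templateParser('{{SEL mood options=moods}}');
// -> { type: 'SEL', name: 'mood', params: { options: 'moods' } }
//    for SEL, `options` stays a raw variable name (first branch above)

templateParser('{{GEN reply max_tokens=40}}');
// -> { type: 'GEN', name: 'reply', params: { max_tokens: 40 } }
//    numeric values hit the !isNaN(Number(value)) branch

templateParser('no placeholder here');
// -> { type: '', name: '', params: {} } (the no-match early return)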
package/dist/lib/tokenizer.d.ts
CHANGED
package/dist/lib/tokenizer.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAIA,8BAAsB,iBAAiB;
+{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAIA,8BAAsB,iBAAiB;IAC9B,IAAI,EAAE,MAAM,CAAc;IACjC,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAC5C,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAC5C,QAAQ,CAAC,MAAM,IAAI,MAAM;CAC1B;AAED,qBAAa,cAAe,SAAQ,iBAAiB;IAC1C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAWF,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,GAAG,MAAM;IAQ3F,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,YAAa,SAAQ,iBAAiB;IACxC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMnC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAInC,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,gBAAiB,SAAQ,iBAAiB;IAC5C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAWF,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,GAAG,MAAM;IAQ3F,MAAM,IAAI,MAAM;CAG1B"}
package/dist/lib/tokenizer.js
CHANGED
@@ -8,6 +8,9 @@ const _llama_tokenizer_1 = __importDefault(require("./_llama-tokenizer"));
 const gpt_tokenizer_1 = require("gpt-tokenizer");
 const _mistral_tokenizer_1 = __importDefault(require("./_mistral-tokenizer"));
 class AbstractTokenizer {
+    constructor() {
+        this.name = 'abstract';
+    }
 }
 exports.AbstractTokenizer = AbstractTokenizer;
 class LLaMATokenizer extends AbstractTokenizer {
@@ -24,13 +27,12 @@ class LLaMATokenizer extends AbstractTokenizer {
     decodeString(arr, add_bos_token, add_preceding_space) {
         if (arr[arr.length - 1] === 2) {
             arr = arr.slice(0, arr.length - 1);
-            return (_llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) +
-                this.getEOS());
+            return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) + this.getEOS();
         }
         return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space);
     }
     getEOS() {
-        return "</s>";
+        return '</s>';
     }
 }
 exports.LLaMATokenizer = LLaMATokenizer;
@@ -44,7 +46,7 @@ class GTPTokenizer extends AbstractTokenizer {
         return (0, gpt_tokenizer_1.decode)(arr);
     }
     getEOS() {
-        return "<|endoftext|>";
+        return '<|endoftext|>';
     }
 }
 exports.GTPTokenizer = GTPTokenizer;
@@ -62,13 +64,12 @@ class MistralTokenizer extends AbstractTokenizer {
     decodeString(arr, add_bos_token, add_preceding_space) {
         if (arr[arr.length - 1] === 2) {
             arr = arr.slice(0, arr.length - 1);
-            return (_mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) +
-                this.getEOS());
+            return _mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) + this.getEOS();
         }
         return _mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space);
     }
     getEOS() {
-        return "</s>";
+        return '</s>';
     }
 }
 exports.MistralTokenizer = MistralTokenizer;
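The new name field (defaulting to 'abstract') is the discriminator TemplateProcessor reads for its tokenizer.name === 'nemo' check in template.js above. No nemo tokenizer ships in this diff, so a subclass along the following lines is presumably provided elsewhere; a hypothetical sketch:

import { AbstractTokenizer } from './tokenizer';

// Hypothetical subclass, not part of this package diff; shown only to
// illustrate how the `name` discriminator activates the isnemo path.
export class NemoTokenizer extends AbstractTokenizer {
  name = 'nemo'; // TemplateProcessor checks tokenizer.name === 'nemo'

  encodeString(str: string): number[] {
    // A real implementation would delegate to an actual nemo tokenizer;
    // stubbed here as char codes behind a BOS-like id 1.
    return [1, ...Array.from(str, (c) => c.charCodeAt(0))];
  }

  decodeString(arr: number[]): string {
    return String.fromCharCode(...arr.filter((t) => t !== 1));
  }

  getEOS(): string {
    return '</s>'; // assumed; mirrors the LLaMA/Mistral EOS above
  }
}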
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mikugg/guidance",
-  "version": "0.17.0",
+  "version": "0.17.1",
   "description": "",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -40,5 +40,5 @@
     "ts-node": "^10.9.1",
     "typescript": "^5.2.2"
   },
-  "gitHead": "
+  "gitHead": "4bd2d27eb1edcbe500b91da80180d719d2619eab"
 }