@mikugg/guidance 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +660 -0
- package/README.md +73 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +32 -0
- package/dist/lib/__test__/_trie.test.d.ts +2 -0
- package/dist/lib/__test__/_trie.test.d.ts.map +1 -0
- package/dist/lib/__test__/_trie.test.js +43 -0
- package/dist/lib/__test__/template.test.d.ts +2 -0
- package/dist/lib/__test__/template.test.d.ts.map +1 -0
- package/dist/lib/__test__/template.test.js +96 -0
- package/dist/lib/_llama-tokenizer.d.ts +19 -0
- package/dist/lib/_llama-tokenizer.d.ts.map +1 -0
- package/dist/lib/_llama-tokenizer.js +439 -0
- package/dist/lib/_trie.d.ts +41 -0
- package/dist/lib/_trie.d.ts.map +1 -0
- package/dist/lib/_trie.js +102 -0
- package/dist/lib/template.d.ts +16 -0
- package/dist/lib/template.d.ts.map +1 -0
- package/dist/lib/template.js +117 -0
- package/dist/lib/token-generator.d.ts +21 -0
- package/dist/lib/token-generator.d.ts.map +1 -0
- package/dist/lib/token-generator.js +72 -0
- package/dist/lib/tokenizer.d.ts +16 -0
- package/dist/lib/tokenizer.d.ts.map +1 -0
- package/dist/lib/tokenizer.js +48 -0
- package/package.json +43 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** A single trie node: outgoing edges keyed by token id, plus an end-of-word flag. */
declare class TrieNode {
    children: Map<number, TrieNode>;
    isEndOfWord: boolean;
    constructor();
}
/**
 * Trie data structure for storing a list of numbers (token-id sequences).
 */
export default class Trie {
    root: TrieNode;
    constructor();
    /**
     * Add a prefix to the trie, a list of numbers; its last node is marked
     * as the end of a word.
     *
     * @param prefix A list of numbers
     */
    addPrefix(prefix: number[]): void;
    /**
     * Get the next children of a prefix, a list of numbers.
     *
     * @param prefix A list of numbers
     * @returns The ids of the immediate children after walking `prefix`, or
     *          [] when the prefix is not present
     */
    getNextChildren(prefix: number[]): number[];
    /**
     * Returns the word until it finds multiple children or isEndOfWord.
     *
     * @param prefix A list of numbers
     * @returns `prefix` extended along the unique path, or [] when the
     *          prefix is not present
     */
    getNextPrefix(prefix: number[]): number[];
    /**
     * Returns the word until the end of the prefix, a list of numbers
     * (the first child is taken at each branch).
     *
     * @param prefix A list of numbers
     * @returns The full word, or [] when the prefix is not present
     */
    getWord(prefix: number[]): number[];
}
export {};
//# sourceMappingURL=_trie.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"_trie.d.ts","sourceRoot":"","sources":["../../src/lib/_trie.ts"],"names":[],"mappings":"AAAA,cAAM,QAAQ;IACZ,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAChC,WAAW,EAAE,OAAO,CAAC;;CAMtB;AAED;;GAEG;AACH,MAAM,CAAC,OAAO,OAAO,IAAI;IACvB,IAAI,EAAE,QAAQ,CAAC;;IAMf;;;;OAIG;IACH,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAWjC;;;;;OAKG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAY3C;;;;;OAKG;IACH,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAoBzC;;;;;OAKG;IACH,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;CAmBpC"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
class TrieNode {
|
|
4
|
+
constructor() {
|
|
5
|
+
this.children = new Map();
|
|
6
|
+
this.isEndOfWord = false;
|
|
7
|
+
}
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Trie data structure for storing a list of numbers
|
|
11
|
+
*/
|
|
12
|
+
class Trie {
|
|
13
|
+
constructor() {
|
|
14
|
+
this.root = new TrieNode();
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Add a prefix to the trie, a list of numbers
|
|
18
|
+
*
|
|
19
|
+
* @param prefix A list of numbers
|
|
20
|
+
*/
|
|
21
|
+
addPrefix(prefix) {
|
|
22
|
+
let node = this.root;
|
|
23
|
+
prefix.forEach(num => {
|
|
24
|
+
if (!node.children.has(num)) {
|
|
25
|
+
node.children.set(num, new TrieNode());
|
|
26
|
+
}
|
|
27
|
+
node = node.children.get(num);
|
|
28
|
+
});
|
|
29
|
+
node.isEndOfWord = true;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Get the next children of a prefix, a list of numbers
|
|
33
|
+
*
|
|
34
|
+
* @param prefix A list of numbers
|
|
35
|
+
* @returns
|
|
36
|
+
*/
|
|
37
|
+
getNextChildren(prefix) {
|
|
38
|
+
let node = this.root;
|
|
39
|
+
for (const num of prefix) {
|
|
40
|
+
if (node.children.has(num)) {
|
|
41
|
+
node = node.children.get(num);
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
return []; // prefix not found
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
return Array.from(node.children.keys());
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Returns the word until it find multiple children or isEndOfWord
|
|
51
|
+
*
|
|
52
|
+
* @param prefix A list of numbers
|
|
53
|
+
* @returns
|
|
54
|
+
*/
|
|
55
|
+
getNextPrefix(prefix) {
|
|
56
|
+
let node = this.root;
|
|
57
|
+
const nextPrefix = [];
|
|
58
|
+
for (const num of prefix) {
|
|
59
|
+
if (node.children.has(num)) {
|
|
60
|
+
nextPrefix.push(num);
|
|
61
|
+
node = node.children.get(num);
|
|
62
|
+
}
|
|
63
|
+
else {
|
|
64
|
+
return []; // prefix not found
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
// continue until children > 1 or isEndOfWord
|
|
68
|
+
while (node.children.size === 1 && !node.isEndOfWord) {
|
|
69
|
+
const nextChild = Array.from(node.children.keys())[0];
|
|
70
|
+
nextPrefix.push(nextChild);
|
|
71
|
+
node = node.children.get(nextChild);
|
|
72
|
+
}
|
|
73
|
+
return nextPrefix;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Returns the word until the end of the prefix, a list of numbers
|
|
77
|
+
*
|
|
78
|
+
* @param prefix A list of numbers
|
|
79
|
+
* @returns
|
|
80
|
+
*/
|
|
81
|
+
getWord(prefix) {
|
|
82
|
+
let node = this.root;
|
|
83
|
+
const word = [];
|
|
84
|
+
for (const num of prefix) {
|
|
85
|
+
if (node.children.has(num)) {
|
|
86
|
+
word.push(num);
|
|
87
|
+
node = node.children.get(num);
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
return []; // prefix not found
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
// cotinue to get the first child until the end of the word
|
|
94
|
+
while (!node.isEndOfWord && node.children.size > 0) {
|
|
95
|
+
const nextChild = Array.from(node.children.keys())[0];
|
|
96
|
+
word.push(nextChild);
|
|
97
|
+
node = node.children.get(nextChild);
|
|
98
|
+
}
|
|
99
|
+
return word;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
exports.default = Trie;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { AbstractTokenizer } from './tokenizer';
import { AbstractTokenGenerator } from './token-generator';
/** Template directives: SEL = trie-constrained selection, GEN = free generation. */
export declare enum TEMPLATE_METHODS {
    SEL = "SEL",
    GEN = "GEN"
}
/**
 * Expands a guidance template: substitutes `{{variable}}` placeholders, then
 * executes `{{METHOD variableName arg=value ...}}` directives against the
 * configured tokenizer and token generator.
 */
export declare class TemplateProcessor {
    private tokenizer;
    private generator;
    constructor(tokenizer: AbstractTokenizer, generator: AbstractTokenGenerator);
    /** Replace the tokenizer used for encoding/decoding prompts. */
    setTokenizer(tokenizer: AbstractTokenizer): void;
    /** Replace the LLM back-end used for generation. */
    setGenerator(generator: AbstractTokenGenerator): void;
    /**
     * Process `template`, resolving each directive in order; earlier results
     * become part of the prompt for later directives.
     *
     * @param template Template text containing placeholders and directives
     * @param variables Placeholder values; array values act as option lists for SEL
     * @returns Map from directive variable name to the generated/selected text
     */
    processTemplate(template: string, variables: Map<string, string | string[]>): Promise<Map<string, string>>;
    private findAllIndexes;
}
//# sourceMappingURL=template.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"template.d.ts","sourceRoot":"","sources":["../../src/lib/template.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAE3D,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;CACZ;AAED,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,SAAS,CAAyB;gBAE9B,SAAS,EAAE,iBAAiB,EAAE,SAAS,EAAE,sBAAsB;IAKpE,YAAY,CAAC,SAAS,EAAE,iBAAiB;IAIzC,YAAY,CAAC,SAAS,EAAE,sBAAsB;IAIxC,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAC,MAAM,EAAE,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAkFrH,OAAO,CAAC,cAAc;CAQvB"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.TemplateProcessor = exports.TEMPLATE_METHODS = void 0;
|
|
16
|
+
const _trie_1 = __importDefault(require("./_trie"));
|
|
17
|
+
var TEMPLATE_METHODS;
|
|
18
|
+
(function (TEMPLATE_METHODS) {
|
|
19
|
+
TEMPLATE_METHODS["SEL"] = "SEL";
|
|
20
|
+
TEMPLATE_METHODS["GEN"] = "GEN";
|
|
21
|
+
})(TEMPLATE_METHODS || (exports.TEMPLATE_METHODS = TEMPLATE_METHODS = {}));
|
|
22
|
+
class TemplateProcessor {
|
|
23
|
+
constructor(tokenizer, generator) {
|
|
24
|
+
this.tokenizer = tokenizer;
|
|
25
|
+
this.generator = generator;
|
|
26
|
+
}
|
|
27
|
+
setTokenizer(tokenizer) {
|
|
28
|
+
this.tokenizer = tokenizer;
|
|
29
|
+
}
|
|
30
|
+
setGenerator(generator) {
|
|
31
|
+
this.generator = generator;
|
|
32
|
+
}
|
|
33
|
+
processTemplate(template, variables) {
|
|
34
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
35
|
+
const result = new Map();
|
|
36
|
+
// Replace {{val}} in template with variables[val]
|
|
37
|
+
variables.forEach((value, key) => {
|
|
38
|
+
template = template.replace(new RegExp(`{{${key}}}`, 'g'), value.toString());
|
|
39
|
+
});
|
|
40
|
+
// Replace {{method variableName methodArg1=methodArg1Value methodArg2=methodArg2Value}} in template
|
|
41
|
+
const indexes = this.findAllIndexes(template, '{{');
|
|
42
|
+
let nextTemplateIndexForPrompt = 0;
|
|
43
|
+
let prompt = '';
|
|
44
|
+
for (let i = 0; i < indexes.length; i++) {
|
|
45
|
+
prompt += template.substring(nextTemplateIndexForPrompt, indexes[i]);
|
|
46
|
+
const start = indexes[i] + 2;
|
|
47
|
+
const end = template.substring(start).indexOf('}}') + start;
|
|
48
|
+
const content = template.substring(start, end);
|
|
49
|
+
const args = content.split(' ');
|
|
50
|
+
const method = args[0];
|
|
51
|
+
const variableName = args[1];
|
|
52
|
+
const methodArgs = args.slice(2).reduce((acc, arg) => {
|
|
53
|
+
const [key, value] = arg.split('=');
|
|
54
|
+
acc[key] = value;
|
|
55
|
+
return acc;
|
|
56
|
+
}, {});
|
|
57
|
+
let completion = '';
|
|
58
|
+
switch (method) {
|
|
59
|
+
case TEMPLATE_METHODS.GEN:
|
|
60
|
+
completion = yield this.generator.generateString(prompt, methodArgs);
|
|
61
|
+
// Remove string after stop
|
|
62
|
+
if (methodArgs['stop']) {
|
|
63
|
+
if (completion.indexOf(methodArgs['stop']) >= 0) {
|
|
64
|
+
completion = completion.substring(0, completion.indexOf(methodArgs['stop']));
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
result.set(variableName, completion);
|
|
68
|
+
break;
|
|
69
|
+
case TEMPLATE_METHODS.SEL:
|
|
70
|
+
const trie = new _trie_1.default();
|
|
71
|
+
// Get options from variables
|
|
72
|
+
const options = variables.get(methodArgs['options']);
|
|
73
|
+
if (!options) {
|
|
74
|
+
throw new Error(`${methodArgs['options']} variable not found`);
|
|
75
|
+
}
|
|
76
|
+
// Add all options to trie
|
|
77
|
+
options.forEach(option => trie.addPrefix(this.tokenizer.encodeString(prompt + option + this.tokenizer.getEOS())));
|
|
78
|
+
let currentPrefixPrompt = prompt;
|
|
79
|
+
do {
|
|
80
|
+
const currentPrefix = trie.getNextPrefix(this.tokenizer.encodeString(currentPrefixPrompt));
|
|
81
|
+
currentPrefixPrompt = this.tokenizer.decodeString(currentPrefix);
|
|
82
|
+
const nextChildren = trie.getNextChildren(currentPrefix);
|
|
83
|
+
if (nextChildren.length < 2) {
|
|
84
|
+
// If there is only one child, we complete
|
|
85
|
+
completion = this.tokenizer.decodeString(trie.getWord(currentPrefix)).substring(prompt.length).replace(this.tokenizer.getEOS(), '');
|
|
86
|
+
break;
|
|
87
|
+
}
|
|
88
|
+
else {
|
|
89
|
+
// If there is more than one child, we generate the next token
|
|
90
|
+
const nextToken = yield this.generator.generateToken(prompt, nextChildren.reduce((acc, child) => {
|
|
91
|
+
acc[child.toString()] = 100;
|
|
92
|
+
return acc;
|
|
93
|
+
}, {}));
|
|
94
|
+
currentPrefixPrompt = currentPrefixPrompt + nextToken;
|
|
95
|
+
}
|
|
96
|
+
} while (!completion);
|
|
97
|
+
result.set(variableName, completion);
|
|
98
|
+
break;
|
|
99
|
+
default:
|
|
100
|
+
throw new Error(`Invalid method ${method} in template`);
|
|
101
|
+
}
|
|
102
|
+
prompt += completion;
|
|
103
|
+
nextTemplateIndexForPrompt = end + 2;
|
|
104
|
+
}
|
|
105
|
+
return result;
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
findAllIndexes(str, substr) {
|
|
109
|
+
const indexes = [];
|
|
110
|
+
let i = -1;
|
|
111
|
+
while ((i = str.indexOf(substr, i + 1)) >= 0) {
|
|
112
|
+
indexes.push(i);
|
|
113
|
+
}
|
|
114
|
+
return indexes;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
exports.TemplateProcessor = TemplateProcessor;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ClientOptions } from 'openai';
/**
 * Contract for LLM back-ends used by the template processor.
 */
export declare abstract class AbstractTokenGenerator {
    /** Generate a single token (returned as a string key), biased by `logit_bias`. */
    abstract generateToken(prompt: string, logit_bias: Record<string, number>): Promise<string>;
    /** Generate free-form text; `options` are forwarded to the back-end. */
    abstract generateString(prompt: string, options: Record<string, string>): Promise<string>;
}
/**
 * OpenAI Token Generator
 *
 * Implements the generator contract on top of the OpenAI completions API
 * (or an OpenAI-compatible endpoint via `baseURL`).
 */
export declare class OpenAITokenGenerator extends AbstractTokenGenerator {
    private openai;
    private model;
    constructor(params: {
        apiKey: string;
        model: string;
        baseURL?: string;
    }, options?: ClientOptions);
    generateToken(prompt: string, logit_bias: Record<string, number>): Promise<string>;
    generateString(prompt: string, options: Record<string, string>): Promise<string>;
}
//# sourceMappingURL=token-generator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-generator.d.ts","sourceRoot":"","sources":["../../src/lib/token-generator.ts"],"names":[],"mappings":"AAAA,OAAe,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAA;AAE9C,8BAAsB,sBAAsB;IAC1C,QAAQ,CAAC,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAC3F,QAAQ,CAAC,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;CAC1F;AACD;;;GAGG;AACH,qBAAa,oBAAqB,SAAQ,sBAAsB;IAC9D,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,EAAE,OAAO,CAAC,EAAE,aAAa;IAUX,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAoClF,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;CAShG"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.OpenAITokenGenerator = exports.AbstractTokenGenerator = void 0;
|
|
16
|
+
const openai_1 = __importDefault(require("openai"));
|
|
17
|
+
class AbstractTokenGenerator {
|
|
18
|
+
}
|
|
19
|
+
exports.AbstractTokenGenerator = AbstractTokenGenerator;
|
|
20
|
+
/**
|
|
21
|
+
* OpenAI Token Generator
|
|
22
|
+
*
|
|
23
|
+
*/
|
|
24
|
+
class OpenAITokenGenerator extends AbstractTokenGenerator {
|
|
25
|
+
constructor(params, options) {
|
|
26
|
+
super();
|
|
27
|
+
this.model = params.model;
|
|
28
|
+
this.openai = new openai_1.default(Object.assign({ apiKey: params.apiKey, baseURL: params.baseURL }, options));
|
|
29
|
+
}
|
|
30
|
+
generateToken(prompt, logit_bias) {
|
|
31
|
+
var _a;
|
|
32
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
33
|
+
const result = yield this.openai.completions.create({
|
|
34
|
+
model: this.model,
|
|
35
|
+
prompt,
|
|
36
|
+
logit_bias,
|
|
37
|
+
logprobs: 10,
|
|
38
|
+
max_tokens: 1,
|
|
39
|
+
});
|
|
40
|
+
const logprobsResult = ((_a = result.choices[0].logprobs) === null || _a === void 0 ? void 0 : _a.top_logprobs) || [];
|
|
41
|
+
const top_logprobs = logprobsResult ? logprobsResult[0] : { '2': 0 };
|
|
42
|
+
// get max top_logpobs that is in logit_bias
|
|
43
|
+
let max = -Infinity;
|
|
44
|
+
let max_key = '';
|
|
45
|
+
for (const key in top_logprobs) {
|
|
46
|
+
if (top_logprobs[key] > max && key in logit_bias) {
|
|
47
|
+
max = top_logprobs[key];
|
|
48
|
+
max_key = key;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
// if no key in logit_bias, get max top_logprobs
|
|
52
|
+
if (max_key === '') {
|
|
53
|
+
// no key in logit_bias
|
|
54
|
+
max = -Infinity;
|
|
55
|
+
for (const key in top_logprobs) {
|
|
56
|
+
if (top_logprobs[key] > max) {
|
|
57
|
+
max = top_logprobs[key];
|
|
58
|
+
max_key = key;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
return max_key;
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
generateString(prompt, options) {
|
|
66
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
67
|
+
const result = yield this.openai.completions.create(Object.assign({ model: this.model, prompt }, options));
|
|
68
|
+
return result.choices[0].text;
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
exports.OpenAITokenGenerator = OpenAITokenGenerator;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Minimal tokenizer contract used by the template processor: string <-> token-id
 * conversion plus the model's end-of-sequence marker.
 */
export declare abstract class AbstractTokenizer {
    /** Encode a string into token ids. */
    abstract encodeString(str: string): number[];
    /** Decode token ids back into a string. */
    abstract decodeString(arr: number[]): string;
    /** The end-of-sequence marker as literal text. */
    abstract getEOS(): string;
}
/**
 * LLaMA tokenizer; a trailing EOS marker in the text maps to/from token id 2.
 */
export declare class LLaMATokenizer extends AbstractTokenizer {
    encodeString(str: string, add_bos_token?: boolean, add_preceding_space?: boolean, log_performance?: boolean): number[];
    decodeString(arr: number[], add_bos_token?: boolean, add_preceding_space?: boolean): string;
    getEOS(): string;
}
/**
 * GPT tokenizer backed by the `gpt-tokenizer` package.
 * NOTE(review): "GTP" is presumably a typo for "GPT", but the name is public API.
 */
export declare class GTPTokenizer extends AbstractTokenizer {
    encodeString(str: string): number[];
    decodeString(arr: number[]): string;
    getEOS(): string;
}
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAGA,8BAAsB,iBAAiB;IACrC,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAC5C,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAC5C,QAAQ,CAAC,MAAM,IAAI,MAAM;CAC1B;AAED,qBAAa,cAAe,SAAQ,iBAAiB;IAC1C,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,EAAE,eAAe,CAAC,EAAE,OAAO,GAAG,MAAM,EAAE;IAWtH,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,GAAG,MAAM;IAQ3F,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,YAAa,SAAQ,iBAAiB;IACxC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMnC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAInC,MAAM,IAAI,MAAM;CAG1B"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.GTPTokenizer = exports.LLaMATokenizer = exports.AbstractTokenizer = void 0;
|
|
7
|
+
const _llama_tokenizer_1 = __importDefault(require("./_llama-tokenizer"));
|
|
8
|
+
const gpt_tokenizer_1 = require("gpt-tokenizer");
|
|
9
|
+
class AbstractTokenizer {
|
|
10
|
+
}
|
|
11
|
+
exports.AbstractTokenizer = AbstractTokenizer;
|
|
12
|
+
class LLaMATokenizer extends AbstractTokenizer {
|
|
13
|
+
encodeString(str, add_bos_token, add_preceding_space, log_performance) {
|
|
14
|
+
if (str.endsWith(this.getEOS())) {
|
|
15
|
+
str = str.substring(0, str.length - this.getEOS().length);
|
|
16
|
+
return [
|
|
17
|
+
..._llama_tokenizer_1.default.encode(str, add_bos_token, add_preceding_space, log_performance),
|
|
18
|
+
2 // EOS
|
|
19
|
+
];
|
|
20
|
+
}
|
|
21
|
+
return _llama_tokenizer_1.default.encode(str, add_bos_token, add_preceding_space, log_performance);
|
|
22
|
+
}
|
|
23
|
+
decodeString(arr, add_bos_token, add_preceding_space) {
|
|
24
|
+
if (arr[arr.length - 1] === 2) {
|
|
25
|
+
arr = arr.slice(0, arr.length - 1);
|
|
26
|
+
return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) + this.getEOS();
|
|
27
|
+
}
|
|
28
|
+
return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space);
|
|
29
|
+
}
|
|
30
|
+
getEOS() {
|
|
31
|
+
return '</s>';
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
exports.LLaMATokenizer = LLaMATokenizer;
|
|
35
|
+
class GTPTokenizer extends AbstractTokenizer {
|
|
36
|
+
encodeString(str) {
|
|
37
|
+
return (0, gpt_tokenizer_1.encode)(str, {
|
|
38
|
+
allowedSpecial: new Set([this.getEOS()])
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
decodeString(arr) {
|
|
42
|
+
return (0, gpt_tokenizer_1.decode)(arr);
|
|
43
|
+
}
|
|
44
|
+
getEOS() {
|
|
45
|
+
return '<|endoftext|>';
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
exports.GTPTokenizer = GTPTokenizer;
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mikugg/guidance",
|
|
3
|
+
"version": "0.8.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist/**/*"
|
|
9
|
+
],
|
|
10
|
+
"scripts": {
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"demo": "ts-node demo/index.ts",
|
|
13
|
+
"lint": "eslint . --ext .ts",
|
|
14
|
+
"prepare": "npm run build",
|
|
15
|
+
"test": "jest"
|
|
16
|
+
},
|
|
17
|
+
"author": "mikudev",
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "https://github.com/miku-gg/miku.git",
|
|
22
|
+
"directory": "packages/guidance"
|
|
23
|
+
},
|
|
24
|
+
"publishConfig": {
|
|
25
|
+
"access": "public",
|
|
26
|
+
"registry": "https://registry.npmjs.org/"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"gpt-tokenizer": "^2.1.2",
|
|
30
|
+
"openai": "^4.19.0"
|
|
31
|
+
},
|
|
32
|
+
"devDependencies": {
|
|
33
|
+
"@types/jest": "^29.5.8",
|
|
34
|
+
"@typescript-eslint/eslint-plugin": "^6.11.0",
|
|
35
|
+
"@typescript-eslint/parser": "^6.11.0",
|
|
36
|
+
"eslint": "^8.54.0",
|
|
37
|
+
"jest": "^29.7.0",
|
|
38
|
+
"ts-jest": "^29.1.1",
|
|
39
|
+
"ts-node": "^10.9.1",
|
|
40
|
+
"typescript": "^5.2.2"
|
|
41
|
+
},
|
|
42
|
+
"gitHead": "e0e2c0cc7fbde0c7d477a6907658a76a18e4fd0b"
|
|
43
|
+
}
|