@mikugg/guidance 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import { AbstractTokenizer } from "./tokenizer";
2
- import { AbstractTokenGenerator } from "./token-generator";
1
+ import { AbstractTokenizer } from './tokenizer';
2
+ import { AbstractTokenGenerator } from './token-generator';
3
3
  export declare enum TEMPLATE_METHODS {
4
4
  SEL = "SEL",
5
5
  GEN = "GEN"
@@ -7,6 +7,7 @@ export declare enum TEMPLATE_METHODS {
7
7
  export declare class TemplateProcessor<TRequestOptions = undefined> {
8
8
  private tokenizer;
9
9
  private generator;
10
+ private isnemo;
10
11
  constructor(tokenizer: AbstractTokenizer, generator: AbstractTokenGenerator<TRequestOptions>);
11
12
  setTokenizer(tokenizer: AbstractTokenizer): void;
12
13
  setGenerator(generator: AbstractTokenGenerator<TRequestOptions>): void;
@@ -1 +1 @@
1
- {"version":3,"file":"template.d.ts","sourceRoot":"","sources":["../../src/lib/template.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAG3D,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;CACZ;AAED,qBAAa,iBAAiB,CAAC,eAAe,GAAG,SAAS;IACxD,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,SAAS,CAA0C;gBAGzD,SAAS,EAAE,iBAAiB,EAC5B,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAM7C,YAAY,CAAC,SAAS,EAAE,iBAAiB;IAIzC,YAAY,CAAC,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAIzD,eAAe,CAC1B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAajB,qBAAqB,CACjC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,IAAI,CAAC;IA0I5C,OAAO,CAAC,cAAc;CAQvB"}
1
+ {"version":3,"file":"template.d.ts","sourceRoot":"","sources":["../../src/lib/template.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAG3D,oBAAY,gBAAgB;IAC1B,GAAG,QAAQ;IACX,GAAG,QAAQ;CACZ;AAED,qBAAa,iBAAiB,CAAC,eAAe,GAAG,SAAS;IACxD,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,SAAS,CAA0C;IAC3D,OAAO,CAAC,MAAM,CAAU;gBAEZ,SAAS,EAAE,iBAAiB,EAAE,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAMrF,YAAY,CAAC,SAAS,EAAE,iBAAiB;IAIzC,YAAY,CAAC,SAAS,EAAE,sBAAsB,CAAC,eAAe,CAAC;IAIzD,eAAe,CAC1B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASjB,qBAAqB,CACjC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC,EACzC,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,IAAI,CAAC;IAuI5C,OAAO,CAAC,cAAc;CAQvB"}
@@ -43,6 +43,7 @@ class TemplateProcessor {
43
43
  constructor(tokenizer, generator) {
44
44
  this.tokenizer = tokenizer;
45
45
  this.generator = generator;
46
+ this.isnemo = tokenizer.name === 'nemo';
46
47
  }
47
48
  setTokenizer(tokenizer) {
48
49
  this.tokenizer = tokenizer;
@@ -78,22 +79,19 @@ class TemplateProcessor {
78
79
  const result = new Map();
79
80
  // Replace {{val}} in template with variables[val]
80
81
  variables.forEach((value, key) => {
81
- template = template.replace(new RegExp(`{{${key}}}`, "g"), value.toString());
82
+ template = template.replace(new RegExp(`{{${key}}}`, 'g'), value.toString());
82
83
  });
83
84
  // Replace {{method variableName methodArg1=methodArg1Value methodArg2=methodArg2Value}} in template
84
- const indexes = [
85
- ...this.findAllIndexes(template, "{{GEN"),
86
- ...this.findAllIndexes(template, "{{SEL"),
87
- ].sort((a, b) => a - b);
85
+ const indexes = [...this.findAllIndexes(template, '{{GEN'), ...this.findAllIndexes(template, '{{SEL')].sort((a, b) => a - b);
88
86
  let nextTemplateIndexForPrompt = 0;
89
- let prompt = "";
87
+ let prompt = '';
90
88
  for (let i = 0; i < indexes.length; i++) {
91
89
  prompt += template.substring(nextTemplateIndexForPrompt, indexes[i]);
92
90
  const start = indexes[i];
93
- const end = template.substring(start).indexOf("}}") + 2 + start;
91
+ const end = template.substring(start).indexOf('}}') + 2 + start;
94
92
  const content = template.substring(start, end);
95
- const { type: method, name: variableName, params: methodArgs, } = (0, templateParser_1.default)(content);
96
- let completion = "";
93
+ const { type: method, name: variableName, params: methodArgs } = (0, templateParser_1.default)(content);
94
+ let completion = '';
97
95
  switch (method) {
98
96
  case TEMPLATE_METHODS.GEN:
99
97
  const stream = this.generator.generateString(prompt, methodArgs, reqOptions);
@@ -118,26 +116,35 @@ class TemplateProcessor {
118
116
  case TEMPLATE_METHODS.SEL:
119
117
  const trie = new _trie_1.default();
120
118
  // Get options from variables
121
- const options = variables.get(String(methodArgs["options"]));
119
+ const options = variables.get(String(methodArgs['options']));
122
120
  if (!options) {
123
- throw new Error(`${methodArgs["options"]} variable not found`);
121
+ throw new Error(`${methodArgs['options']} variable not found`);
122
+ }
123
+ if (this.isnemo) {
124
+ options.forEach((option) => {
125
+ const prefix = this.tokenizer.encodeString(option);
126
+ trie.addPrefix(prefix);
127
+ });
128
+ }
129
+ else {
130
+ prompt = this.tokenizer.decodeString(this.tokenizer.encodeString(prompt));
131
+ // Add all options to trie
132
+ options.forEach((option) => {
133
+ const prefix = this.tokenizer.encodeString(prompt + option);
134
+ trie.addPrefix(prefix);
135
+ });
124
136
  }
125
- prompt = this.tokenizer.decodeString(this.tokenizer.encodeString(prompt));
126
- // Add all options to trie
127
- options.forEach((option) => {
128
- const prefix = this.tokenizer.encodeString(prompt + option);
129
- trie.addPrefix(prefix);
130
- });
131
137
  let currentPrefixPrompt = prompt;
132
138
  do {
133
- const currentPrefix = trie.getNextPrefix(this.tokenizer.encodeString(currentPrefixPrompt));
134
- currentPrefixPrompt = this.tokenizer.decodeString(currentPrefix);
139
+ const currentPrefix = trie.getNextPrefix(this.isnemo ? [1] : this.tokenizer.encodeString(currentPrefixPrompt));
140
+ currentPrefixPrompt = this.isnemo ? currentPrefixPrompt : this.tokenizer.decodeString(currentPrefix);
135
141
  const nextChildren = trie.getNextChildren(currentPrefix);
136
142
  if (nextChildren.length < 2) {
137
143
  // If there is only one child, we complete
138
- completion = this.tokenizer
139
- .decodeString(trie.getWord(currentPrefix))
140
- .substring(prompt.length);
144
+ completion = this.tokenizer.decodeString(trie.getWord(currentPrefix));
145
+ if (!this.isnemo) {
146
+ completion = completion.substring(prompt.length);
147
+ }
141
148
  break;
142
149
  }
143
150
  else {
@@ -149,19 +156,27 @@ class TemplateProcessor {
149
156
  const top_logprobs = yield __await(this.generator.generateTokenLogProgs(currentPrefixPrompt, logit_bias, reqOptions));
150
157
  // get max top_logprobs that is in logit_bias
151
158
  let max = -Infinity;
152
- let max_key = "";
159
+ let max_key = '';
153
160
  for (const key in top_logprobs) {
154
- const completedPrefix = this.tokenizer.encodeString(currentPrefixPrompt + key);
155
- const completionTokens = completedPrefix.slice(currentPrefix.length);
156
- if (top_logprobs[key] > max &&
157
- completionTokens[0] &&
158
- completionTokens[0] in logit_bias) {
159
- max = top_logprobs[key];
160
- max_key = key;
161
+ if (this.isnemo) {
162
+ const keyTokens = this.tokenizer.encodeString(key);
163
+ keyTokens.shift();
164
+ if (top_logprobs[key] > max && keyTokens[0] && keyTokens[0] in logit_bias) {
165
+ max = top_logprobs[key];
166
+ max_key = key;
167
+ }
168
+ }
169
+ else {
170
+ const completedPrefix = this.tokenizer.encodeString(currentPrefixPrompt + key);
171
+ const completionTokens = completedPrefix.slice(currentPrefix.length);
172
+ if (top_logprobs[key] > max && completionTokens[0] && completionTokens[0] in logit_bias) {
173
+ max = top_logprobs[key];
174
+ max_key = key;
175
+ }
161
176
  }
162
177
  }
163
178
  // if no key in logit_bias, get max top_logprobs
164
- if (max_key === "") {
179
+ if (max_key === '') {
165
180
  // no key in logit_bias
166
181
  max = -Infinity;
167
182
  for (const key in top_logprobs) {
@@ -171,7 +186,13 @@ class TemplateProcessor {
171
186
  }
172
187
  }
173
188
  }
174
- currentPrefixPrompt = currentPrefixPrompt + max_key;
189
+ if (this.isnemo) {
190
+ completion = this.tokenizer.decodeString(trie.getWord(this.tokenizer.encodeString(max_key)));
191
+ break;
192
+ }
193
+ else {
194
+ currentPrefixPrompt = currentPrefixPrompt + max_key;
195
+ }
175
196
  }
176
197
  } while (!completion);
177
198
  result.set(variableName, completion);
@@ -4,7 +4,7 @@ function templateParser(template) {
4
4
  const patternRegex = /{{(GEN|SEL)\s+(\w+)\s+([^}]+)}}/;
5
5
  const match = template.match(patternRegex);
6
6
  if (!match) {
7
- return { type: "", name: "", params: {} };
7
+ return { type: '', name: '', params: {} };
8
8
  }
9
9
  const type = match[1];
10
10
  const name = match[2];
@@ -16,7 +16,7 @@ function templateParser(template) {
16
16
  while ((paramMatch = paramsRegex.exec(paramsString)) !== null) {
17
17
  const key = paramMatch[1];
18
18
  const value = paramMatch[2].trim();
19
- if (type === "SEL" && key === "options") {
19
+ if (type === 'SEL' && key === 'options') {
20
20
  params[key] = value;
21
21
  }
22
22
  else if (!isNaN(Number(value))) {
@@ -27,11 +27,12 @@ function templateParser(template) {
27
27
  params[key] = JSON.parse(value);
28
28
  }
29
29
  catch (e) {
30
- console.error("Error parsing JSON:", e);
30
+ console.error('Error parsing JSON:', e);
31
+ console.error(value);
31
32
  }
32
33
  }
33
34
  else {
34
- params[key] = value.replace(/^"|"$/g, "");
35
+ params[key] = value.replace(/^"|"$/g, '');
35
36
  }
36
37
  }
37
38
  return {
@@ -1,4 +1,5 @@
1
1
  export declare abstract class AbstractTokenizer {
2
+ name: string;
2
3
  abstract encodeString(str: string): number[];
3
4
  abstract decodeString(arr: number[]): string;
4
5
  abstract getEOS(): string;
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAIA,8BAAsB,iBAAiB;IACrC,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAC5C,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAC5C,QAAQ,CAAC,MAAM,IAAI,MAAM;CAC1B;AAED,qBAAa,cAAe,SAAQ,iBAAiB;IAC1C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAqBF,YAAY,CACnB,GAAG,EAAE,MAAM,EAAE,EACb,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,GAC5B,MAAM;IAWA,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,YAAa,SAAQ,iBAAiB;IACxC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMnC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAInC,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,gBAAiB,SAAQ,iBAAiB;IAC5C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAqBF,YAAY,CACnB,GAAG,EAAE,MAAM,EAAE,EACb,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,GAC5B,MAAM;IAWA,MAAM,IAAI,MAAM;CAG1B"}
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAIA,8BAAsB,iBAAiB;IAC9B,IAAI,EAAE,MAAM,CAAc;IACjC,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAC5C,QAAQ,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAC5C,QAAQ,CAAC,MAAM,IAAI,MAAM;CAC1B;AAED,qBAAa,cAAe,SAAQ,iBAAiB;IAC1C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAWF,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,GAAG,MAAM;IAQ3F,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,YAAa,SAAQ,iBAAiB;IACxC,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE;IAMnC,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM;IAInC,MAAM,IAAI,MAAM;CAG1B;AAED,qBAAa,gBAAiB,SAAQ,iBAAiB;IAC5C,YAAY,CACnB,GAAG,EAAE,MAAM,EACX,aAAa,CAAC,EAAE,OAAO,EACvB,mBAAmB,CAAC,EAAE,OAAO,EAC7B,eAAe,CAAC,EAAE,OAAO,GACxB,MAAM,EAAE;IAWF,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,aAAa,CAAC,EAAE,OAAO,EAAE,mBAAmB,CAAC,EAAE,OAAO,GAAG,MAAM;IAQ3F,MAAM,IAAI,MAAM;CAG1B"}
@@ -8,6 +8,9 @@ const _llama_tokenizer_1 = __importDefault(require("./_llama-tokenizer"));
8
8
  const gpt_tokenizer_1 = require("gpt-tokenizer");
9
9
  const _mistral_tokenizer_1 = __importDefault(require("./_mistral-tokenizer"));
10
10
  class AbstractTokenizer {
11
+ constructor() {
12
+ this.name = 'abstract';
13
+ }
11
14
  }
12
15
  exports.AbstractTokenizer = AbstractTokenizer;
13
16
  class LLaMATokenizer extends AbstractTokenizer {
@@ -24,13 +27,12 @@ class LLaMATokenizer extends AbstractTokenizer {
24
27
  decodeString(arr, add_bos_token, add_preceding_space) {
25
28
  if (arr[arr.length - 1] === 2) {
26
29
  arr = arr.slice(0, arr.length - 1);
27
- return (_llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) +
28
- this.getEOS());
30
+ return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) + this.getEOS();
29
31
  }
30
32
  return _llama_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space);
31
33
  }
32
34
  getEOS() {
33
- return "</s>";
35
+ return '</s>';
34
36
  }
35
37
  }
36
38
  exports.LLaMATokenizer = LLaMATokenizer;
@@ -44,7 +46,7 @@ class GTPTokenizer extends AbstractTokenizer {
44
46
  return (0, gpt_tokenizer_1.decode)(arr);
45
47
  }
46
48
  getEOS() {
47
- return "<|endoftext|>";
49
+ return '<|endoftext|>';
48
50
  }
49
51
  }
50
52
  exports.GTPTokenizer = GTPTokenizer;
@@ -62,13 +64,12 @@ class MistralTokenizer extends AbstractTokenizer {
62
64
  decodeString(arr, add_bos_token, add_preceding_space) {
63
65
  if (arr[arr.length - 1] === 2) {
64
66
  arr = arr.slice(0, arr.length - 1);
65
- return (_mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) +
66
- this.getEOS());
67
+ return _mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space) + this.getEOS();
67
68
  }
68
69
  return _mistral_tokenizer_1.default.decode(arr, add_bos_token, add_preceding_space);
69
70
  }
70
71
  getEOS() {
71
- return "</s>";
72
+ return '</s>';
72
73
  }
73
74
  }
74
75
  exports.MistralTokenizer = MistralTokenizer;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mikugg/guidance",
3
- "version": "0.17.0",
3
+ "version": "0.17.1",
4
4
  "description": "",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -40,5 +40,5 @@
40
40
  "ts-node": "^10.9.1",
41
41
  "typescript": "^5.2.2"
42
42
  },
43
- "gitHead": "bc2da106abe2ac0473544691c85f5f1731b6edc1"
43
+ "gitHead": "4bd2d27eb1edcbe500b91da80180d719d2619eab"
44
44
  }