pi-free 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Streaming parser for HTML-style thinking tags in LLM responses.
3
+ *
4
+ * Some providers (Qoder, DeepSeek via certain gateways) emit reasoning in
5
+ * HTML-style tags like <thinking>, <think>, <reasoning>, <thought> within
6
+ * the text stream, rather than via a structured reasoning_content field.
7
+ *
8
+ * This parser handles streaming chunks safely — it never emits partial tags
9
+ * by tracking trailing tag prefixes and deferring output until the boundary
10
+ * is clear.
11
+ */
12
+
13
+ import type {
14
+ AssistantMessage,
15
+ AssistantMessageEventStream,
16
+ TextContent,
17
+ ThinkingContent,
18
+ } from "@earendil-works/pi-ai";
19
+
20
/**
 * Open/close tag pairs treated as reasoning delimiters, in lookup order:
 * <thinking>, <think>, <reasoning>, <thought> and their matching close tags.
 */
const THINKING_TAG_VARIANTS: Array<{ open: string; close: string }> = [
  "thinking",
  "think",
  "reasoning",
  "thought",
].map((tagName) => ({ open: `<${tagName}>`, close: `</${tagName}>` }));
26
+
27
/**
 * Length of the longest suffix of `text` that is also a proper prefix of
 * `tag` (i.e. the tag may have started but is not complete yet).
 *
 * @param text Buffered stream content to inspect.
 * @param tag  Full tag literal, e.g. "<think>".
 * @returns Number of trailing characters of `text` that could be the start
 *          of `tag`, or 0 when the tail cannot begin the tag.
 */
function getTrailingPossibleTagPrefixLength(text: string, tag: string): number {
  // A complete tag is handled by the caller, so only lengths up to
  // tag.length - 1 are candidates.
  let candidate = Math.min(text.length, tag.length - 1);
  while (candidate > 0) {
    const tail = text.slice(text.length - candidate);
    if (tag.startsWith(tail)) return candidate;
    candidate -= 1;
  }
  return 0;
}
34
+
35
/**
 * Longest trailing partial-tag length of `text` across several candidate tags.
 *
 * @param text Buffered stream content to inspect.
 * @param tags Tag literals to test against the tail of `text`.
 * @returns The largest value getTrailingPossibleTagPrefixLength yields for any
 *          tag, or 0 when `tags` is empty or nothing matches.
 */
function getMaxTrailingPossibleTagPrefixLength(
  text: string,
  tags: string[],
): number {
  return tags.reduce(
    (longest, candidate) =>
      Math.max(longest, getTrailingPossibleTagPrefixLength(text, candidate)),
    0,
  );
}
48
+
49
/**
 * Streaming parser that extracts <thinking>/<think>/<reasoning>/<thought> tags
 * from a text stream and emits them as thinking_start/thinking_delta/thinking_end
 * events on the Pi event stream. Everything outside those tags is emitted as
 * text_start/text_delta events.
 *
 * The parser appends to `output.content` as it goes and never pushes a
 * text_end event itself; use getTextBlockIndex() to find the text block that
 * is still open once parsing is done.
 *
 * Results do not depend on how the input is split into chunks: partial tags
 * and a partial "\n\n" separator after a close tag are held back until the
 * next chunk (or finalize()) resolves them.
 *
 * Usage:
 * ```ts
 * const parser = new ThinkingTagParser(output, stream);
 * for (const chunk of textChunks) {
 *   parser.processChunk(chunk);
 * }
 * parser.finalize();
 * ```
 */
export class ThinkingTagParser {
  /** Separator that is dropped when it directly follows a close tag. */
  private static readonly POST_THINKING_SEPARATOR = "\n\n";

  /** Input received but not yet classified as text or thinking. */
  private textBuffer = "";
  /** True while the cursor is between an open tag and its close tag. */
  private inThinking = false;
  /** Index in output.content of the thinking block being filled, if any. */
  private thinkingBlockIndex: number | null = null;
  /** Index in output.content of the text block being filled, if any. */
  private textBlockIndex: number | null = null;
  /** Close tag matching the most recently seen open tag. */
  private activeEndTag = "";
  /** True after a close tag until we know whether the separator follows. */
  private awaitingSeparator = false;

  /**
   * @param output Message whose `content` array receives the parsed blocks.
   * @param stream Event stream that receives start/delta/end events.
   */
  constructor(
    private readonly output: AssistantMessage,
    private readonly stream: AssistantMessageEventStream,
  ) {
    // Set initial active end tag to the first variant's close
    this.activeEndTag = THINKING_TAG_VARIANTS[0]!.close;
  }

  /**
   * Feed the next piece of streamed text. Emits every event that can be
   * decided from the data seen so far and keeps the undecidable tail buffered.
   */
  processChunk(chunk: string): void {
    this.textBuffer += chunk;
    while (this.textBuffer.length > 0) {
      const prevLength = this.textBuffer.length;
      if (!this.inThinking) {
        this.processBeforeThinking();
        if (this.textBuffer.length === 0) break;
      }
      if (this.inThinking) {
        this.processInsideThinking();
        if (this.textBuffer.length === 0) break;
      }
      // Nothing was consumed: the remainder needs more input to be resolved.
      if (this.textBuffer.length >= prevLength) break;
    }
  }

  /**
   * Flush whatever is still buffered once the stream has ended.
   *
   * Outside a thinking block the remainder is emitted as text. Inside an
   * unterminated thinking block the remainder (including a dangling partial
   * close tag) is emitted as thinking, and the block is always closed with a
   * thinking_end event, even when nothing was left in the buffer.
   */
  finalize(): void {
    const remaining = this.textBuffer;
    this.textBuffer = "";
    this.awaitingSeparator = false;

    if (!this.inThinking) {
      this.emitText(remaining);
      return;
    }

    this.emitThinking(remaining);
    this.endThinkingBlock();
    this.inThinking = false;
  }

  /** Get the index of the final text block (after thinking, or null if none) */
  getTextBlockIndex(): number | null {
    return this.textBlockIndex;
  }

  /**
   * Handle buffered input while outside a thinking block: emit plain text up
   * to the earliest open tag, or up to a possible partial open tag at the end.
   */
  private processBeforeThinking(): void {
    if (this.awaitingSeparator && !this.resolvePendingSeparator()) return;

    let bestPos = -1;
    let bestVariant: (typeof THINKING_TAG_VARIANTS)[number] | null = null;
    for (const variant of THINKING_TAG_VARIANTS) {
      const pos = this.textBuffer.indexOf(variant.open);
      if (pos !== -1 && (bestPos === -1 || pos < bestPos)) {
        bestPos = pos;
        bestVariant = variant;
      }
    }

    if (bestPos !== -1 && bestVariant) {
      if (bestPos > 0) this.emitText(this.textBuffer.slice(0, bestPos));
      this.textBuffer = this.textBuffer.slice(
        bestPos + bestVariant.open.length,
      );
      this.activeEndTag = bestVariant.close;
      this.inThinking = true;
      return;
    }

    // No thinking tag found yet, but the buffer might end with a partial tag
    const trailingPrefixLength = getMaxTrailingPossibleTagPrefixLength(
      this.textBuffer,
      THINKING_TAG_VARIANTS.map((variant) => variant.open),
    );
    const safeLen = this.textBuffer.length - trailingPrefixLength;
    if (safeLen > 0) {
      this.emitText(this.textBuffer.slice(0, safeLen));
      this.textBuffer = this.textBuffer.slice(safeLen);
    }
  }

  /**
   * Decide whether the separator follows the close tag that was just consumed.
   *
   * @returns false when the buffer is only a prefix of the separator and more
   *          input is needed; true once the question is settled (the separator
   *          has been dropped, or the buffer clearly does not start with it).
   */
  private resolvePendingSeparator(): boolean {
    const separator = ThinkingTagParser.POST_THINKING_SEPARATOR;
    if (this.textBuffer.startsWith(separator)) {
      this.textBuffer = this.textBuffer.slice(separator.length);
    } else if (separator.startsWith(this.textBuffer)) {
      // Buffer is "" or "\n": the separator may still be split across chunks.
      return false;
    }
    this.awaitingSeparator = false;
    return true;
  }

  /**
   * Handle buffered input while inside a thinking block: emit thinking up to
   * the active close tag, or up to a possible partial close tag at the end.
   */
  private processInsideThinking(): void {
    const endPos = this.textBuffer.indexOf(this.activeEndTag);
    if (endPos !== -1) {
      if (endPos > 0) this.emitThinking(this.textBuffer.slice(0, endPos));
      this.endThinkingBlock();
      this.textBuffer = this.textBuffer.slice(
        endPos + this.activeEndTag.length,
      );
      this.inThinking = false;
      // Text after a thinking block starts a fresh text block.
      this.textBlockIndex = null;
      // The "\n\n" separator is stripped lazily so that it is also removed
      // when it arrives in a later chunk than the close tag.
      this.awaitingSeparator = true;
      return;
    }

    // Buffer might end with a partial close tag
    const trailingPrefixLength = getTrailingPossibleTagPrefixLength(
      this.textBuffer,
      this.activeEndTag,
    );
    const safeLen = this.textBuffer.length - trailingPrefixLength;
    if (safeLen > 0) {
      this.emitThinking(this.textBuffer.slice(0, safeLen));
      this.textBuffer = this.textBuffer.slice(safeLen);
    }
  }

  /** Push thinking_end for the open thinking block (if any) and forget it. */
  private endThinkingBlock(): void {
    if (this.thinkingBlockIndex === null) return;
    const block = this.output.content[
      this.thinkingBlockIndex
    ] as ThinkingContent;
    this.stream.push({
      type: "thinking_end",
      contentIndex: this.thinkingBlockIndex,
      content: block.thinking,
      partial: this.output,
    });
    this.thinkingBlockIndex = null;
  }

  /**
   * Append `text` to the current text block, creating the block (and pushing
   * text_start) first when none is open. Empty input is ignored.
   */
  private emitText(text: string): void {
    if (!text) return;
    if (this.textBlockIndex === null) {
      this.textBlockIndex = this.output.content.length;
      this.output.content.push({ type: "text", text: "" } as TextContent);
      this.stream.push({
        type: "text_start",
        contentIndex: this.textBlockIndex,
        partial: this.output,
      });
    }
    const block = this.output.content[this.textBlockIndex] as TextContent;
    block.text += text;
    this.stream.push({
      type: "text_delta",
      contentIndex: this.textBlockIndex,
      delta: text,
      partial: this.output,
    });
  }

  /**
   * Append `thinking` to the current thinking block, creating the block (and
   * pushing thinking_start) first when none is open. Empty input is ignored.
   */
  private emitThinking(thinking: string): void {
    if (thinking.length === 0) return;
    if (this.thinkingBlockIndex === null) {
      if (this.textBlockIndex === null) {
        this.thinkingBlockIndex = this.output.content.length;
        this.output.content.push({
          type: "thinking",
          thinking: "",
        } as ThinkingContent);
      } else {
        // Insert thinking block before the existing text block
        // NOTE(review): this shifts the text block's index after its
        // text_start was already pushed with the old index; confirm that
        // consumers tolerate the shift.
        this.thinkingBlockIndex = this.textBlockIndex;
        this.output.content.splice(this.thinkingBlockIndex, 0, {
          type: "thinking",
          thinking: "",
        } as ThinkingContent);
        this.textBlockIndex = this.textBlockIndex + 1;
      }
      this.stream.push({
        type: "thinking_start",
        contentIndex: this.thinkingBlockIndex,
        partial: this.output,
      });
    }
    const block = this.output.content[
      this.thinkingBlockIndex
    ] as ThinkingContent;
    block.thinking += thinking;
    this.stream.push({
      type: "thinking_delta",
      contentIndex: this.thinkingBlockIndex,
      delta: thinking,
      partial: this.output,
    });
  }
}
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Message format transformation between Pi's internal format and Qoder's
3
+ * proprietary API format.
4
+ *
5
+ * Pi uses a structured message format with typed content blocks (TextContent,
6
+ * ThinkingContent, ImageContent, ToolCall). Qoder's API expects an
7
+ * OpenAI-compatible format with some custom extensions.
8
+ */
9
+
10
+ import type {
11
+ AssistantMessage,
12
+ ImageContent,
13
+ Message,
14
+ TextContent,
15
+ ThinkingContent,
16
+ Tool,
17
+ ToolCall,
18
+ ToolResultMessage,
19
+ } from "@earendil-works/pi-ai";
20
+
21
/** Name, description and parameter schema of a callable tool. */
type QoderFunctionDefinition = {
  name: string;
  description?: string;
  parameters?: unknown;
};

/** OpenAI-style tool definition sent to the Qoder API. */
interface QoderTool {
  type: "function";
  function: QoderFunctionDefinition;
}

/** Function name plus its arguments encoded as a string. */
type QoderFunctionInvocation = { name?: string; arguments: string };

/** OpenAI-style tool call within an assistant message. */
interface QoderToolCall {
  id?: string;
  type: "function";
  function: QoderFunctionInvocation;
}

/** Plain-text part of a multi-part message body. */
type QoderTextPart = { type: "text"; text: string };
/** Image part of a multi-part message body, referenced by URL. */
type QoderImagePart = { type: "image_url"; image_url: { url: string } };
/** Any single part of a multi-part message body. */
type QoderContentPart = QoderTextPart | QoderImagePart;
/** Message body: either a plain string or a list of parts. */
type QoderContent = string | Array<QoderContentPart>;

/** OpenAI-style message sent to the Qoder API. */
interface QoderMessage {
  role: "user" | "assistant" | "tool";
  content: QoderContent | null;
  tool_calls?: QoderToolCall[];
  tool_call_id?: string;
}
49
+
50
/**
 * Flatten a message body to a single string.
 *
 * String bodies are returned as-is. For array bodies the `text` of text
 * blocks and the `thinking` of thinking blocks are concatenated in order with
 * no separator; every other block type contributes nothing.
 *
 * @param msg Message whose content should be flattened.
 * @returns The concatenated text, or "" when the content is neither a string
 *          nor an array.
 */
export function getContentText(msg: Message): string {
  const body = msg.content;
  if (typeof body === "string") return body;
  if (!Array.isArray(body)) return "";

  const pieces: string[] = [];
  for (const part of body) {
    if (part.type === "text") {
      pieces.push((part as TextContent).text);
    } else if (part.type === "thinking") {
      pieces.push((part as ThinkingContent).thinking);
    }
  }
  return pieces.join("");
}
66
+
67
/**
 * Convert Pi tool definitions to the function-style tool format sent to Qoder.
 *
 * @param tools Pi tool definitions.
 * @returns One `{ type: "function", function: { name, description, parameters } }`
 *          entry per input tool, in the same order.
 */
export function transformTools(tools: Tool[]): QoderTool[] {
  const converted: QoderTool[] = [];
  for (const { name, description, parameters } of tools) {
    converted.push({
      type: "function",
      function: { name, description, parameters },
    });
  }
  return converted;
}
80
+
81
/**
 * Convert Pi's internal messages to the message list sent to Qoder.
 *
 * - Assistant messages whose stop reason is "error" or "aborted" are dropped.
 * - "user", "assistant" and "toolResult" messages are converted by their
 *   dedicated transformers.
 * - Messages with any other role are silently omitted.
 *
 * @param messages Conversation in Pi's format.
 * @returns Converted messages in the original order.
 */
export function transformMessagesForQoder(messages: Message[]): QoderMessage[] {
  const converted: QoderMessage[] = [];
  for (const message of messages) {
    if (isSkippableMessage(message)) continue;

    switch (message.role) {
      case "user":
        converted.push(transformUserMessage(message));
        break;
      case "assistant":
        converted.push(transformAssistantMessage(message as AssistantMessage));
        break;
      case "toolResult":
        converted.push(
          transformToolResultMessage(message as ToolResultMessage),
        );
        break;
      default:
        // Unknown roles have no Qoder equivalent and are left out.
        break;
    }
  }
  return converted;
}
108
+
109
/**
 * Whether a message must be left out of the request: true only for assistant
 * messages whose stop reason is "error" or "aborted".
 */
function isSkippableMessage(msg: Message): boolean {
  if (msg.role === "assistant") {
    const { stopReason } = msg as AssistantMessage;
    return stopReason === "error" || stopReason === "aborted";
  }
  return false;
}
114
+
115
/**
 * Convert a user message to Qoder's format.
 *
 * - String content is passed through.
 * - Array content containing at least one image becomes a list of text and
 *   image_url parts (images are inlined as base64 data URLs); other block
 *   types are dropped.
 * - Array content without images is flattened to a string via getContentText.
 * - Any other content yields an empty string.
 */
function transformUserMessage(msg: Message): QoderMessage {
  const body = msg.content;
  if (typeof body === "string") return { role: "user", content: body };
  if (!Array.isArray(body)) return { role: "user", content: "" };

  const containsImage = body.some((part) => part.type === "image");
  if (!containsImage) return { role: "user", content: getContentText(msg) };

  const parts: Array<QoderTextPart | QoderImagePart> = [];
  for (const part of body) {
    if (part.type === "text") {
      parts.push({ type: "text", text: (part as TextContent).text });
    } else if (part.type === "image") {
      const image = part as ImageContent;
      parts.push({
        type: "image_url",
        image_url: { url: `data:${image.mimeType};base64,${image.data}` },
      });
    }
  }
  return { role: "user", content: parts };
}
143
+
144
/**
 * Encode tool-call arguments as the JSON string the wire format requires.
 * Strings are passed through; missing (null/undefined) arguments become "{}"
 * because JSON.stringify(undefined) returns undefined rather than a string.
 */
function serializeToolCallArguments(args: unknown): string {
  if (typeof args === "string") return args;
  return JSON.stringify(args ?? {});
}

/**
 * Convert an assistant message to Qoder's format.
 *
 * - Text blocks are concatenated into `content`.
 * - Thinking blocks are inlined into `content` as
 *   `<thinking>…</thinking>` followed by a blank line.
 * - Tool-call blocks become `tool_calls` entries whose `arguments` is always
 *   a string (see serializeToolCallArguments).
 * - `content` is null when no text was produced; `tool_calls` is only set
 *   when at least one tool call exists.
 */
function transformAssistantMessage(am: AssistantMessage): QoderMessage {
  let content = "";
  const toolCalls: QoderToolCall[] = [];

  if (Array.isArray(am.content)) {
    for (const block of am.content) {
      if (block.type === "text") {
        content += (block as TextContent).text;
      } else if (block.type === "thinking") {
        content += `<thinking>${(block as ThinkingContent).thinking}</thinking>\n\n`;
      } else if (block.type === "toolCall") {
        const tc = block as ToolCall;
        toolCalls.push({
          id: tc.id,
          type: "function",
          function: {
            name: tc.name,
            arguments: serializeToolCallArguments(tc.arguments),
          },
        });
      }
    }
  } else {
    // Defensive: tolerate a plain-string body even though the type is an array.
    content = am.content || "";
  }

  const mapped: QoderMessage = {
    role: "assistant",
    content: content || null,
  };
  if (toolCalls.length > 0) {
    mapped.tool_calls = toolCalls;
  }
  return mapped;
}
182
+
183
/**
 * Convert a tool result to a Qoder "tool" message: the result's content is
 * flattened to a string via getContentText and linked to its originating call
 * through `tool_call_id`.
 */
function transformToolResultMessage(tr: ToolResultMessage): QoderMessage {
  const callId = tr.toolCallId;
  const flattened = getContentText(tr);
  return { role: "tool", tool_call_id: callId, content: flattened };
}
@@ -112,12 +112,9 @@ function isTokenRouterModel(model: { provider?: string }): boolean {
112
112
 
113
113
  // =============================================================================
114
114
  // Known Free Models
115
- // TokenRouter doesn't expose pricing via /v1/models, so known-free models
116
- // are hardcoded. Detected via name suffix also catches `:free`-tagged models.
115
+ // TokenRouter doesn't expose pricing via /v1/models.
116
+ // Known-free detection uses `:free` name suffix for promotional models.
117
117
  // =============================================================================
118
-
119
- const MINIMAX_M3_ID = "MiniMax-M3";
120
- const KNOWN_FREE_MODELS = new Set([MINIMAX_M3_ID]);
121
118
  const TOKENROUTER_OPENAI_API = "tokenrouter-openai-completions" as const;
122
119
  const TOKENROUTER_HIGH_LOAD_RETRY_DELAY_MS = 30_000;
123
120
  const MINIMAX_ADAPTIVE_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
@@ -482,7 +479,7 @@ export function mapTokenRouterModel(
482
479
  const reasoning = isMinimax || isLikelyReasoningModel({ id: model.id, name });
483
480
  const isResponseApi =
484
481
  model.supported_endpoint_types.includes("openai-response");
485
- const isKnownFree = KNOWN_FREE_MODELS.has(model.id);
482
+ const isKnownFree = model.id.toLowerCase().endsWith(":free");
486
483
 
487
484
  return {
488
485
  id: model.id,