@pwshub/aisdk 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,6 +11,17 @@ A thin, unified AI client for OpenAI, Anthropic, Google, DashScope, and DeepSeek
11
11
  - **Token usage tracking**: Detailed token counts and estimated cost per request
12
12
  - **Provider-specific options**: Pass provider-specific parameters when needed
13
13
 
14
+ ## Limitations
15
+
16
+ This package is designed for **personal project usage** with a focus on simplicity:
17
+
18
+ - **Text-only chat**: Supports basic text generation and conversation
19
+ - **No streaming**: All responses are returned as complete results
20
+ - **No multimodal inputs**: Images, audio, video, and file uploads are not supported
21
+ - **No function calling**: Tool use and function calling features are not available
22
+
23
+ For production applications requiring advanced features, consider using the official provider SDKs directly.
24
+
14
25
  ## Installation
15
26
 
16
27
  ```bash
@@ -80,7 +91,8 @@ Sends a text generation request.
80
91
  inputTokens: number,
81
92
  outputTokens: number,
82
93
  cacheTokens: number,
83
- estimatedCost: number // USD
94
+ reasoningTokens: number, // Reasoning/thinking tokens (0 for non-reasoning models)
95
+ estimatedCost: number // USD
84
96
  }
85
97
  }
86
98
  ```
@@ -133,6 +145,27 @@ const result = await ai.ask({
133
145
  })
134
146
  ```
135
147
 
148
+ ### Google (Disable Thinking Mode)
149
+
150
+ Gemini 2.5 Pro and other reasoning models use thinking tokens by default. On models that support it (e.g. Gemini 2.5 Flash), disable thinking mode to reduce latency and cost — note that Gemini 2.5 Pro cannot fully disable thinking and enforces a minimum thinking budget:
151
+
152
+ ```javascript
153
+ const result = await ai.ask({
154
+ model: 'gemini-2.5-pro',
155
+ apikey: process.env.GOOGLE_API_KEY,
156
+ prompt: 'What is the capital of Vietnam?',
157
+ maxTokens: 256,
158
+ providerOptions: {
159
+ thinkingConfig: {
160
+ thinkingBudget: 0, // Disable reasoning tokens
161
+ includeThoughts: false, // Don't include thought process in response
162
+ },
163
+ },
164
+ })
165
+ ```
166
+
167
+ > **Note:** When thinking mode is enabled (default for Gemini 2.5 Pro), the model may use most of the `maxTokens` budget for reasoning. Set a higher `maxTokens` (e.g., 2048) or, on models that support it (such as Gemini 2.5 Flash), disable thinking with `thinkingBudget: 0`.
168
+
136
169
  ### With Fallbacks
137
170
 
138
171
  ```javascript
@@ -166,6 +199,36 @@ const result = await ai.ask({
166
199
  })
167
200
  ```
168
201
 
202
+ ### DashScope with Custom Region
203
+
204
+ DashScope endpoints vary by region. Use `gatewayUrl` to specify your region:
205
+
206
+ ```javascript
207
+ import { createAi } from '@pwshub/aisdk'
208
+
209
+ // Singapore region
210
+ const aiSingapore = createAi({
211
+ gatewayUrl: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
212
+ })
213
+
214
+ // Virginia region (US)
215
+ const aiUS = createAi({
216
+ gatewayUrl: 'https://dashscope-us.aliyuncs.com/compatible-mode/v1',
217
+ })
218
+
219
+ // Beijing region (China)
220
+ const aiCN = createAi({
221
+ gatewayUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
222
+ })
223
+
224
+ // Use the regional client
225
+ const result = await aiSingapore.ask({
226
+ model: 'qwen3.5-plus',
227
+ apikey: process.env.DASHSCOPE_API_KEY,
228
+ prompt: 'Hello from Singapore!',
229
+ })
230
+ ```
231
+
169
232
  ### DeepSeek
170
233
 
171
234
  ```javascript
package/index.d.ts CHANGED
@@ -25,6 +25,7 @@ export interface Usage {
25
25
  inputTokens: number;
26
26
  outputTokens: number;
27
27
  cacheTokens: number;
28
+ reasoningTokens: number;
28
29
  estimatedCost: number;
29
30
  }
30
31
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pwshub/aisdk",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "A thin, unified AI client for OpenAI, Anthropic, Google, DashScope, and DeepSeek with automatic param normalization and fallback support",
5
5
  "repository": {
6
6
  "type": "git",
@@ -11,11 +11,12 @@
11
11
  "bun": ">=1.0.0"
12
12
  },
13
13
  "type": "module",
14
- "main": "./src/index.js",
15
14
  "exports": {
16
- ".": "./src/index.js"
15
+ ".": {
16
+ "types": "./index.d.ts",
17
+ "default": "./src/index.js"
18
+ }
17
19
  },
18
- "types": "./index.d.ts",
19
20
  "files": [
20
21
  "src",
21
22
  "index.d.ts"
package/src/index.js CHANGED
@@ -12,7 +12,7 @@
12
12
  * temperature: 0.5,
13
13
  * })
14
14
  * console.log(result.text)
15
- * console.log(result.usage) // { inputTokens, outputTokens, cacheTokens, estimatedCost }
15
+ * console.log(result.usage) // { inputTokens, outputTokens, cacheTokens, reasoningTokens, estimatedCost }
16
16
  *
17
17
  * @example With fallbacks
18
18
  * const result = await ai.ask({
@@ -81,6 +81,7 @@ export {
81
81
  * @property {number} inputTokens
82
82
  * @property {number} outputTokens
83
83
  * @property {number} cacheTokens
84
+ * @property {number} reasoningTokens
84
85
  * @property {number} estimatedCost - In USD, based on models.json pricing
85
86
  */
86
87
 
@@ -113,7 +114,7 @@ const extractGenConfig = (params) => {
113
114
  const calcCost = (usage, record) => {
114
115
  const M = 1_000_000
115
116
  const inputCost = (usage.inputTokens / M) * record.input_price
116
- const outputCost = (usage.outputTokens / M) * record.output_price
117
+ const outputCost = ((usage.outputTokens + usage.reasoningTokens) / M) * record.output_price
117
118
  const cacheCost = (usage.cacheTokens / M) * record.cache_price
118
119
 
119
120
  // Round to 8 decimal places to avoid floating point noise
package/src/providers.js CHANGED
@@ -23,7 +23,8 @@
23
23
  * @typedef {Object} RawUsage
24
24
  * @property {number} inputTokens
25
25
  * @property {number} outputTokens
26
- * @property {number} cacheTokens - 0 when not applicable
26
+ * @property {number} cacheTokens - 0 when not applicable
27
+ * @property {number} reasoningTokens - 0 when not applicable
27
28
  */
28
29
 
29
30
  /**
@@ -84,6 +85,7 @@ const openai = {
84
85
  inputTokens: data.usage?.prompt_tokens ?? 0,
85
86
  outputTokens: data.usage?.completion_tokens ?? 0,
86
87
  cacheTokens: data.usage?.prompt_tokens_details?.cached_tokens ?? 0,
88
+ reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
87
89
  }),
88
90
  }
89
91
 
@@ -119,7 +121,8 @@ const anthropic = {
119
121
  extractUsage: (data) => ({
120
122
  inputTokens: data.usage?.input_tokens ?? 0,
121
123
  outputTokens: data.usage?.output_tokens ?? 0,
122
- cacheTokens: data.usage?.cache_read_input_tokens ?? 0,
124
+ cacheTokens: (data.usage?.cache_read_input_tokens ?? 0) + (data.usage?.cache_creation_input_tokens ?? 0),
125
+ reasoningTokens: 0,
123
126
  }),
124
127
  }
125
128
 
@@ -155,17 +158,53 @@ const google = {
155
158
  throw new Error('Google response blocked by safety filters')
156
159
  }
157
160
 
158
- const text = candidate.content?.parts?.[0]?.text
159
- if (!text) {
161
+ // Handle different content structures
162
+ const content = candidate.content
163
+ if (!content) {
160
164
  throw new Error('Google response missing content')
161
165
  }
162
- return text
166
+
167
+ // Gemini 2.5 Pro may return parts as array or direct text
168
+ if (Array.isArray(content.parts)) {
169
+ const text = content.parts[0]?.text
170
+ if (!text) {
171
+ // Model may have used all tokens for reasoning (thoughtsTokenCount)
172
+ const thoughts = data.usageMetadata?.thoughtsTokenCount ?? 0
173
+ if (finishReason === 'MAX_TOKENS' && thoughts > 0) {
174
+ throw new Error(`Google response missing content (used ${thoughts} tokens for reasoning, maxTokens may be too low)`)
175
+ }
176
+ throw new Error('Google response missing content')
177
+ }
178
+ return text
179
+ }
180
+
181
+ // Some models may return content directly
182
+ if (typeof content.parts === 'string') {
183
+ return content.parts
184
+ }
185
+
186
+ throw new Error('Google response missing content')
187
+ },
188
+ extractUsage: (data) => {
189
+ // For Gemini models with reasoning, candidatesTokenCount may be undefined
190
+ // when all tokens were used for thinking. Calculate output tokens from
191
+ // totalTokenCount - promptTokenCount to get actual tokens used.
192
+ const totalTokens = data.usageMetadata?.totalTokenCount ?? 0
193
+ const promptTokens = data.usageMetadata?.promptTokenCount ?? 0
194
+ const candidatesTokens = data.usageMetadata?.candidatesTokenCount ?? 0
195
+ const thoughtsTokens = data.usageMetadata?.thoughtsTokenCount ?? 0
196
+
197
+ // outputTokens = actual generated tokens (including reasoning)
198
+ // If candidatesTokenCount is missing, derive from total - prompt
199
+ const outputTokens = candidatesTokens || (totalTokens - promptTokens)
200
+
201
+ return {
202
+ inputTokens: promptTokens,
203
+ outputTokens,
204
+ cacheTokens: data.usageMetadata?.cachedContentTokenCount ?? 0,
205
+ reasoningTokens: thoughtsTokens,
206
+ }
163
207
  },
164
- extractUsage: (data) => ({
165
- inputTokens: data.usageMetadata?.promptTokenCount ?? 0,
166
- outputTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
167
- cacheTokens: data.usageMetadata?.cachedContentTokenCount ?? 0,
168
- }),
169
208
  }
170
209
 
171
210
  /** @type {ProviderAdapter} */
@@ -198,6 +237,7 @@ const dashscope = {
198
237
  inputTokens: usage?.input_tokens ?? usage?.prompt_tokens ?? 0,
199
238
  outputTokens: usage?.output_tokens ?? usage?.completion_tokens ?? 0,
200
239
  cacheTokens: 0,
240
+ reasoningTokens: 0,
201
241
  }
202
242
  },
203
243
  }
@@ -225,7 +265,8 @@ const deepseek = {
225
265
  extractUsage: (data) => ({
226
266
  inputTokens: data.usage?.prompt_tokens ?? 0,
227
267
  outputTokens: data.usage?.completion_tokens ?? 0,
228
- cacheTokens: 0,
268
+ cacheTokens: data.usage?.prompt_cache_hit_tokens ?? 0,
269
+ reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
229
270
  }),
230
271
  }
231
272