@pwshub/aisdk 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -1
- package/index.d.ts +1 -0
- package/package.json +5 -4
- package/src/index.js +3 -2
- package/src/providers.js +52 -11
package/README.md
CHANGED
|
@@ -11,6 +11,17 @@ A thin, unified AI client for OpenAI, Anthropic, Google, DashScope, and DeepSeek
|
|
|
11
11
|
- **Token usage tracking**: Detailed token counts and estimated cost per request
|
|
12
12
|
- **Provider-specific options**: Pass provider-specific parameters when needed
|
|
13
13
|
|
|
14
|
+
## Limitations
|
|
15
|
+
|
|
16
|
+
This package is designed for **personal project usage** with a focus on simplicity:
|
|
17
|
+
|
|
18
|
+
- **Text-only chat**: Supports basic text generation and conversation
|
|
19
|
+
- **No streaming**: All responses are returned as complete results
|
|
20
|
+
- **No multimodal inputs**: Images, audio, video, and file uploads are not supported
|
|
21
|
+
- **No function calling**: Tool use and function calling features are not available
|
|
22
|
+
|
|
23
|
+
For production applications requiring advanced features, consider using the official provider SDKs directly.
|
|
24
|
+
|
|
14
25
|
## Installation
|
|
15
26
|
|
|
16
27
|
```bash
|
|
@@ -80,7 +91,8 @@ Sends a text generation request.
|
|
|
80
91
|
inputTokens: number,
|
|
81
92
|
outputTokens: number,
|
|
82
93
|
cacheTokens: number,
|
|
83
|
-
|
|
94
|
+
reasoningTokens: number, // Reasoning/thinking tokens (0 for non-reasoning models)
|
|
95
|
+
estimatedCost: number // USD
|
|
84
96
|
}
|
|
85
97
|
}
|
|
86
98
|
```
|
|
@@ -133,6 +145,27 @@ const result = await ai.ask({
|
|
|
133
145
|
})
|
|
134
146
|
```
|
|
135
147
|
|
|
148
|
+
### Google (Disable Thinking Mode)
|
|
149
|
+
|
|
150
|
+
Gemini 2.5 Pro and other reasoning models use thinking tokens by default. Disable thinking mode to reduce latency and cost:
|
|
151
|
+
|
|
152
|
+
```javascript
|
|
153
|
+
const result = await ai.ask({
|
|
154
|
+
model: 'gemini-2.5-pro',
|
|
155
|
+
apikey: process.env.GOOGLE_API_KEY,
|
|
156
|
+
prompt: 'What is the capital of Vietnam?',
|
|
157
|
+
maxTokens: 256,
|
|
158
|
+
providerOptions: {
|
|
159
|
+
thinkingConfig: {
|
|
160
|
+
thinkingBudget: 0, // Disable reasoning tokens
|
|
161
|
+
includeThoughts: false, // Don't include thought process in response
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
})
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
> **Note:** When thinking mode is enabled (default for Gemini 2.5 Pro), the model may use most of the `maxTokens` budget for reasoning. Set a higher `maxTokens` (e.g., 2048) or disable thinking with `thinkingBudget: 0`.
|
|
168
|
+
|
|
136
169
|
### With Fallbacks
|
|
137
170
|
|
|
138
171
|
```javascript
|
|
@@ -166,6 +199,36 @@ const result = await ai.ask({
|
|
|
166
199
|
})
|
|
167
200
|
```
|
|
168
201
|
|
|
202
|
+
### DashScope with Custom Region
|
|
203
|
+
|
|
204
|
+
DashScope endpoints vary by region. Use `gatewayUrl` to specify your region:
|
|
205
|
+
|
|
206
|
+
```javascript
|
|
207
|
+
import { createAi } from '@pwshub/aisdk'
|
|
208
|
+
|
|
209
|
+
// Singapore region
|
|
210
|
+
const aiSingapore = createAi({
|
|
211
|
+
gatewayUrl: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
|
|
212
|
+
})
|
|
213
|
+
|
|
214
|
+
// Virginia region (US)
|
|
215
|
+
const aiUS = createAi({
|
|
216
|
+
gatewayUrl: 'https://dashscope-us.aliyuncs.com/compatible-mode/v1',
|
|
217
|
+
})
|
|
218
|
+
|
|
219
|
+
// Beijing region (China)
|
|
220
|
+
const aiCN = createAi({
|
|
221
|
+
gatewayUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
// Use the regional client
|
|
225
|
+
const result = await aiSingapore.ask({
|
|
226
|
+
model: 'qwen3.5-plus',
|
|
227
|
+
apikey: process.env.DASHSCOPE_API_KEY,
|
|
228
|
+
prompt: 'Hello from Singapore!',
|
|
229
|
+
})
|
|
230
|
+
```
|
|
231
|
+
|
|
169
232
|
### DeepSeek
|
|
170
233
|
|
|
171
234
|
```javascript
|
package/index.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pwshub/aisdk",
|
|
3
|
-
"version": "0.0.2",
|
|
3
|
+
"version": "0.0.3",
|
|
4
4
|
"description": "A thin, unified AI client for OpenAI, Anthropic, Google, DashScope, and DeepSeek with automatic param normalization and fallback support",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -11,11 +11,12 @@
|
|
|
11
11
|
"bun": ">=1.0.0"
|
|
12
12
|
},
|
|
13
13
|
"type": "module",
|
|
14
|
-
"main": "./src/index.js",
|
|
15
14
|
"exports": {
|
|
16
|
-
".": "./src/index.js"
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./index.d.ts",
|
|
17
|
+
"default": "./src/index.js"
|
|
18
|
+
}
|
|
17
19
|
},
|
|
18
|
-
"types": "./index.d.ts",
|
|
19
20
|
"files": [
|
|
20
21
|
"src",
|
|
21
22
|
"index.d.ts"
|
package/src/index.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* temperature: 0.5,
|
|
13
13
|
* })
|
|
14
14
|
* console.log(result.text)
|
|
15
|
-
* console.log(result.usage) // { inputTokens, outputTokens, cacheTokens, estimatedCost }
|
|
15
|
+
* console.log(result.usage) // { inputTokens, outputTokens, cacheTokens, reasoningTokens, estimatedCost }
|
|
16
16
|
*
|
|
17
17
|
* @example With fallbacks
|
|
18
18
|
* const result = await ai.ask({
|
|
@@ -81,6 +81,7 @@ export {
|
|
|
81
81
|
* @property {number} inputTokens
|
|
82
82
|
* @property {number} outputTokens
|
|
83
83
|
* @property {number} cacheTokens
|
|
84
|
+
* @property {number} reasoningTokens
|
|
84
85
|
* @property {number} estimatedCost - In USD, based on models.json pricing
|
|
85
86
|
*/
|
|
86
87
|
|
|
@@ -113,7 +114,7 @@ const extractGenConfig = (params) => {
|
|
|
113
114
|
const calcCost = (usage, record) => {
|
|
114
115
|
const M = 1_000_000
|
|
115
116
|
const inputCost = (usage.inputTokens / M) * record.input_price
|
|
116
|
-
const outputCost = (usage.outputTokens / M) * record.output_price
|
|
117
|
+
const outputCost = ((usage.outputTokens + usage.reasoningTokens) / M) * record.output_price
|
|
117
118
|
const cacheCost = (usage.cacheTokens / M) * record.cache_price
|
|
118
119
|
|
|
119
120
|
// Round to 8 decimal places to avoid floating point noise
|
package/src/providers.js
CHANGED
|
@@ -23,7 +23,8 @@
|
|
|
23
23
|
* @typedef {Object} RawUsage
|
|
24
24
|
* @property {number} inputTokens
|
|
25
25
|
* @property {number} outputTokens
|
|
26
|
-
* @property {number} cacheTokens
|
|
26
|
+
* @property {number} cacheTokens - 0 when not applicable
|
|
27
|
+
* @property {number} reasoningTokens - 0 when not applicable
|
|
27
28
|
*/
|
|
28
29
|
|
|
29
30
|
/**
|
|
@@ -84,6 +85,7 @@ const openai = {
|
|
|
84
85
|
inputTokens: data.usage?.prompt_tokens ?? 0,
|
|
85
86
|
outputTokens: data.usage?.completion_tokens ?? 0,
|
|
86
87
|
cacheTokens: data.usage?.prompt_tokens_details?.cached_tokens ?? 0,
|
|
88
|
+
reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
|
|
87
89
|
}),
|
|
88
90
|
}
|
|
89
91
|
|
|
@@ -119,7 +121,8 @@ const anthropic = {
|
|
|
119
121
|
extractUsage: (data) => ({
|
|
120
122
|
inputTokens: data.usage?.input_tokens ?? 0,
|
|
121
123
|
outputTokens: data.usage?.output_tokens ?? 0,
|
|
122
|
-
cacheTokens: data.usage?.cache_read_input_tokens ?? 0,
|
|
124
|
+
cacheTokens: (data.usage?.cache_read_input_tokens ?? 0) + (data.usage?.cache_creation_input_tokens ?? 0),
|
|
125
|
+
reasoningTokens: 0,
|
|
123
126
|
}),
|
|
124
127
|
}
|
|
125
128
|
|
|
@@ -155,17 +158,53 @@ const google = {
|
|
|
155
158
|
throw new Error('Google response blocked by safety filters')
|
|
156
159
|
}
|
|
157
160
|
|
|
158
|
-
|
|
159
|
-
|
|
161
|
+
// Handle different content structures
|
|
162
|
+
const content = candidate.content
|
|
163
|
+
if (!content) {
|
|
160
164
|
throw new Error('Google response missing content')
|
|
161
165
|
}
|
|
162
|
-
|
|
166
|
+
|
|
167
|
+
// Gemini 2.5 Pro may return parts as array or direct text
|
|
168
|
+
if (Array.isArray(content.parts)) {
|
|
169
|
+
const text = content.parts[0]?.text
|
|
170
|
+
if (!text) {
|
|
171
|
+
// Model may have used all tokens for reasoning (thoughtsTokenCount)
|
|
172
|
+
const thoughts = data.usageMetadata?.thoughtsTokenCount ?? 0
|
|
173
|
+
if (finishReason === 'MAX_TOKENS' && thoughts > 0) {
|
|
174
|
+
throw new Error(`Google response missing content (used ${thoughts} tokens for reasoning, maxTokens may be too low)`)
|
|
175
|
+
}
|
|
176
|
+
throw new Error('Google response missing content')
|
|
177
|
+
}
|
|
178
|
+
return text
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// Some models may return content directly
|
|
182
|
+
if (typeof content.parts === 'string') {
|
|
183
|
+
return content.parts
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
throw new Error('Google response missing content')
|
|
187
|
+
},
|
|
188
|
+
extractUsage: (data) => {
|
|
189
|
+
// For Gemini models with reasoning, candidatesTokenCount may be undefined
|
|
190
|
+
// when all tokens were used for thinking. Calculate output tokens from
|
|
191
|
+
// totalTokenCount - promptTokenCount to get actual tokens used.
|
|
192
|
+
const totalTokens = data.usageMetadata?.totalTokenCount ?? 0
|
|
193
|
+
const promptTokens = data.usageMetadata?.promptTokenCount ?? 0
|
|
194
|
+
const candidatesTokens = data.usageMetadata?.candidatesTokenCount ?? 0
|
|
195
|
+
const thoughtsTokens = data.usageMetadata?.thoughtsTokenCount ?? 0
|
|
196
|
+
|
|
197
|
+
// outputTokens = actual generated tokens (including reasoning)
|
|
198
|
+
// If candidatesTokenCount is missing, derive from total - prompt
|
|
199
|
+
const outputTokens = candidatesTokens || (totalTokens - promptTokens)
|
|
200
|
+
|
|
201
|
+
return {
|
|
202
|
+
inputTokens: promptTokens,
|
|
203
|
+
outputTokens,
|
|
204
|
+
cacheTokens: data.usageMetadata?.cachedContentTokenCount ?? 0,
|
|
205
|
+
reasoningTokens: thoughtsTokens,
|
|
206
|
+
}
|
|
163
207
|
},
|
|
164
|
-
extractUsage: (data) => ({
|
|
165
|
-
inputTokens: data.usageMetadata?.promptTokenCount ?? 0,
|
|
166
|
-
outputTokens: data.usageMetadata?.candidatesTokenCount ?? 0,
|
|
167
|
-
cacheTokens: data.usageMetadata?.cachedContentTokenCount ?? 0,
|
|
168
|
-
}),
|
|
169
208
|
}
|
|
170
209
|
|
|
171
210
|
/** @type {ProviderAdapter} */
|
|
@@ -198,6 +237,7 @@ const dashscope = {
|
|
|
198
237
|
inputTokens: usage?.input_tokens ?? usage?.prompt_tokens ?? 0,
|
|
199
238
|
outputTokens: usage?.output_tokens ?? usage?.completion_tokens ?? 0,
|
|
200
239
|
cacheTokens: 0,
|
|
240
|
+
reasoningTokens: 0,
|
|
201
241
|
}
|
|
202
242
|
},
|
|
203
243
|
}
|
|
@@ -225,7 +265,8 @@ const deepseek = {
|
|
|
225
265
|
extractUsage: (data) => ({
|
|
226
266
|
inputTokens: data.usage?.prompt_tokens ?? 0,
|
|
227
267
|
outputTokens: data.usage?.completion_tokens ?? 0,
|
|
228
|
-
cacheTokens: 0,
|
|
268
|
+
cacheTokens: data.usage?.prompt_cache_hit_tokens ?? 0,
|
|
269
|
+
reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens ?? 0,
|
|
229
270
|
}),
|
|
230
271
|
}
|
|
231
272
|
|