mimo2codex 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,29 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- mimo_chat.py — single-shot or streaming chat with Xiaomi MiMo V2.5.
3
+ mimo_chat.py — single-shot or streaming chat. Works WITHOUT any API key.
4
4
 
5
- Hits MiMo's OpenAI-compatible /v1/chat/completions endpoint directly. Handles
6
- the MiMo-specific quirks:
5
+ Engines (--engine):
6
+ auto (default) mimo if MIMO_API_KEY set, else pollinations
7
+ mimo — Xiaomi MiMo V2.5 (best quality, needs MIMO_API_KEY)
8
+ pollinations — pollinations.ai free public chat endpoint. NO KEY REQUIRED
7
9
 
10
+ When the mimo engine is used, handles the MiMo-specific quirks:
8
11
  - max_completion_tokens (not max_tokens)
9
12
  - vision via mimo-v2.5 / mimo-v2-omni (and the required text part next to
10
13
  image_url, otherwise MiMo 400s with "text is not set")
11
- - web_search builtin tool (requires Web Search Plugin activated in console)
14
+ - web_search builtin: auto-enabled on pay-as-you-go (sk-*) keys, skipped on
15
+ token-plan (tp-*) keys. Model decides when to invoke (tool_choice: auto).
16
+ Requires the Web Search Plugin to be activated in the MiMo console.
12
17
  - reasoning_content extraction
13
18
 
14
19
  Usage:
15
- export MIMO_API_KEY=sk-xxxx
20
+ # Zero-setup
16
21
  python3 mimo_chat.py "your prompt"
17
- python3 mimo_chat.py --model mimo-v2.5 --image https://x/y.png "describe"
18
- python3 mimo_chat.py --search "今天上海天气?"
22
+ python3 mimo_chat.py --image https://x/y.png "describe"
23
+
24
+ # MiMo key — gets best quality + native web search (when sk-*)
25
+ export MIMO_API_KEY=sk-xxxx
26
+ python3 mimo_chat.py "今天上海天气?"
19
27
  python3 mimo_chat.py --stream "tell me a story"
20
28
 
21
29
  Only depends on the standard library — no `openai` SDK install needed.
@@ -48,51 +56,64 @@ def build_messages(prompt: str, image: str | None) -> list[dict[str, Any]]:
48
56
  ]
49
57
 
50
58
 
59
+ POLLINATIONS_URL = "https://text.pollinations.ai/openai"
60
+ POLLINATIONS_DEFAULT_MODEL = "openai" # vision-capable, free, no key
61
+
62
+
51
63
  def build_body(
52
64
  *,
53
65
  prompt: str,
54
66
  image: str | None,
55
67
  model: str,
56
68
  stream: bool,
57
- search: bool,
69
+ enable_web_search: bool,
58
70
  max_tokens: int,
59
71
  temperature: float,
72
+ engine: str,
60
73
  ) -> dict[str, Any]:
61
74
  body: dict[str, Any] = {
62
75
  "model": model,
63
76
  "messages": build_messages(prompt, image),
64
- "max_completion_tokens": max_tokens,
65
77
  "temperature": temperature,
66
78
  "stream": stream,
67
79
  }
68
- if search:
69
- # MiMo native web_search builtin. Requires the Web Search Plugin to
70
- # be activated at https://platform.xiaomimimo.com/#/console/plugin.
71
- body["tools"] = [{"type": "web_search", "force_search": True}]
80
+ if engine == "mimo":
81
+ # MiMo's quirk: max_completion_tokens, not max_tokens.
82
+ body["max_completion_tokens"] = max_tokens
83
+ else:
84
+ body["max_tokens"] = max_tokens
85
+ if enable_web_search:
86
+ # MiMo native web_search builtin. The model decides whether to invoke
87
+ # it (tool_choice=auto). Requires the Web Search Plugin to be
88
+ # activated at https://platform.xiaomimimo.com/#/console/plugin —
89
+ # without that, MiMo returns 400 and the error body is printed.
90
+ body["tools"] = [{"type": "web_search"}]
72
91
  body["tool_choice"] = "auto"
73
92
  return body
74
93
 
75
94
 
76
- def post(url: str, body: dict[str, Any], api_key: str, stream: bool) -> Any:
95
+ def post(url: str, body: dict[str, Any], api_key: str | None, stream: bool, *, engine: str) -> Any:
96
+ headers = {
97
+ "Content-Type": "application/json",
98
+ "Accept": "text/event-stream" if stream else "application/json",
99
+ "User-Agent": "mimoskill/0.1",
100
+ }
101
+ if api_key:
102
+ headers["Authorization"] = f"Bearer {api_key}"
77
103
  req = urllib.request.Request(
78
104
  url,
79
105
  method="POST",
80
106
  data=json.dumps(body).encode("utf-8"),
81
- headers={
82
- "Content-Type": "application/json",
83
- "Accept": "text/event-stream" if stream else "application/json",
84
- "Authorization": f"Bearer {api_key}",
85
- "User-Agent": "mimoskill/0.1",
86
- },
107
+ headers=headers,
87
108
  )
88
109
  try:
89
110
  return urllib.request.urlopen(req, timeout=300)
90
111
  except urllib.error.HTTPError as e:
91
112
  snippet = e.read().decode("utf-8", "replace")
92
- sys.stderr.write(f"MiMo returned HTTP {e.code}: {snippet}\n")
113
+ sys.stderr.write(f"{engine} returned HTTP {e.code}: {snippet}\n")
93
114
  sys.exit(1)
94
115
  except urllib.error.URLError as e:
95
- sys.stderr.write(f"connection failed: {e}\n")
116
+ sys.stderr.write(f"connection failed ({engine}): {e}\n")
96
117
  sys.exit(1)
97
118
 
98
119
 
@@ -144,51 +165,99 @@ def main() -> None:
144
165
  p.add_argument("prompt", nargs="?", default="", help="user message text")
145
166
  p.add_argument("--model", default=os.environ.get("MIMO_MODEL", "mimo-v2.5-pro"))
146
167
  p.add_argument("--image", help="image URL to attach (forces vision-capable model)")
147
- p.add_argument("--search", action="store_true", help="enable MiMo web_search builtin")
148
168
  p.add_argument("--stream", action="store_true", help="stream the response")
149
169
  p.add_argument("--max-tokens", type=int, default=2048)
150
170
  p.add_argument("--temperature", type=float, default=0.7)
171
+ p.add_argument(
172
+ "--engine",
173
+ choices=["auto", "mimo", "pollinations"],
174
+ default=os.environ.get("MIMO_CHAT_ENGINE", "auto"),
175
+ help="chat backend. auto = mimo if MIMO_API_KEY set, else pollinations "
176
+ "(free, no key required). default: %(default)s",
177
+ )
151
178
  p.add_argument(
152
179
  "--base-url",
153
180
  default=os.environ.get("MIMO_BASE_URL", "https://api.xiaomimimo.com/v1"),
154
- help="set to https://token-plan-cn.xiaomimimo.com/v1 for tp-* keys",
181
+ help="MiMo endpoint, ignored when --engine=pollinations "
182
+ "(tp-* keys use https://token-plan-cn.xiaomimimo.com/v1)",
183
+ )
184
+ p.add_argument(
185
+ "--pollinations-model",
186
+ default=os.environ.get("POLLINATIONS_MODEL", POLLINATIONS_DEFAULT_MODEL),
187
+ help="model id when --engine=pollinations (default: %(default)s)",
155
188
  )
156
189
  args = p.parse_args()
157
190
 
158
191
  api_key = os.environ.get("MIMO_API_KEY")
159
- if not api_key:
160
- sys.stderr.write("error: MIMO_API_KEY not set in environment\n")
161
- sys.stderr.write(
162
- " get one at https://platform.xiaomimimo.com/#/console/api-keys\n"
163
- )
164
- sys.exit(2)
192
+
193
+ # Resolve engine.
194
+ if args.engine == "mimo":
195
+ engine = "mimo"
196
+ if not api_key:
197
+ sys.stderr.write(
198
+ "error: --engine mimo requires MIMO_API_KEY.\n"
199
+ " get one at https://platform.xiaomimimo.com/#/console/api-keys\n"
200
+ " OR drop the flag to fall back to pollinations (free, no key required):\n"
201
+ " python3 mimo_chat.py <prompt>\n"
202
+ )
203
+ sys.exit(3)
204
+ elif args.engine == "pollinations":
205
+ engine = "pollinations"
206
+ else: # auto
207
+ engine = "mimo" if api_key else "pollinations"
208
+ if engine == "pollinations":
209
+ sys.stderr.write(
210
+ "[engine] auto -> pollinations (free, no key). "
211
+ "Set MIMO_API_KEY for higher quality (mimo-v2.5).\n"
212
+ )
165
213
 
166
214
  if not args.prompt and not args.image:
167
215
  sys.stderr.write("error: pass a prompt and/or --image\n")
168
216
  sys.exit(2)
169
217
 
170
- # Auto-bump to a vision model if user passed --image with a non-vision model
171
- model = args.model
172
- if args.image and "omni" not in model.lower() and not model.startswith("mimo-v2.5["):
173
- if model != "mimo-v2.5":
174
- sys.stderr.write(
175
- f"note: --image given but model is '{model}' which doesn't see images.\n"
176
- f" switching to mimo-v2.5 for this call.\n"
177
- )
178
- model = "mimo-v2.5"
218
+ enable_web_search = False
219
+ if engine == "mimo":
220
+ # Auto-bump to a vision model if user passed --image with a non-vision model.
221
+ model = args.model
222
+ if args.image and "omni" not in model.lower() and not model.startswith("mimo-v2.5["):
223
+ if model != "mimo-v2.5":
224
+ sys.stderr.write(
225
+ f"note: --image given but model is '{model}' which doesn't see images.\n"
226
+ f" switching to mimo-v2.5 for this call.\n"
227
+ )
228
+ model = "mimo-v2.5"
229
+ url = args.base_url.rstrip("/") + "/chat/completions"
230
+ auth: str | None = api_key
231
+ # MiMo native web_search: pay-as-you-go (sk-*) supports it, token-plan
232
+ # (tp-*) does not. Always include the tool on sk-* and let the model
233
+ # decide via tool_choice=auto — no extra flag needed.
234
+ enable_web_search = bool(api_key and api_key.startswith("sk-"))
235
+ else:
236
+ # Pollinations: pick the configured vision-capable model. The user's
237
+ # --model (mimo-*) is mimo-specific so we don't honor it here unless
238
+ # they explicitly passed --pollinations-model.
239
+ model = args.pollinations_model
240
+ url = POLLINATIONS_URL
241
+ auth = None
242
+
243
+ sys.stderr.write(
244
+ f"[chat] engine={engine} model={model}"
245
+ + (" web_search=on" if enable_web_search else "")
246
+ + "\n"
247
+ )
179
248
 
180
249
  body = build_body(
181
250
  prompt=args.prompt,
182
251
  image=args.image,
183
252
  model=model,
184
253
  stream=args.stream,
185
- search=args.search,
254
+ enable_web_search=enable_web_search,
186
255
  max_tokens=args.max_tokens,
187
256
  temperature=args.temperature,
257
+ engine=engine,
188
258
  )
189
259
 
190
- url = args.base_url.rstrip("/") + "/chat/completions"
191
- resp = post(url, body, api_key, args.stream)
260
+ resp = post(url, body, auth, args.stream, engine=engine)
192
261
  if args.stream:
193
262
  stream_chat(resp)
194
263
  else:
@@ -1,11 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- ocr.py — OCR / image recognition via Xiaomi MiMo V2.5 vision.
3
+ ocr.py — OCR / image recognition that works without any API key.
4
4
 
5
5
  Use this when the surrounding chat model can't see images (mimo-v2.5-pro,
6
- mimo-v2.5-pro[1m], mimo-v2-flash, or any third-party model without vision).
7
- ocr.py always calls mimo-v2.5 internally regardless of what the rest of the
8
- conversation is using.
6
+ mimo-v2.5-pro[1m], mimo-v2-flash, deepseek-*, or any text-only model).
7
+
8
+ Engines (--engine):
9
+ auto (default) — mimo if MIMO_API_KEY set, else pollinations
10
+ mimo — Xiaomi MiMo V2.5 vision. Highest quality. Needs MIMO_API_KEY
11
+ pollinations — pollinations.ai free public vision endpoint. NO KEY REQUIRED
9
12
 
10
13
  Modes (--mode):
11
14
  text (default) verbatim OCR — raw text, preserves line breaks
@@ -21,9 +24,12 @@ Image inputs (positional, 0+):
21
24
  (none, stdin not a TTY) same as `-`
22
25
 
23
26
  Usage:
24
- export MIMO_API_KEY=sk-xxxx
27
+ # Zero-setup: free fallback, works for DeepSeek-only / no-key users
25
28
  python3 ocr.py path/to/image.png
26
29
  python3 ocr.py --mode describe https://example.com/x.png
30
+
31
+ # Best quality (needs MiMo key)
32
+ export MIMO_API_KEY=sk-xxxx
27
33
  python3 ocr.py --mode structured a.png b.jpg
28
34
  cat scan.png | python3 ocr.py --mode markdown
29
35
 
@@ -194,26 +200,32 @@ def build_messages(
194
200
 
195
201
  # --- HTTP -------------------------------------------------------------------
196
202
 
197
- def post(url: str, body: dict[str, Any], api_key: str, stream: bool) -> Any:
203
+ POLLINATIONS_URL = "https://text.pollinations.ai/openai"
204
+ POLLINATIONS_DEFAULT_MODEL = "openai" # vision-capable, free, no key
205
+
206
+
207
+ def post(url: str, body: dict[str, Any], api_key: str | None, stream: bool, *, engine: str) -> Any:
208
+ headers = {
209
+ "Content-Type": "application/json",
210
+ "Accept": "text/event-stream" if stream else "application/json",
211
+ "User-Agent": "mimoskill-ocr/0.1",
212
+ }
213
+ if api_key:
214
+ headers["Authorization"] = f"Bearer {api_key}"
198
215
  req = urllib.request.Request(
199
216
  url,
200
217
  method="POST",
201
218
  data=json.dumps(body).encode("utf-8"),
202
- headers={
203
- "Content-Type": "application/json",
204
- "Accept": "text/event-stream" if stream else "application/json",
205
- "Authorization": f"Bearer {api_key}",
206
- "User-Agent": "mimoskill-ocr/0.1",
207
- },
219
+ headers=headers,
208
220
  )
209
221
  try:
210
222
  return urllib.request.urlopen(req, timeout=300)
211
223
  except urllib.error.HTTPError as e:
212
224
  snippet = e.read().decode("utf-8", "replace")
213
- sys.stderr.write(f"MiMo returned HTTP {e.code}: {snippet}\n")
225
+ sys.stderr.write(f"{engine} returned HTTP {e.code}: {snippet}\n")
214
226
  sys.exit(1)
215
227
  except urllib.error.URLError as e:
216
- sys.stderr.write(f"connection failed: {e}\n")
228
+ sys.stderr.write(f"connection failed ({engine}): {e}\n")
217
229
  sys.exit(1)
218
230
 
219
231
 
@@ -289,10 +301,23 @@ def main() -> None:
289
301
  )
290
302
  p.add_argument("--max-tokens", type=int, default=4096)
291
303
  p.add_argument("--temperature", type=float, default=0.2)
304
+ p.add_argument(
305
+ "--engine",
306
+ choices=["auto", "mimo", "pollinations"],
307
+ default=os.environ.get("MIMO_OCR_ENGINE", "auto"),
308
+ help="OCR backend. auto = mimo if MIMO_API_KEY set, else pollinations "
309
+ "(free, no key required). default: %(default)s",
310
+ )
292
311
  p.add_argument(
293
312
  "--base-url",
294
313
  default=os.environ.get("MIMO_BASE_URL", "https://api.xiaomimimo.com/v1"),
295
- help="MiMo OpenAI-compat endpoint (default: %(default)s)",
314
+ help="MiMo OpenAI-compat endpoint, ignored when --engine=pollinations "
315
+ "(default: %(default)s)",
316
+ )
317
+ p.add_argument(
318
+ "--pollinations-model",
319
+ default=os.environ.get("POLLINATIONS_MODEL", POLLINATIONS_DEFAULT_MODEL),
320
+ help="model id when --engine=pollinations (default: %(default)s)",
296
321
  )
297
322
  p.add_argument(
298
323
  "--prompt",
@@ -304,18 +329,27 @@ def main() -> None:
304
329
  args = p.parse_args()
305
330
 
306
331
  api_key = os.environ.get("MIMO_API_KEY")
307
- if not api_key:
308
- sys.stderr.write(
309
- "error: MIMO_API_KEY is not set; ocr.py needs MiMo V2.5 vision to read images.\n"
310
- " set one at https://platform.xiaomimimo.com/#/console/api-keys\n"
311
- " OR if you want fully-local OCR with no API key, install tesseract:\n"
312
- " macOS: brew install tesseract tesseract-lang\n"
313
- " Ubuntu: sudo apt install tesseract-ocr tesseract-ocr-chi-sim\n"
314
- " Windows: https://github.com/UB-Mannheim/tesseract/wiki\n"
315
- " then run: tesseract <image> - -l eng+chi_sim\n"
316
- " (tesseract is NOT installed or invoked by this skill; this is just a pointer.)\n"
317
- )
318
- sys.exit(3)
332
+
333
+ # Resolve engine.
334
+ if args.engine == "mimo":
335
+ engine = "mimo"
336
+ if not api_key:
337
+ sys.stderr.write(
338
+ "error: --engine mimo requires MIMO_API_KEY.\n"
339
+ " set one at https://platform.xiaomimimo.com/#/console/api-keys\n"
340
+ " OR drop the flag to fall back to pollinations (free, no key required):\n"
341
+ " python3 ocr.py <image>\n"
342
+ )
343
+ sys.exit(3)
344
+ elif args.engine == "pollinations":
345
+ engine = "pollinations"
346
+ else: # auto
347
+ engine = "mimo" if api_key else "pollinations"
348
+ if engine == "pollinations":
349
+ sys.stderr.write(
350
+ "[engine] auto -> pollinations (free, no key). "
351
+ "Set MIMO_API_KEY for higher quality (mimo-v2.5).\n"
352
+ )
319
353
 
320
354
  # Resolve images: explicit args, else stdin if not a TTY.
321
355
  raw_args = args.images
@@ -330,12 +364,20 @@ def main() -> None:
330
364
 
331
365
  image_urls = [resolve_image_arg(a) for a in raw_args]
332
366
 
333
- model, note = pick_model(args.model)
334
- if note:
335
- sys.stderr.write(note)
367
+ if engine == "mimo":
368
+ model, note = pick_model(args.model)
369
+ if note:
370
+ sys.stderr.write(note)
371
+ else:
372
+ if args.model:
373
+ sys.stderr.write(
374
+ f"note: --model is mimo-specific; ignoring on pollinations "
375
+ f"(use --pollinations-model instead).\n"
376
+ )
377
+ model = args.pollinations_model
336
378
 
337
379
  sys.stderr.write(
338
- f"[ocr] mode={args.mode} model={model} images={len(image_urls)}\n"
380
+ f"[ocr] engine={engine} mode={args.mode} model={model} images={len(image_urls)}\n"
339
381
  )
340
382
 
341
383
  messages = build_messages(
@@ -348,13 +390,20 @@ def main() -> None:
348
390
  body: dict[str, Any] = {
349
391
  "model": model,
350
392
  "messages": messages,
351
- "max_completion_tokens": args.max_tokens,
352
393
  "temperature": args.temperature,
353
394
  "stream": args.stream,
354
395
  }
396
+ if engine == "mimo":
397
+ # MiMo's quirk: max_completion_tokens, not max_tokens.
398
+ body["max_completion_tokens"] = args.max_tokens
399
+ url = args.base_url.rstrip("/") + "/chat/completions"
400
+ auth = api_key
401
+ else:
402
+ body["max_tokens"] = args.max_tokens
403
+ url = POLLINATIONS_URL
404
+ auth = None
355
405
 
356
- url = args.base_url.rstrip("/") + "/chat/completions"
357
- resp = post(url, body, api_key, args.stream)
406
+ resp = post(url, body, auth, args.stream, engine=engine)
358
407
 
359
408
  if args.stream:
360
409
  content, reasoning = stream_chat(resp)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mimo2codex",
3
- "version": "0.1.16",
3
+ "version": "0.1.17",
4
4
  "description": "Local proxy that lets the latest OpenAI Codex CLI / desktop talk to Xiaomi MiMo (V2.5 Pro) via the Responses API by translating to Chat Completions on the fly.",
5
5
  "keywords": [
6
6
  "codex",