@plusonelabs/cue 0.0.94 → 0.0.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/bin/cue.js +10 -10
  2. package/bin/windows-bootstrap.ps1 +9 -9
  3. package/bin/windows-runtime-artifact.json +2 -2
  4. package/dist/cli.mjs +1087 -820
  5. package/dist/skills/README.md +199 -0
  6. package/dist/skills/_lib/credentials.py +72 -0
  7. package/dist/skills/activity/SKILL.md +97 -0
  8. package/dist/skills/assistant/SKILL.md +173 -0
  9. package/dist/skills/audio/SKILL.md +132 -0
  10. package/dist/skills/elevenlabs-tts/SKILL.md +76 -0
  11. package/dist/skills/elevenlabs-tts/scripts/speak.ts +226 -0
  12. package/dist/skills/event/SKILL.md +98 -0
  13. package/dist/skills/gemini-search/SKILL.md +52 -0
  14. package/dist/skills/gemini-search/generate.py +195 -0
  15. package/dist/skills/image/SKILL.md +169 -0
  16. package/dist/skills/like/SKILL.md +66 -0
  17. package/dist/skills/listen/SKILL.md +57 -0
  18. package/dist/skills/listen/scripts/listen.sh +74 -0
  19. package/dist/skills/listen/scripts/record.swift +94 -0
  20. package/dist/skills/markdown-to-pdf/SKILL.md +31 -0
  21. package/dist/skills/message/SKILL.md +136 -0
  22. package/dist/skills/mini-apps/SKILL.md +256 -0
  23. package/dist/skills/music/SKILL.md +139 -0
  24. package/dist/skills/nano-banana/SKILL.md +70 -0
  25. package/dist/skills/nano-banana/generate.py +191 -0
  26. package/dist/skills/news/SKILL.md +41 -0
  27. package/dist/skills/notify/SKILL.md +123 -0
  28. package/dist/skills/places/SKILL.md +215 -0
  29. package/dist/skills/posts/SKILL.md +440 -0
  30. package/dist/skills/project/SKILL.md +116 -0
  31. package/dist/skills/pulse/SKILL.md +106 -0
  32. package/dist/skills/reddit/SKILL.md +41 -0
  33. package/dist/skills/seeddance/SKILL.md +81 -0
  34. package/dist/skills/seeddance/generate.py +303 -0
  35. package/dist/skills/seedream/SKILL.md +86 -0
  36. package/dist/skills/seedream/generate.py +301 -0
  37. package/dist/skills/social-graph/SKILL.md +119 -0
  38. package/dist/skills/transcribe/SKILL.md +150 -0
  39. package/dist/skills/transcribe/generate.py +389 -0
  40. package/dist/skills/user/SKILL.md +180 -0
  41. package/dist/skills/veo3/SKILL.md +76 -0
  42. package/dist/skills/veo3/generate.py +339 -0
  43. package/dist/skills/video/SKILL.md +163 -0
  44. package/dist/skills/weather/SKILL.md +101 -0
  45. package/dist/skills/web-fetch/SKILL.md +43 -0
  46. package/dist/skills/web-search/SKILL.md +52 -0
  47. package/dist/skills/z-asr/SKILL.md +58 -0
  48. package/dist/skills/z-asr/generate.py +177 -0
  49. package/dist/skills/z-search/SKILL.md +57 -0
  50. package/dist/skills/z-search/generate.py +189 -0
  51. package/dist/skills/z-tts/SKILL.md +51 -0
  52. package/dist/skills/z-tts/generate.py +172 -0
  53. package/package.json +1 -1
@@ -0,0 +1,58 @@
1
+ ---
2
+ name: z-asr
3
+ description: Transcribe audio using Z.AI GLM-ASR model. Use when user asks to transcribe audio with Z.AI, ZhipuAI, or GLM-ASR. Supports wav/mp3 files, international and Chinese endpoints.
4
+ category: media
5
+ type: hybrid
6
+ env:
7
+ - Z_AI_API_KEY
8
+ metadata:
9
+ short-description: Transcribe audio with Z.AI
10
+ scope: first-party
11
+ ---
12
+
13
+ Transcribe audio files to text using Z.AI's GLM-ASR-2512 model.
14
+
15
+ ## Requirements
16
+
17
+ - Python 3.10+, `httpx` package
18
+ - `Z_AI_API_KEY` environment variable (or configured via `cue skill env set`)
19
+ - Get your API key at: https://z.ai/manage-apikey/apikey-list
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ # Transcribe a local audio file
25
+ python3 <skill-dir>/generate.py audio.mp3
26
+
27
+ # Use Chinese endpoint (open.bigmodel.cn)
28
+ python3 <skill-dir>/generate.py audio.mp3 --cn
29
+
30
+ # Save transcript to file
31
+ python3 <skill-dir>/generate.py audio.mp3 --output transcript.txt
32
+
33
+ # JSON output with metadata
34
+ python3 <skill-dir>/generate.py audio.mp3 --json
35
+
36
+ # Provide context for better accuracy
37
+ python3 <skill-dir>/generate.py audio.mp3 --prompt "Previous sentence context here"
38
+
39
+ # Hotwords for domain-specific vocabulary
40
+ python3 <skill-dir>/generate.py audio.mp3 --hotword "TensorFlow" --hotword "PyTorch"
41
+ ```
42
+
43
+ **Constraints:** Audio files must be `.wav` or `.mp3`, max 25MB, max 30 seconds duration, mono channel.
44
+
45
+ **Endpoints:**
46
+
47
+ - Default: `api.z.ai` (international)
48
+ - `--cn`: `open.bigmodel.cn` (Chinese domestic)
49
+
50
+ ## Troubleshooting
51
+
52
+ | Error | Solution |
53
+ | ---------------------------- | ---------------------------------------------------- |
54
+ | "Z_AI_API_KEY not set" | Set with: `/skill env set Z_AI_API_KEY <your-key>` |
55
+ | "File too large" | Audio must be under 25MB |
56
+ | "Unsupported format" | Only `.wav` and `.mp3` are supported |
57
+ | "API error: 401" | Check your API key is valid |
58
+ | "only supports mono channel" | Convert with: `ffmpeg -i input.wav -ac 1 output.wav` |
@@ -0,0 +1,177 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Z-ASR - Audio transcription using Z.AI GLM-ASR-2512 model.
4
+
5
+ Usage:
6
+ python generate.py audio.mp3
7
+ python generate.py audio.wav --output transcript.txt
8
+ python generate.py audio.mp3 --json
9
+ python generate.py audio.mp3 --prompt "context" --hotword "term1" --hotword "term2"
10
+ python generate.py audio.mp3 --cn
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import sys
17
+ import time
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+
21
+ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '_lib'))
22
+ from credentials import get_credential
23
+
24
+ try:
25
+ import httpx
26
+ except ImportError:
27
+ print("Error: httpx not installed. Run: pip install httpx")
28
+ sys.exit(1)
29
+
30
+
31
+ BASE_URLS = {
32
+ "intl": "https://api.z.ai/api/paas/v4/audio/transcriptions",
33
+ "cn": "https://open.bigmodel.cn/api/paas/v4/audio/transcriptions",
34
+ }
35
+ MODEL = "glm-asr-2512"
36
+ TIMEOUT = 120.0
37
+ MAX_FILE_SIZE = 25 * 1024 * 1024
38
+ SUPPORTED_FORMATS = {".wav", ".mp3"}
39
+ SKILL_DIR = Path(__file__).parent
40
+ TEMP_DIR = SKILL_DIR / "temp"
41
+
42
+
43
def transcribe(
    file_path: Path,
    prompt: str | None = None,
    hotwords: list[str] | None = None,
    use_cn: bool = False,
) -> dict:
    """Upload an audio file to the Z.AI ASR endpoint and return the transcript.

    Validates the file locally (existence, extension, size) before any network
    call, then posts it as multipart form data. Exits the process with status 1
    on any validation or API failure.
    """
    api_key = get_credential("Z_AI_API_KEY", "z-asr")
    endpoint = BASE_URLS["cn" if use_cn else "intl"]

    # --- cheap local validation before spending network time ---
    if not file_path.exists():
        print(f"Error: File not found: {file_path}")
        sys.exit(1)

    suffix = file_path.suffix.lower()
    if suffix not in SUPPORTED_FORMATS:
        print(f"Error: Unsupported format '{suffix}'. Use .wav or .mp3")
        sys.exit(1)

    file_size = file_path.stat().st_size
    if file_size > MAX_FILE_SIZE:
        print(f"Error: File too large ({file_size / 1024 / 1024:.1f}MB). Max 25MB.")
        sys.exit(1)

    region = "CN" if use_cn else "intl"
    print(f"Transcribing {file_path.name} ({file_size / 1024:.0f}KB) with {MODEL} ({region})...")

    # Optional form fields are only sent when the caller supplied them.
    form_fields: dict = {"model": MODEL, "stream": "false"}
    if prompt:
        form_fields["prompt"] = prompt
    if hotwords:
        form_fields["hotwords"] = json.dumps(hotwords)

    content_type = "audio/wav" if suffix == ".wav" else "audio/mpeg"

    started = time.time()
    with httpx.Client(timeout=TIMEOUT) as client, open(file_path, "rb") as audio:
        response = client.post(
            endpoint,
            headers={"Authorization": f"Bearer {api_key}"},
            data=form_fields,
            files={"file": (file_path.name, audio, content_type)},
        )

    if response.status_code != 200:
        print(f"Error: API returned {response.status_code}")
        print(response.text[:500])
        sys.exit(1)

    result = response.json()
    elapsed_ms = int((time.time() - started) * 1000)

    # The API reports failure either via an "error" object or a top-level "code".
    if "error" in result or "code" in result:
        err = result.get("error", result)
        msg = err.get("message", err) if isinstance(err, dict) else str(err)
        print(f"Error: {msg}")
        sys.exit(1)

    return {
        "text": result.get("text", ""),
        "model": result.get("model", MODEL),
        "id": result.get("id", ""),
        "processing_time_ms": elapsed_ms,
    }
109
+
110
+
111
def main():
    """CLI entry point: parse arguments, transcribe, and emit the result."""
    parser = argparse.ArgumentParser(
        description="Transcribe audio using Z.AI GLM-ASR-2512",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s audio.mp3
  %(prog)s recording.wav --output transcript.txt
  %(prog)s audio.mp3 --json
  %(prog)s audio.mp3 --prompt "Previous context" --hotword "TensorFlow"
  %(prog)s audio.mp3 --cn
""",
    )
    parser.add_argument("file", type=Path, help="Audio file to transcribe (.wav or .mp3)")
    parser.add_argument("--output", "-o", help="Output file path (default: print to stdout)")
    parser.add_argument("--json", "-j", action="store_true", help="Output JSON with metadata")
    parser.add_argument("--prompt", "-p", help="Context from previous transcription (up to 8000 chars)")
    parser.add_argument(
        "--hotword",
        action="append",
        help="Hotword for better recognition (can be used multiple times, max 100)",
    )
    parser.add_argument(
        "--cn",
        action="store_true",
        help="Use Chinese endpoint (open.bigmodel.cn) instead of international (api.z.ai)",
    )
    opts = parser.parse_args()

    transcript = transcribe(
        file_path=opts.file,
        prompt=opts.prompt,
        hotwords=opts.hotword,
        use_cn=opts.cn,
    )

    # Either a JSON document with metadata or the bare transcript text.
    rendered = (
        json.dumps(transcript, indent=2, ensure_ascii=False)
        if opts.json
        else transcript["text"]
    )

    if opts.output:
        destination = Path(opts.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered, encoding="utf-8")
        print(f"Saved: {destination}")
    else:
        print(rendered)

    print(f"\nProcessing time: {transcript['processing_time_ms']}ms")
    print(f"Word count: {len(transcript['text'].split())}")
174
+
175
+
176
+ if __name__ == "__main__":
177
+ main()
@@ -0,0 +1,57 @@
1
+ ---
2
+ name: z-search
3
+ description: Web search using Z.AI Search Prime engine. Use when user asks to search the web with Z.AI, ZhipuAI, or needs LLM-optimized search results.
4
+ category: data
5
+ type: hybrid
6
+ env:
7
+ - Z_AI_API_KEY
8
+ metadata:
9
+ short-description: Web search with Z.AI
10
+ scope: first-party
11
+ ---
12
+
13
+ Search the web using Z.AI's Search Prime engine, optimized for LLM consumption.
14
+
15
+ ## Requirements
16
+
17
+ - Python 3.10+, `httpx` package
18
+ - `Z_AI_API_KEY` environment variable (or configured via `cue skill env set`)
19
+ - Get your API key at: https://z.ai/manage-apikey/apikey-list
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ # Basic search
25
+ python3 <skill-dir>/generate.py "latest news about AI"
26
+
27
+ # Limit results
28
+ python3 <skill-dir>/generate.py "Python 3.14 features" --count 5
29
+
30
+ # Filter by recency
31
+ python3 <skill-dir>/generate.py "breaking news" --recency oneDay
32
+
33
+ # Filter by domain
34
+ python3 <skill-dir>/generate.py "machine learning" --domain arxiv.org
35
+
36
+ # JSON output with metadata
37
+ python3 <skill-dir>/generate.py "web search API" --json
38
+
39
+ # Save results to file
40
+ python3 <skill-dir>/generate.py "search query" --output results.txt
41
+ ```
42
+
43
+ **Options:**
44
+
45
+ - `--count N` — Number of results (1-50, default 10)
46
+ - `--recency` — Time filter: `oneDay`, `oneWeek`, `oneMonth`, `oneYear`, `noLimit` (default)
47
+ - `--domain` — Restrict results to a specific domain
48
+ - `--json` — Output full JSON with metadata
49
+ - `--output FILE` — Save results to file
50
+
51
+ ## Troubleshooting
52
+
53
+ | Error | Solution |
54
+ | ---------------------- | -------------------------------------------------- |
55
+ | "Z_AI_API_KEY not set" | Set with: `/skill env set Z_AI_API_KEY <your-key>` |
56
+ | "API error: 401" | Check your API key is valid |
57
+ | "API error: 429" | Rate limited or insufficient balance |
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Z-Search - Web search using Z.AI Search Prime engine.
4
+
5
+ Usage:
6
+ python generate.py "search query"
7
+ python generate.py "search query" --count 5
8
+ python generate.py "search query" --recency oneDay
9
+ python generate.py "search query" --domain arxiv.org
10
+ python generate.py "search query" --json
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import sys
17
+ import time
18
+ from pathlib import Path
19
+
20
+ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '_lib'))
21
+ from credentials import get_credential
22
+
23
+ try:
24
+ import httpx
25
+ except ImportError:
26
+ print("Error: httpx not installed. Run: pip install httpx")
27
+ sys.exit(1)
28
+
29
+
30
+ API_URL = "https://api.z.ai/api/paas/v4/web_search"
31
+ SEARCH_ENGINE = "search-prime"
32
+ TIMEOUT = 30.0
33
+ RECENCY_CHOICES = ["oneDay", "oneWeek", "oneMonth", "oneYear", "noLimit"]
34
+
35
+
36
def search(
    query: str,
    count: int = 10,
    recency: str | None = None,
    domain: str | None = None,
) -> dict:
    """Run a Search Prime query and return results plus timing metadata.

    Exits the process with status 1 on any API failure.
    """
    api_key = get_credential("Z_AI_API_KEY", "z-search")

    request_body: dict = {
        "search_engine": SEARCH_ENGINE,
        "search_query": query,
    }
    # Only non-default options are forwarded to the API.
    if count != 10:
        request_body["count"] = count
    if recency and recency != "noLimit":
        request_body["search_recency_filter"] = recency
    if domain:
        request_body["search_domain_filter"] = domain

    started = time.time()
    with httpx.Client(timeout=TIMEOUT) as client:
        response = client.post(
            API_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json=request_body,
        )

    if response.status_code != 200:
        print(f"Error: API returned {response.status_code}")
        print(response.text[:500])
        sys.exit(1)

    body = response.json()
    elapsed_ms = int((time.time() - started) * 1000)

    # Failure is reported via an "error" object or a top-level "code" field.
    if "error" in body or "code" in body:
        err = body.get("error", body)
        msg = err.get("message", err) if isinstance(err, dict) else str(err)
        print(f"Error: {msg}")
        sys.exit(1)

    hits = body.get("search_result", [])
    return {
        "query": query,
        "results": hits,
        "count": len(hits),
        "id": body.get("id", ""),
        "processing_time_ms": elapsed_ms,
    }
90
+
91
+
92
def format_results(data: dict) -> str:
    """Render search hits as a human-readable numbered list.

    Each hit becomes a numbered heading (with the media name in parentheses
    when present), followed by indented link, publish date, and snippet lines,
    then a blank separator line.
    """
    hits = data["results"]
    if not hits:
        return "No results found."

    rendered: list[str] = []
    for index, hit in enumerate(hits, 1):
        heading = f"{index}. {hit.get('title', 'No title')}"
        media = hit.get("media", "")
        if media:
            heading += f" ({media})"
        rendered.append(heading)

        # Indented detail lines, emitted only when the field is non-empty.
        for label, value in (
            ("", hit.get("link", "")),
            ("Published: ", hit.get("publish_date", "")),
            ("", hit.get("content", "")),
        ):
            if value:
                rendered.append(f"   {label}{value}")
        rendered.append("")

    return "\n".join(rendered)
118
+
119
+
120
def main():
    """CLI entry point: parse arguments, run the search, and emit results."""
    parser = argparse.ArgumentParser(
        description="Search the web using Z.AI Search Prime",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s "latest AI news"
  %(prog)s "Python tutorials" --count 5
  %(prog)s "breaking news" --recency oneDay
  %(prog)s "machine learning papers" --domain arxiv.org
  %(prog)s "search query" --json
""",
    )
    parser.add_argument("query", help="Search query string")
    parser.add_argument(
        "--count", "-c",
        type=int,
        default=10,
        help="Number of results (1-50, default: 10)",
    )
    parser.add_argument(
        "--recency", "-r",
        choices=RECENCY_CHOICES,
        help="Filter by recency: oneDay, oneWeek, oneMonth, oneYear, noLimit",
    )
    parser.add_argument("--domain", "-d", help="Restrict results to a specific domain (e.g., arxiv.org)")
    parser.add_argument("--output", "-o", help="Output file path (default: print to stdout)")
    parser.add_argument("--json", "-j", action="store_true", help="Output JSON with metadata")
    opts = parser.parse_args()

    # Range check not expressible via argparse choices for a wide int range.
    if opts.count < 1 or opts.count > 50:
        print("Error: --count must be between 1 and 50")
        sys.exit(1)

    data = search(
        query=opts.query,
        count=opts.count,
        recency=opts.recency,
        domain=opts.domain,
    )

    rendered = (
        json.dumps(data, indent=2, ensure_ascii=False)
        if opts.json
        else format_results(data)
    )

    if opts.output:
        destination = Path(opts.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered, encoding="utf-8")
        print(f"Saved: {destination}")
    else:
        print(rendered)

    print(f"\n{data['count']} results in {data['processing_time_ms']}ms")
186
+
187
+
188
+ if __name__ == "__main__":
189
+ main()
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: z-tts
3
+ description: Text-to-speech using Z.AI GLM-TTS model. Use when user asks to convert text to speech, generate audio from text, or synthesize voice with Z.AI or ZhipuAI.
4
+ category: media
5
+ type: hybrid
6
+ env:
7
+ - Z_AI_API_KEY
8
+ metadata:
9
+ short-description: Text-to-speech with Z.AI
10
+ scope: first-party
11
+ ---
12
+
13
+ Convert text to natural speech using Z.AI's GLM-TTS model.
14
+
15
+ ## Requirements
16
+
17
+ - Python 3.10+, `httpx` package
18
+ - `Z_AI_API_KEY` environment variable (or configured via `cue skill env set`)
19
+ - Get your API key at: https://z.ai/manage-apikey/apikey-list
20
+ - Uses Chinese endpoint (`open.bigmodel.cn`) — TTS is only available on this domain
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ # Generate speech (default voice: tongtong)
26
+ python3 <skill-dir>/generate.py "Hello, how are you today?"
27
+
28
+ # Choose a voice
29
+ python3 <skill-dir>/generate.py "你好世界" --voice kazi
30
+
31
+ # Adjust speed and volume
32
+ python3 <skill-dir>/generate.py "Fast speech" --speed 1.5 --volume 5
33
+
34
+ # Save to specific file
35
+ python3 <skill-dir>/generate.py "Save this" --output speech.wav
36
+
37
+ # Output as WAV (default) or PCM
38
+ python3 <skill-dir>/generate.py "Hello" --format pcm
39
+ ```
40
+
41
+ **Voices:** `tongtong` (default), `chuichui`, `xiaochen`, `jam`, `kazi`, `douji`, `luodo`
42
+
43
+ **Constraints:** Input text max 1024 characters.
44
+
45
+ ## Troubleshooting
46
+
47
+ | Error | Solution |
48
+ | ---------------------- | -------------------------------------------------- |
49
+ | "Z_AI_API_KEY not set" | Set with: `/skill env set Z_AI_API_KEY <your-key>` |
50
+ | "Input too long" | Text must be under 1024 characters |
51
+ | "API error: 401" | Check your API key is valid |
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Z-TTS - Text-to-speech using Z.AI GLM-TTS model.
4
+
5
+ Usage:
6
+ python generate.py "Hello, how are you?"
7
+ python generate.py "你好世界" --voice kazi
8
+ python generate.py "Fast speech" --speed 1.5
9
+ python generate.py "Save this" --output speech.wav
10
+ """
11
+
12
+ import argparse
13
+ import os
14
+ import sys
15
+ import time
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+
19
+ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '_lib'))
20
+ from credentials import get_credential
21
+
22
+ try:
23
+ import httpx
24
+ except ImportError:
25
+ print("Error: httpx not installed. Run: pip install httpx")
26
+ sys.exit(1)
27
+
28
+
29
+ API_URL = "https://open.bigmodel.cn/api/paas/v4/audio/speech"
30
+ MODEL = "glm-tts"
31
+ TIMEOUT = 60.0
32
+ MAX_INPUT_LENGTH = 1024
33
+ VOICES = ["tongtong", "chuichui", "xiaochen", "jam", "kazi", "douji", "luodo"]
34
+ FORMATS = ["wav", "pcm"]
35
+ SKILL_DIR = Path(__file__).parent
36
+ TEMP_DIR = SKILL_DIR / "temp"
37
+
38
+
39
def synthesize(
    text: str,
    voice: str = "tongtong",
    speed: float = 1.0,
    volume: float = 1.0,
    response_format: str = "wav",
) -> tuple[bytes, int]:
    """Request GLM-TTS audio for *text* and return (audio bytes, elapsed ms).

    Exits the process with status 1 on validation or API failure.
    """
    api_key = get_credential("Z_AI_API_KEY", "z-tts")

    # --- local validation before any network call ---
    if len(text) > MAX_INPUT_LENGTH:
        print(f"Error: Input too long ({len(text)} chars). Max {MAX_INPUT_LENGTH}.")
        sys.exit(1)

    if voice not in VOICES:
        print(f"Error: Unknown voice '{voice}'. Choose from: {', '.join(VOICES)}")
        sys.exit(1)

    print(f"Generating speech with {MODEL} (voice: {voice}, speed: {speed}x)...")

    request_body = {
        "model": MODEL,
        "input": text,
        "voice": voice,
        "response_format": response_format,
        "speed": speed,
        "volume": volume,
    }

    started = time.time()
    with httpx.Client(timeout=TIMEOUT) as client:
        response = client.post(
            API_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json=request_body,
        )

    if response.status_code != 200:
        print(f"Error: API returned {response.status_code}")
        # Error bodies are usually JSON; fall back to raw text if not.
        try:
            print(response.json())
        except Exception:
            print(response.text[:500])
        sys.exit(1)

    # Success responses carry the raw audio bytes in the body.
    return response.content, int((time.time() - started) * 1000)
91
+
92
+
93
def main():
    """CLI entry point: parse arguments, synthesize speech, save the audio."""
    parser = argparse.ArgumentParser(
        description="Text-to-speech using Z.AI GLM-TTS",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Voices: tongtong (default), chuichui, xiaochen, jam, kazi, douji, luodo

Examples:
  %(prog)s "Hello, how are you?"
  %(prog)s "你好世界" --voice kazi
  %(prog)s "Fast speech" --speed 1.5
  %(prog)s "Save this" --output speech.wav
  %(prog)s "Hello" --format pcm
""",
    )
    parser.add_argument("text", help="Text to convert to speech (max 1024 chars)")
    parser.add_argument(
        "--voice", "-v",
        default="tongtong",
        choices=VOICES,
        help="Voice selection (default: tongtong)",
    )
    parser.add_argument(
        "--speed", "-s",
        type=float,
        default=1.0,
        help="Speech rate 0.5-2.0 (default: 1.0)",
    )
    parser.add_argument(
        "--volume",
        type=float,
        default=1.0,
        help="Volume level 0-10 (default: 1.0)",
    )
    parser.add_argument(
        "--format", "-f",
        default="wav",
        choices=FORMATS,
        help="Audio format: wav or pcm (default: wav)",
    )
    parser.add_argument("--output", "-o", help="Output file path (default: auto-generated in skill temp dir)")
    opts = parser.parse_args()

    # Range checks argparse cannot express with choices.
    if opts.speed < 0.5 or opts.speed > 2.0:
        print("Error: --speed must be between 0.5 and 2.0")
        sys.exit(1)

    if opts.volume < 0 or opts.volume > 10:
        print("Error: --volume must be between 0 and 10")
        sys.exit(1)

    audio, elapsed_ms = synthesize(
        text=opts.text,
        voice=opts.voice,
        speed=opts.speed,
        volume=opts.volume,
        response_format=opts.format,
    )

    if opts.output:
        destination = Path(opts.output)
    else:
        # Auto-name the file into the skill's temp dir when no path is given.
        TEMP_DIR.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        destination = TEMP_DIR / f"tts_{stamp}.{opts.format}"

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(audio)

    print(f"Saved: {destination} ({len(audio) / 1024:.1f}KB)")
    print(f"Processing time: {elapsed_ms}ms")
169
+
170
+
171
+ if __name__ == "__main__":
172
+ main()