@seeed-studio/meshtastic 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/scripts/translate_readme.py +632 -0
- package/.github/translate/do-not-translate.md +6 -0
- package/.github/translate/glossary/es.md +23 -0
- package/.github/translate/glossary/fr.md +23 -0
- package/.github/translate/glossary/ja.md +23 -0
- package/.github/translate/glossary/pt.md +23 -0
- package/.github/translate/glossary/zh-CN.md +23 -0
- package/.github/translate/languages.json +37 -0
- package/.github/translate/prompts/es.md +16 -0
- package/.github/translate/prompts/fr.md +16 -0
- package/.github/translate/prompts/ja.md +17 -0
- package/.github/translate/prompts/pt.md +16 -0
- package/.github/translate/prompts/zh-CN.md +15 -0
- package/.github/workflows/publish.yml +25 -0
- package/.github/workflows/readme-translate.yml +166 -0
- package/AGENTS.md +172 -0
- package/LICENSE +21 -0
- package/README.es.md +337 -0
- package/README.fr.md +350 -0
- package/README.ja.md +344 -0
- package/README.md +262 -88
- package/README.pt.md +337 -0
- package/README.zh-CN.md +337 -0
- package/package.json +4 -3
- package/src/channel.ts +70 -17
- package/src/client.ts +108 -17
- package/src/config-schema.ts +37 -7
- package/src/inbound.ts +19 -4
- package/src/monitor.ts +131 -104
- package/src/mqtt-client.ts +30 -6
- package/src/normalize.ts +12 -4
- package/src/onboarding.ts +116 -28
- package/src/policy.ts +6 -2
- package/src/send.ts +13 -7
- package/src/types.ts +4 -2
|
@@ -0,0 +1,632 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Translate README.md to target languages using an OpenAI-compatible LLM API."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import sys
|
|
11
|
+
import threading
|
|
12
|
+
import time
|
|
13
|
+
import urllib.error
|
|
14
|
+
import urllib.request
|
|
15
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Repo root: this script lives at .github/scripts/, so two parents up.
ROOT = Path(__file__).resolve().parents[2]
SOURCE = ROOT / "README.md"  # English source README that gets translated
TRANSLATE_DIR = ROOT / ".github" / "translate"
LANGUAGES_FILE = TRANSLATE_DIR / "languages.json"  # list of target languages
GLOSSARY_DIR = TRANSLATE_DIR / "glossary"  # per-language term glossaries
PROMPTS_DIR = TRANSLATE_DIR / "prompts"  # per-language style prompts
DO_NOT_TRANSLATE_FILE = TRANSLATE_DIR / "do-not-translate.md"

# OpenAI-compatible chat-completions endpoint and tuning knobs; every value
# can be overridden through environment variables for CI flexibility.
API_URL = os.environ.get(
    "LLM_BASE_URL", "https://api.apimart.ai/v1/chat/completions"
)
MODEL = os.environ.get("LLM_MODEL", "gpt-5")
API_KEY_VAR = "LLM_API_KEY"  # name of the env var that holds the API key
TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "600"))  # per-request timeout (s)
MAX_RETRIES = int(os.environ.get("LLM_MAX_RETRIES", "3"))
MAX_CONCURRENCY = int(os.environ.get("LLM_MAX_CONCURRENCY", "2"))
RETRY_BASE_DELAY = 15  # seconds; doubled each attempt (exponential backoff)

# Rules appended to every language-specific style prompt when building the
# system prompt for the LLM.
_SHARED_RULES = (
    "- Preserve markdown structure exactly: headings, links, tables, inline code, image paths\n"
    "- Do not translate URLs, package names, commands, file paths, env vars\n"
    "- CRITICAL: NEVER translate content inside code fences (```...```). "
    "Copy every fenced code block BYTE-FOR-BYTE from the source, including the opening "
    "language tag and every line inside. This applies to ALL code fences: bash, yaml, "
    "mermaid, and any other language.\n"
    ' BAD: ```mermaid\\n subgraph mesh ["translated text"]\\n ```\n'
    ' GOOD: ```mermaid\\nflowchart LR\\n subgraph mesh ["LoRa Mesh Network"]\\n ```\n'
    " (The GOOD version is identical to the English source — that is the requirement.)\n"
    "- CRITICAL: Table of Contents anchors MUST match the translated heading text. "
    "GitHub generates anchors from heading text, so the TOC link anchor must use "
    "the TRANSLATED heading, not the English original.\n"
    "- Keep line breaks and section order identical\n"
    "- Return only the translated markdown, no explanation\n"
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class LangConfig:
    """One target language from languages.json plus its loaded prompt assets."""

    code: str  # language code, e.g. "es", "zh-CN"; also names glossary/prompt files
    name: str  # human-readable language name used in the translation request
    label: str  # display label for the language-switcher links
    target_file: str  # output README filename, e.g. "README.es.md"
    toc_heading_pattern: str  # regex fragment matching the translated TOC heading
    glossary: str  # glossary file contents (stripped)
    style_prompt: str  # per-language style prompt contents (stripped)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Language config loading
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
def _strip_markdown_fence(text: str) -> str:
|
|
72
|
+
stripped = text.strip()
|
|
73
|
+
match = re.match(r"^```(?:markdown|md)?\n([\s\S]*?)\n```$", stripped, re.IGNORECASE)
|
|
74
|
+
if match:
|
|
75
|
+
return match.group(1).strip() + "\n"
|
|
76
|
+
return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_languages() -> list[LangConfig]:
    """Parse languages.json into LangConfig objects, loading per-language assets.

    Raises FileNotFoundError when the config, a glossary, or a prompt file is
    missing, and ValueError when an entry is malformed.
    """
    if not LANGUAGES_FILE.exists():
        raise FileNotFoundError(f"Missing languages config file: {LANGUAGES_FILE}")

    raw = json.loads(LANGUAGES_FILE.read_text(encoding="utf-8"))
    if not isinstance(raw, list):
        raise ValueError(f"Invalid {LANGUAGES_FILE}: expected top-level list")

    configs: list[LangConfig] = []
    for position, item in enumerate(raw):
        if not isinstance(item, dict):
            raise ValueError(f"Invalid language entry at index {position}: expected object")

        # Normalize every required field to a stripped string in one pass.
        fields = {
            key: str(item.get(key, "")).strip()
            for key in ("code", "name", "label", "target", "toc_pattern")
        }
        if not all(fields.values()):
            raise ValueError(
                f"Invalid language entry at index {position}: code, name, label, target, toc_pattern are required",
            )

        glossary_file = GLOSSARY_DIR / f"{fields['code']}.md"
        prompt_file = PROMPTS_DIR / f"{fields['code']}.md"
        if not glossary_file.exists():
            raise FileNotFoundError(
                f"Missing glossary file for {fields['code']}: {glossary_file}"
            )
        if not prompt_file.exists():
            raise FileNotFoundError(f"Missing prompt file for {fields['code']}: {prompt_file}")

        configs.append(
            LangConfig(
                code=fields["code"],
                name=fields["name"],
                label=fields["label"],
                target_file=fields["target"],
                toc_heading_pattern=fields["toc_pattern"],
                glossary=glossary_file.read_text(encoding="utf-8").strip(),
                style_prompt=prompt_file.read_text(encoding="utf-8").strip(),
            )
        )

    return configs
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
# Language switcher
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
def _build_lang_switcher(current_file: str, languages: list[LangConfig]) -> str:
|
|
132
|
+
links: list[str] = []
|
|
133
|
+
|
|
134
|
+
if current_file == "README.md":
|
|
135
|
+
links.append("<b>English</b>")
|
|
136
|
+
else:
|
|
137
|
+
links.append('<a href="README.md">English</a>')
|
|
138
|
+
|
|
139
|
+
for lang in languages:
|
|
140
|
+
if current_file == lang.target_file:
|
|
141
|
+
links.append(f"<b>{lang.label}</b>")
|
|
142
|
+
else:
|
|
143
|
+
links.append(f'<a href="{lang.target_file}">{lang.label}</a>')
|
|
144
|
+
|
|
145
|
+
return (
|
|
146
|
+
'<!-- LANG_SWITCHER_START -->\n<p align="center">\n '
|
|
147
|
+
+ " | ".join(links)
|
|
148
|
+
+ "\n</p>\n<!-- LANG_SWITCHER_END -->"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _fix_lang_switcher(
    translated: str, current_file: str, languages: list[LangConfig]
) -> str:
    """Regenerate the switcher block between the LANG_SWITCHER markers.

    Leaves the text untouched (with a stderr warning) when the markers are
    absent, so a translation missing the anchors is not corrupted.
    """
    marker_re = re.compile(
        r"<!-- LANG_SWITCHER_START -->[\s\S]*?<!-- LANG_SWITCHER_END -->",
        re.IGNORECASE,
    )
    rebuilt = _build_lang_switcher(current_file, languages)
    updated, hits = marker_re.subn(rebuilt, translated, count=1)
    if not hits:
        print(
            f"Warning: language switcher anchors not found in {current_file}; skipping switcher rewrite.",
            file=sys.stderr,
        )
        return translated
    return updated
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Post-processing — fix common LLM translation mistakes programmatically
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
def _strip_preamble(translated: str) -> str:
|
|
175
|
+
"""Remove LLM artifacts while preserving legitimate pre-heading content.
|
|
176
|
+
|
|
177
|
+
Only strips known LLM artifacts (<think> blocks, markdown fences).
|
|
178
|
+
Does NOT remove legitimate HTML before the first heading (e.g. logo images).
|
|
179
|
+
"""
|
|
180
|
+
# Strip <think>...</think> reasoning blocks (e.g. from GPT-5, DeepSeek)
|
|
181
|
+
translated = re.sub(r"<think>[\s\S]*?</think>\s*", "", translated)
|
|
182
|
+
return translated.strip()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _restore_code_blocks(source: str, translated: str) -> str:
|
|
186
|
+
"""Replace translated code blocks with original source code blocks."""
|
|
187
|
+
source_blocks = re.findall(r"^```[^\n]*\n[\s\S]*?^```", source, re.MULTILINE)
|
|
188
|
+
translated_blocks = re.findall(r"^```[^\n]*\n[\s\S]*?^```", translated, re.MULTILINE)
|
|
189
|
+
if len(source_blocks) != len(translated_blocks):
|
|
190
|
+
return translated
|
|
191
|
+
for src_block, trans_block in zip(source_blocks, translated_blocks):
|
|
192
|
+
if src_block != trans_block:
|
|
193
|
+
translated = translated.replace(trans_block, src_block, 1)
|
|
194
|
+
return translated
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _fix_toc_anchors(translated: str, toc_heading_pattern: str) -> str:
    """Rewrite TOC link anchors to match the actual translated heading anchors."""
    # All level-2+ headings as (hashes, text) pairs, in document order.
    headings = re.findall(r"^(#{2,})\s+(.+)$", translated, re.MULTILINE)
    if not headings:
        return translated

    # Build ordered list of anchors from actual headings.
    # Slug rules mirror _github_anchor: lowercase, drop punctuation (keeping
    # word chars, whitespace, hyphens, and CJK/Hangul ranges), then collapse
    # whitespace runs to single hyphens.
    heading_anchors: list[str] = []
    for _, text in headings:
        anchor = text.strip().lower()
        anchor = re.sub(r"[^\w\s\u3000-\u9fff\uac00-\ud7af-]", "", anchor)
        anchor = re.sub(r"\s+", "-", anchor)
        heading_anchors.append(anchor)

    # Find TOC section: from the heading matching toc_heading_pattern up to
    # (but not including) the next "## " heading.
    toc_match = re.search(
        rf"^##\s+.*(?:{toc_heading_pattern}).*\n([\s\S]*?)(?=\n##\s)",
        translated,
        re.MULTILINE,
    )
    if not toc_match:
        return translated

    toc_section = toc_match.group(0)
    toc_links = re.findall(r"\[([^\]]*)\]\(#[^)]*\)", toc_section)

    if not toc_links:
        return translated

    # Find the TOC heading index within the ordered headings list.
    toc_heading_idx = None
    for i, (_, text) in enumerate(headings):
        if re.search(toc_heading_pattern, text):
            toc_heading_idx = i
            break

    if toc_heading_idx is None:
        return translated

    # Content headings are those after the TOC heading.
    content_anchors = heading_anchors[toc_heading_idx + 1:]

    # Replace each TOC link anchor with the correct one. NOTE(review): this
    # assumes TOC entries appear in the same order as the headings they
    # reference (i-th link -> i-th post-TOC heading).
    new_toc = toc_section
    toc_link_pattern = re.finditer(r"\[([^\]]*)\]\(#([^)]*)\)", toc_section)
    replacements: list[tuple[str, str]] = []
    for i, m in enumerate(toc_link_pattern):
        if i < len(content_anchors):
            old = m.group(0)
            new = f"[{m.group(1)}](#{content_anchors[i]})"
            if old != new:
                replacements.append((old, new))

    # Apply the collected rewrites one occurrence at a time, then splice the
    # updated TOC back into the document.
    for old, new in replacements:
        new_toc = new_toc.replace(old, new, 1)

    return translated.replace(toc_section, new_toc, 1)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# ---------------------------------------------------------------------------
|
|
257
|
+
# Validation
|
|
258
|
+
# ---------------------------------------------------------------------------
|
|
259
|
+
|
|
260
|
+
def _extract_code_blocks(md: str) -> list[str]:
|
|
261
|
+
return re.findall(r"^```[^\n]*\n[\s\S]*?^```", md, re.MULTILINE)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _extract_headings(md: str) -> list[tuple[str, str]]:
|
|
265
|
+
return re.findall(r"^(#{2,})\s+(.+)$", md, re.MULTILINE)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _github_anchor(heading_text: str) -> str:
|
|
269
|
+
anchor = heading_text.strip().lower()
|
|
270
|
+
anchor = re.sub(r"[^\w\s\u3000-\u9fff\uac00-\ud7af-]", "", anchor)
|
|
271
|
+
anchor = re.sub(r"\s+", "-", anchor)
|
|
272
|
+
return anchor
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _extract_toc_links(md: str, toc_heading_pattern: str) -> list[str]:
|
|
276
|
+
toc_section = re.search(
|
|
277
|
+
rf"^##\s+.*(?:{toc_heading_pattern}).*\n([\s\S]*?)(?=\n##\s)",
|
|
278
|
+
md,
|
|
279
|
+
re.MULTILINE,
|
|
280
|
+
)
|
|
281
|
+
if not toc_section:
|
|
282
|
+
return []
|
|
283
|
+
return re.findall(
|
|
284
|
+
r"\[.*?\]\(#([\w\u3000-\u9fff\uac00-\ud7af-]+)\)", toc_section.group(1)
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _validate_translation(
    source: str,
    translated: str,
    lang: LangConfig,
) -> list[str]:
    """Run structural sanity checks on a translation.

    Returns a list of human-readable warning strings; an empty list means the
    translation passed every check. Nothing here raises — callers decide how
    to surface the warnings.
    """
    problems: list[str] = []

    # Allow HTML before first heading (e.g. logo images)
    text_only = re.sub(r"<[^>]+>", "", translated).strip()
    if text_only and not text_only.startswith("#"):
        problems.append("Translation does not start with a heading or HTML block")
    if len(translated) < len(source) * 0.3:
        problems.append(
            f"Translation suspiciously short ({len(translated)} chars vs "
            f"source {len(source)} chars — under 30%)",
        )

    # Fenced code blocks must survive byte-for-byte.
    src_blocks = _extract_code_blocks(source)
    out_blocks = _extract_code_blocks(translated)
    if len(src_blocks) != len(out_blocks):
        problems.append(
            f"Code block count mismatch: source has {len(src_blocks)}, "
            f"translation has {len(out_blocks)}",
        )
    else:
        for idx, (original, candidate) in enumerate(zip(src_blocks, out_blocks)):
            if original != candidate:
                fence_line = original.split("\n", 1)[0]
                problems.append(
                    f"Code block {idx + 1} ({fence_line}) was modified in translation",
                )

    # Heading structure should be preserved one-for-one.
    src_headings = _extract_headings(source)
    out_headings = _extract_headings(translated)
    if len(src_headings) != len(out_headings):
        problems.append(
            f"Heading count mismatch: source has {len(src_headings)}, "
            f"translation has {len(out_headings)}",
        )

    # Every TOC link must resolve to some translated heading's anchor.
    anchors = {_github_anchor(text) for _, text in out_headings}
    for target in _extract_toc_links(translated, lang.toc_heading_pattern):
        if target not in anchors:
            problems.append(f"TOC link #{target} does not match any heading anchor")

    # Reference-style link definitions must all carry over.
    ref_pattern = r"^\[[\w-]+\]:\s+"
    missing_refs = set(re.findall(ref_pattern, source, re.MULTILINE)) - set(
        re.findall(ref_pattern, translated, re.MULTILINE)
    )
    if missing_refs:
        problems.append(f"Missing reference-style links: {missing_refs}")

    # Opening HTML tags (layout/images/links) should match in count.
    html_pattern = r"<(?:p|div|img|a)\b[^>]*>"
    src_tags = re.findall(html_pattern, source, re.IGNORECASE)
    out_tags = re.findall(html_pattern, translated, re.IGNORECASE)
    if len(src_tags) != len(out_tags):
        problems.append(
            f"HTML tag count mismatch: source has {len(src_tags)}, "
            f"translation has {len(out_tags)}",
        )

    return problems
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# ---------------------------------------------------------------------------
|
|
354
|
+
# LLM API — dual-mode request (non-streaming primary, streaming fallback)
|
|
355
|
+
# ---------------------------------------------------------------------------
|
|
356
|
+
|
|
357
|
+
def _make_request(payload: dict, api_key: str) -> urllib.request.Request:
    """Build an authenticated POST request for the chat-completions endpoint."""
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "User-Agent": os.environ.get("LLM_USER_AGENT", "readme-translator/1.0"),
    }
    body = json.dumps(payload).encode("utf-8")
    return urllib.request.Request(API_URL, data=body, headers=headers, method="POST")
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _non_stream_response(req: urllib.request.Request) -> str:
    """Standard synchronous request — preferred for batch workloads.

    Returns the assistant message content, or "" when the response carries
    no choices/message/content (callers treat "" as a retryable condition).
    """
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        parsed = json.loads(resp.read().decode("utf-8"))
    choices = parsed.get("choices", [{}])
    return choices[0].get("message", {}).get("content", "")
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _stream_response(req: urllib.request.Request) -> str:
    """SSE streaming fallback — used only when non-streaming returns empty.

    Accumulates the delta content of each "data:" frame until the [DONE]
    sentinel; malformed frames are skipped rather than aborting the stream.
    """
    pieces: list[str] = []
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        for raw in resp:
            line = raw.decode("utf-8", errors="replace").strip()
            # Blank keep-alives, SSE comments (":..."), and any non-data
            # frame are all ignored — only "data: " frames carry content.
            if not line.startswith("data: "):
                continue
            frame = line[len("data: "):]
            if frame == "[DONE]":
                break
            try:
                event = json.loads(frame)
            except json.JSONDecodeError:
                continue
            token = (
                event.get("choices", [{}])[0]
                .get("delta", {})
                .get("content", "")
            )
            if token:
                pieces.append(token)
    return "".join(pieces)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _call_llm_with_retry(
    messages: list[dict],
    api_key: str,
    lang_code: str,
) -> str:
    """Call LLM with retry logic. Non-streaming first, streaming fallback.

    Transient failures (HTTP 5xx, 429, network errors) are retried with
    exponential backoff; other 4xx client errors abort immediately. When every
    strategy is exhausted a RuntimeError is raised, chained to the last
    transient failure so the root cause is not silently discarded (the
    previous version captured last_exc but never surfaced it).
    """
    strategies = [
        ("non-streaming", False, _non_stream_response),
        ("streaming", True, _stream_response),
    ]
    # Most recent transient failure across all strategies, chained on final raise.
    last_exc: Exception | None = None

    for strategy_name, use_stream, response_fn in strategies:
        payload = {
            "model": MODEL,
            "temperature": 0.2,
            "stream": use_stream,
            "messages": messages,
        }

        for attempt in range(1, MAX_RETRIES + 1):
            req = _make_request(payload, api_key)
            try:
                content = response_fn(req)
                if content:
                    return content
                # Empty response — no point retrying same strategy
                print(
                    f" [{lang_code}] {strategy_name} returned empty content",
                    file=sys.stderr,
                )
                break
            except urllib.error.HTTPError as exc:
                detail = exc.read().decode("utf-8", errors="replace")
                # Non-retryable client errors (everything 4xx except 429
                # rate limiting) abort immediately.
                if 400 <= exc.code < 500 and exc.code != 429:
                    raise RuntimeError(f"LLM API HTTP {exc.code}: {detail}") from exc
                last_exc = RuntimeError(f"LLM API HTTP {exc.code}: {detail}")
            except (urllib.error.URLError, TimeoutError, ConnectionError, OSError) as exc:
                last_exc = RuntimeError(f"LLM API request failed: {exc}")

            if attempt < MAX_RETRIES:
                # Exponential backoff: base delay doubled per failed attempt.
                delay = RETRY_BASE_DELAY * (2 ** (attempt - 1))
                print(
                    f" [{lang_code}] {strategy_name} attempt {attempt}/{MAX_RETRIES} failed, "
                    f"retrying in {delay}s...",
                    file=sys.stderr,
                )
                time.sleep(delay)
            elif last_exc:
                print(
                    f" [{lang_code}] {strategy_name} exhausted {MAX_RETRIES} retries",
                    file=sys.stderr,
                )

    raise RuntimeError(
        f"All strategies exhausted for {lang_code} — both non-streaming and streaming returned no content"
    ) from last_exc
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
# ---------------------------------------------------------------------------
|
|
468
|
+
# Translation pipeline
|
|
469
|
+
# ---------------------------------------------------------------------------
|
|
470
|
+
|
|
471
|
+
def _request_translation(
    source_markdown: str,
    api_key: str,
    lang: LangConfig,
    do_not_translate: str,
) -> str:
    """Request one translation from the LLM and unwrap any enclosing fence."""
    # System prompt: language style prompt immediately followed by the shared
    # rules, then the glossary and do-not-translate list on their own lines.
    system_prompt = "".join(
        [
            lang.style_prompt,
            _SHARED_RULES,
            "\n",
            lang.glossary,
            "\n",
            do_not_translate,
        ]
    )

    user_prompt = (
        f"Translate the following README markdown to {lang.name}:\n\n"
        + source_markdown
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    raw = _call_llm_with_retry(messages, api_key, lang.code)
    return _strip_markdown_fence(raw)
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _translate_one(
    source_markdown: str,
    api_key: str,
    lang: LangConfig,
    do_not_translate: str,
    languages: list[LangConfig],
    semaphore: threading.Semaphore | None = None,
) -> None:
    """Translate the README for one language and write the target file.

    When *semaphore* is given it bounds concurrent LLM calls across worker
    threads; it is released even when translation fails.
    """
    if semaphore:
        semaphore.acquire()
    try:
        destination = ROOT / lang.target_file
        draft = _request_translation(source_markdown, api_key, lang, do_not_translate)

        # Post-processing: fix common LLM mistakes programmatically
        draft = _strip_preamble(draft)
        draft = _restore_code_blocks(source_markdown, draft)
        draft = _fix_toc_anchors(draft, lang.toc_heading_pattern)
        draft = _fix_lang_switcher(draft, lang.target_file, languages)

        warnings = _validate_translation(source_markdown, draft, lang)
        if warnings:
            print(f"Translation validation warnings ({lang.code}):", file=sys.stderr)
            for warning in warnings:
                print(f" - {warning}", file=sys.stderr)

        destination.write_text(draft, encoding="utf-8")

        print(f"Translated {SOURCE.name} -> {lang.target_file} ({lang.name})")
        if warnings:
            print(f" ({len(warnings)} validation warning(s) — see stderr)")
    finally:
        if semaphore:
            semaphore.release()
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
# ---------------------------------------------------------------------------
|
|
536
|
+
# CLI
|
|
537
|
+
# ---------------------------------------------------------------------------
|
|
538
|
+
|
|
539
|
+
def main() -> int:
    """CLI entry point.

    Modes: `--lang CODE` translates one language; `--all` translates every
    configured language concurrently (bounded by MAX_CONCURRENCY). Returns the
    process exit code — 0 when at least one translation succeeded (or the API
    key is absent, which is treated as a soft skip), 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Translate README.md via OpenAI-compatible LLM API"
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--lang",
        help="Target language code (from .github/translate/languages.json)",
    )
    group.add_argument(
        "--all",
        action="store_true",
        help="Translate all languages from .github/translate/languages.json",
    )
    args = parser.parse_args()

    api_key = os.environ.get(API_KEY_VAR)
    if not api_key:
        # Soft skip (e.g. forks without the secret configured) — not an error.
        print(f"{API_KEY_VAR} is not set; skipping translation.")
        return 0

    if not SOURCE.exists():
        raise FileNotFoundError(f"Missing source file: {SOURCE}")
    if not DO_NOT_TRANSLATE_FILE.exists():
        raise FileNotFoundError(
            f"Missing do-not-translate file: {DO_NOT_TRANSLATE_FILE}"
        )

    source_markdown = SOURCE.read_text(encoding="utf-8")
    do_not_translate = DO_NOT_TRANSLATE_FILE.read_text(encoding="utf-8").strip()
    languages = _load_languages()
    lang_map = {lang.code: lang for lang in languages}

    # Sync English README.md lang switcher from languages.json
    updated_source = _fix_lang_switcher(source_markdown, "README.md", languages)
    if updated_source != source_markdown:
        SOURCE.write_text(updated_source, encoding="utf-8")
        source_markdown = updated_source
        print("Updated language switcher in README.md.")

    if args.lang:
        lang = lang_map.get(args.lang)
        if lang is None:
            allowed = ", ".join(sorted(lang_map.keys()))
            raise ValueError(f"Unsupported --lang '{args.lang}'. Allowed: {allowed}")
        _translate_one(
            source_markdown, api_key, lang, do_not_translate, languages
        )
        return 0

    # --all mode: controlled concurrency via semaphore
    semaphore = threading.Semaphore(MAX_CONCURRENCY)
    failed: list[str] = []
    succeeded: list[str] = []

    # max(1, ...) guards against an empty languages list, which would make
    # ThreadPoolExecutor raise ValueError on max_workers=0.
    with ThreadPoolExecutor(max_workers=max(1, len(languages))) as pool:
        futures = {
            pool.submit(
                _translate_one,
                source_markdown,
                api_key,
                lang,
                do_not_translate,
                languages,
                semaphore,
            ): lang
            for lang in languages
        }
        for future in as_completed(futures):
            lang = futures[future]
            try:
                future.result()
                succeeded.append(lang.code)
            except Exception as exc:
                failed.append(lang.code)
                print(
                    f"Translation failed for {lang.code}: {exc}",
                    file=sys.stderr,
                )

    if succeeded:
        print(f"Succeeded: {', '.join(sorted(succeeded))}")
    if failed:
        print(f"Failed: {', '.join(sorted(failed))}", file=sys.stderr)
    # Exit 0 if any succeeded — partial success is still success
    return 0 if succeeded else 1
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
if __name__ == "__main__":
    try:
        raise SystemExit(main())
    # Top-level boundary: any uncaught error becomes a one-line stderr
    # message and a non-zero exit code (full traceback is suppressed).
    except Exception as exc:
        print(f"Translation failed: {exc}", file=sys.stderr)
        raise SystemExit(1)
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
Do-not-translate list (keep exactly as-is in output):
|
|
2
|
+
- CLI commands: openclaw onboard, openclaw plugins install, openclaw channels status --probe, openclaw config edit, npm install, git clone
|
|
3
|
+
- Config keys: transport, serialPort, httpAddress, httpTls, mqtt.broker, mqtt.port, mqtt.username, mqtt.password, mqtt.topic, mqtt.publishTopic, mqtt.tls, region, nodeName, dmPolicy, allowFrom, groupPolicy, channels, textChunkLimit, requireMention, accounts
|
|
4
|
+
- Config values: serial, http, mqtt, pairing, open, allowlist, disabled, UNSET
|
|
5
|
+
- Package/path names: @seeed-studio/meshtastic, index.ts, /dev/ttyUSB0, meshtastic.local, mqtt.meshtastic.org, msh/US/2/json/#
|
|
6
|
+
- Environment variables: MESHTASTIC_TRANSPORT, MESHTASTIC_SERIAL_PORT, etc.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
channel plugin → plugin de canal
|
|
2
|
+
channel → canal (when referring to messaging channels)
|
|
3
|
+
group channel → canal de grupo
|
|
4
|
+
group policy → política de grupo
|
|
5
|
+
DM → mensaje directo
|
|
6
|
+
access control → control de acceso
|
|
7
|
+
allowlist → lista de permitidos
|
|
8
|
+
mention gating → filtrado por @mention
|
|
9
|
+
pairing → emparejamiento
|
|
10
|
+
node → nodo
|
|
11
|
+
gateway → gateway
|
|
12
|
+
mesh network → red mesh
|
|
13
|
+
transport → transporte
|
|
14
|
+
repository → repositorio
|
|
15
|
+
pull request → Pull Request (keep English)
|
|
16
|
+
issue → issue (keep English)
|
|
17
|
+
broker → broker (keep English, MQTT term)
|
|
18
|
+
Serial → Serial (keep English in transport context)
|
|
19
|
+
AI Agent → AI Agent (keep English)
|
|
20
|
+
MeshClaw → MeshClaw (keep English)
|
|
21
|
+
OpenClaw → OpenClaw (keep English)
|
|
22
|
+
Meshtastic → Meshtastic (keep English)
|
|
23
|
+
LoRa → LoRa (keep English)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
channel plugin → plugin de canal
|
|
2
|
+
channel → canal (when referring to messaging channels)
|
|
3
|
+
group channel → canal de groupe
|
|
4
|
+
group policy → politique de groupe
|
|
5
|
+
DM → message privé
|
|
6
|
+
access control → contrôle d'accès
|
|
7
|
+
allowlist → liste autorisée
|
|
8
|
+
mention gating → filtrage par @mention
|
|
9
|
+
pairing → appairage
|
|
10
|
+
node → nœud
|
|
11
|
+
gateway → passerelle
|
|
12
|
+
mesh network → réseau mesh
|
|
13
|
+
transport → transport
|
|
14
|
+
repository → dépôt
|
|
15
|
+
pull request → Pull Request (keep English)
|
|
16
|
+
issue → issue (keep English)
|
|
17
|
+
broker → broker (keep English, MQTT term)
|
|
18
|
+
Serial → Serial (keep English in transport context)
|
|
19
|
+
AI Agent → AI Agent (keep English)
|
|
20
|
+
MeshClaw → MeshClaw (keep English)
|
|
21
|
+
OpenClaw → OpenClaw (keep English)
|
|
22
|
+
Meshtastic → Meshtastic (keep English)
|
|
23
|
+
LoRa → LoRa (keep English)
|