omni-localizer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. omni_localizer-0.1.0/AGENT_USAGE.md +312 -0
  2. omni_localizer-0.1.0/CHANGELOG.md +21 -0
  3. omni_localizer-0.1.0/LICENSE +21 -0
  4. omni_localizer-0.1.0/MANIFEST.in +24 -0
  5. omni_localizer-0.1.0/PKG-INFO +205 -0
  6. omni_localizer-0.1.0/README.md +161 -0
  7. omni_localizer-0.1.0/config/default.yaml +44 -0
  8. omni_localizer-0.1.0/config/test_universal.yaml +45 -0
  9. omni_localizer-0.1.0/pyproject.toml +90 -0
  10. omni_localizer-0.1.0/setup.cfg +4 -0
  11. omni_localizer-0.1.0/src/.hermes/skills/ol-localizer/README.md +8 -0
  12. omni_localizer-0.1.0/src/.hermes/skills/ol-localizer/SKILL.md +100 -0
  13. omni_localizer-0.1.0/src/.opencode/skills/ol-localizer/SKILL.md +40 -0
  14. omni_localizer-0.1.0/src/ol_batch/__init__.py +14 -0
  15. omni_localizer-0.1.0/src/ol_batch/config.py +30 -0
  16. omni_localizer-0.1.0/src/ol_batch/discovery.py +36 -0
  17. omni_localizer-0.1.0/src/ol_batch/processor.py +115 -0
  18. omni_localizer-0.1.0/src/ol_batch/progress.py +44 -0
  19. omni_localizer-0.1.0/src/ol_batch/summary.py +68 -0
  20. omni_localizer-0.1.0/src/ol_buses/__init__.py +22 -0
  21. omni_localizer-0.1.0/src/ol_buses/format_guard.py +61 -0
  22. omni_localizer-0.1.0/src/ol_buses/md_bus.py +121 -0
  23. omni_localizer-0.1.0/src/ol_buses/md_shield.py +68 -0
  24. omni_localizer-0.1.0/src/ol_buses/xliff_bus.py +114 -0
  25. omni_localizer-0.1.0/src/ol_buses/xliff_shield.py +78 -0
  26. omni_localizer-0.1.0/src/ol_checkpoint/__init__.py +4 -0
  27. omni_localizer-0.1.0/src/ol_checkpoint/checkpoint.py +124 -0
  28. omni_localizer-0.1.0/src/ol_checkpoint/exceptions.py +11 -0
  29. omni_localizer-0.1.0/src/ol_concurrency/__init__.py +5 -0
  30. omni_localizer-0.1.0/src/ol_concurrency/scheduler.py +69 -0
  31. omni_localizer-0.1.0/src/ol_config/__init__.py +11 -0
  32. omni_localizer-0.1.0/src/ol_config/loader.py +54 -0
  33. omni_localizer-0.1.0/src/ol_config/schema.py +56 -0
  34. omni_localizer-0.1.0/src/ol_core/__init__.py +29 -0
  35. omni_localizer-0.1.0/src/ol_core/dataclass.py +87 -0
  36. omni_localizer-0.1.0/src/ol_core/exceptions.py +26 -0
  37. omni_localizer-0.1.0/src/ol_core/interfaces.py +52 -0
  38. omni_localizer-0.1.0/src/ol_logging/__init__.py +28 -0
  39. omni_localizer-0.1.0/src/ol_logging/constants.py +22 -0
  40. omni_localizer-0.1.0/src/ol_logging/context.py +29 -0
  41. omni_localizer-0.1.0/src/ol_logging/core.py +33 -0
  42. omni_localizer-0.1.0/src/ol_logging/formatters.py +10 -0
  43. omni_localizer-0.1.0/src/ol_logging/handlers.py +35 -0
  44. omni_localizer-0.1.0/src/ol_lqa/__init__.py +3 -0
  45. omni_localizer-0.1.0/src/ol_lqa/comet.py +90 -0
  46. omni_localizer-0.1.0/src/ol_lqa/judge.py +175 -0
  47. omni_localizer-0.1.0/src/ol_lqa/report.py +305 -0
  48. omni_localizer-0.1.0/src/ol_lqa/scorer.py +61 -0
  49. omni_localizer-0.1.0/src/ol_lqa/stability.py +81 -0
  50. omni_localizer-0.1.0/src/ol_lqa/templates/report.csv.j2 +4 -0
  51. omni_localizer-0.1.0/src/ol_lqa/templates/report.html.j2 +178 -0
  52. omni_localizer-0.1.0/src/ol_md/__init__.py +0 -0
  53. omni_localizer-0.1.0/src/ol_md/extractor.py +81 -0
  54. omni_localizer-0.1.0/src/ol_md/pipeline.py +49 -0
  55. omni_localizer-0.1.0/src/ol_md/repair/__init__.py +11 -0
  56. omni_localizer-0.1.0/src/ol_md/repair/level1.py +8 -0
  57. omni_localizer-0.1.0/src/ol_md/repair/level2.py +12 -0
  58. omni_localizer-0.1.0/src/ol_md/repair/level3.py +87 -0
  59. omni_localizer-0.1.0/src/ol_md/repair/level4.py +13 -0
  60. omni_localizer-0.1.0/src/ol_md/shield.py +103 -0
  61. omni_localizer-0.1.0/src/ol_md/token_stream.py +24 -0
  62. omni_localizer-0.1.0/src/ol_pool/__init__.py +4 -0
  63. omni_localizer-0.1.0/src/ol_pool/router.py +103 -0
  64. omni_localizer-0.1.0/src/ol_retry/__init__.py +3 -0
  65. omni_localizer-0.1.0/src/ol_retry/retry.py +64 -0
  66. omni_localizer-0.1.0/src/ol_routing/__init__.py +4 -0
  67. omni_localizer-0.1.0/src/ol_routing/router.py +50 -0
  68. omni_localizer-0.1.0/src/ol_tm/__init__.py +3 -0
  69. omni_localizer-0.1.0/src/ol_tm/service.py +123 -0
  70. omni_localizer-0.1.0/src/ol_xliff/__init__.py +5 -0
  71. omni_localizer-0.1.0/src/ol_xliff/parser.py +313 -0
  72. omni_localizer-0.1.0/src/ol_xliff/pipeline.py +91 -0
  73. omni_localizer-0.1.0/src/ol_xliff/repair/__init__.py +12 -0
  74. omni_localizer-0.1.0/src/ol_xliff/repair/level1.py +32 -0
  75. omni_localizer-0.1.0/src/ol_xliff/repair/level2.py +12 -0
  76. omni_localizer-0.1.0/src/ol_xliff/repair/level3.py +80 -0
  77. omni_localizer-0.1.0/src/ol_xliff/repair/level4.py +25 -0
  78. omni_localizer-0.1.0/src/ol_xliff/shield.py +29 -0
  79. omni_localizer-0.1.0/src/omni_localizer.egg-info/PKG-INFO +205 -0
  80. omni_localizer-0.1.0/src/omni_localizer.egg-info/SOURCES.txt +136 -0
  81. omni_localizer-0.1.0/src/omni_localizer.egg-info/dependency_links.txt +1 -0
  82. omni_localizer-0.1.0/src/omni_localizer.egg-info/entry_points.txt +2 -0
  83. omni_localizer-0.1.0/src/omni_localizer.egg-info/requires.txt +23 -0
  84. omni_localizer-0.1.0/src/omni_localizer.egg-info/top_level.txt +14 -0
  85. omni_localizer-0.1.0/tests/test_batch_edge_cases.py +366 -0
  86. omni_localizer-0.1.0/tests/test_batch_processor.py +279 -0
  87. omni_localizer-0.1.0/tests/test_checkpoint.py +107 -0
  88. omni_localizer-0.1.0/tests/test_checkpoint_resume.py +122 -0
  89. omni_localizer-0.1.0/tests/test_cli_batch.py +218 -0
  90. omni_localizer-0.1.0/tests/test_concurrent_engine.py +105 -0
  91. omni_localizer-0.1.0/tests/test_config_loader.py +144 -0
  92. omni_localizer-0.1.0/tests/test_e2e_md_pipeline.py +291 -0
  93. omni_localizer-0.1.0/tests/test_e2e_xliff_pipeline.py +165 -0
  94. omni_localizer-0.1.0/tests/test_evaluation_result.py +79 -0
  95. omni_localizer-0.1.0/tests/test_format_guard.py +48 -0
  96. omni_localizer-0.1.0/tests/test_hermes_skill.py +44 -0
  97. omni_localizer-0.1.0/tests/test_integration_3a.py +397 -0
  98. omni_localizer-0.1.0/tests/test_integration_3b.py +367 -0
  99. omni_localizer-0.1.0/tests/test_llm_restorer_interface.py +51 -0
  100. omni_localizer-0.1.0/tests/test_logging.py +266 -0
  101. omni_localizer-0.1.0/tests/test_lqa_comet.py +254 -0
  102. omni_localizer-0.1.0/tests/test_lqa_judge.py +165 -0
  103. omni_localizer-0.1.0/tests/test_lqa_report.py +616 -0
  104. omni_localizer-0.1.0/tests/test_lqa_scorer.py +66 -0
  105. omni_localizer-0.1.0/tests/test_lqa_stability.py +121 -0
  106. omni_localizer-0.1.0/tests/test_md_auto_repair.py +67 -0
  107. omni_localizer-0.1.0/tests/test_md_bus.py +44 -0
  108. omni_localizer-0.1.0/tests/test_md_format_preservation.py +79 -0
  109. omni_localizer-0.1.0/tests/test_md_protector.py +62 -0
  110. omni_localizer-0.1.0/tests/test_md_rebuilder.py +72 -0
  111. omni_localizer-0.1.0/tests/test_md_repair_level1.py +20 -0
  112. omni_localizer-0.1.0/tests/test_md_repair_level2.py +13 -0
  113. omni_localizer-0.1.0/tests/test_md_repair_level3.py +9 -0
  114. omni_localizer-0.1.0/tests/test_md_repair_level4.py +16 -0
  115. omni_localizer-0.1.0/tests/test_md_repair_pipeline.py +15 -0
  116. omni_localizer-0.1.0/tests/test_md_shield.py +45 -0
  117. omni_localizer-0.1.0/tests/test_md_text_integrity.py +57 -0
  118. omni_localizer-0.1.0/tests/test_md_token_stream.py +29 -0
  119. omni_localizer-0.1.0/tests/test_model_pool_failover.py +95 -0
  120. omni_localizer-0.1.0/tests/test_model_pool_schema.py +122 -0
  121. omni_localizer-0.1.0/tests/test_ol_cli.py +197 -0
  122. omni_localizer-0.1.0/tests/test_opencode_skill.py +44 -0
  123. omni_localizer-0.1.0/tests/test_repair_context.py +44 -0
  124. omni_localizer-0.1.0/tests/test_retry.py +127 -0
  125. omni_localizer-0.1.0/tests/test_review_extractor.py +232 -0
  126. omni_localizer-0.1.0/tests/test_routing.py +92 -0
  127. omni_localizer-0.1.0/tests/test_skill_invocation.py +66 -0
  128. omni_localizer-0.1.0/tests/test_tm_service.py +57 -0
  129. omni_localizer-0.1.0/tests/test_translation_context.py +69 -0
  130. omni_localizer-0.1.0/tests/test_xliff_bus.py +62 -0
  131. omni_localizer-0.1.0/tests/test_xliff_format_preservation.py +126 -0
  132. omni_localizer-0.1.0/tests/test_xliff_parser.py +88 -0
  133. omni_localizer-0.1.0/tests/test_xliff_repair_level1.py +63 -0
  134. omni_localizer-0.1.0/tests/test_xliff_repair_level2.py +61 -0
  135. omni_localizer-0.1.0/tests/test_xliff_repair_level3.py +59 -0
  136. omni_localizer-0.1.0/tests/test_xliff_repair_level4.py +75 -0
  137. omni_localizer-0.1.0/tests/test_xliff_repair_pipeline.py +105 -0
  138. omni_localizer-0.1.0/tests/test_xliff_shield.py +97 -0
@@ -0,0 +1,312 @@
1
+ # Omni-Localizer Agent Usage Guide
2
+
3
+ ## Overview
4
+
5
+ Omni-Localizer is an AI-native localization pipeline that translates Markdown and XLIFF documents. It also supports agent integration via SKILL.md files for OpenCode and Hermes.
6
+
7
+ ---
8
+
9
+ ## Quick Start
10
+
11
+ ### For Agents
12
+
13
+ 1. **Discover the skill**
14
+ ```
15
+ Look for: src/.opencode/skills/ol-localizer/SKILL.md
16
+ Look for: src/.hermes/skills/ol-localizer/SKILL.md
17
+ ```
18
+
19
+ 2. **Read the SKILL.md** for instructions on how to invoke the CLI
20
+
21
+ 3. **Configure API keys** in the environment:
22
+ ```
23
+ export OPENAI_API_KEY=sk-...
24
+ export MINIMAX_API_KEY=... # if using MiniMax
25
+ export BAIDU_API_KEY=... # if using Baidu
26
+ ```
27
+
28
+ 4. **Invoke via CLI**:
29
+ ```
30
+ python -m ol_cli translate-md <file.md> -c config/default.yaml -s en -t zh -o output/ --json
31
+ ```
32
+
33
+ ---
34
+
35
+ ## CLI Commands
36
+
37
+ ### Basic Translation
38
+
39
+ ```bash
40
+ # Translate Markdown (human-readable output)
41
+ python -m ol_cli translate-md input.md -c config/default.yaml -s en -t zh -o output/
42
+
43
+ # Translate with JSON output (for agents)
44
+ python -m ol_cli translate-md input.md -c config/default.yaml -s en -t zh -o output/ --json
45
+
46
+ # Translate XLIFF
47
+ python -m ol_cli translate-xliff input.xlf -c config/default.yaml -s en -t zh -o output/
48
+ ```
49
+
50
+ ### JSON Output Format
51
+
52
+ When `--json` is used, the output on success is:
53
+ ```json
54
+ {
55
+ "success": true,
56
+ "input_file": "input.md",
57
+ "output_file": "output/input.md",
58
+ "source_lang": "en",
59
+ "target_lang": "zh"
60
+ }
61
+ ```
62
+
63
+ On error:
64
+ ```json
65
+ {
66
+ "success": false,
67
+ "error": "Error message here"
68
+ }
69
+ ```
70
+
71
+ ---
72
+
73
+ ## Configuration
74
+
75
+ ### Default Config Location
76
+ `config/default.yaml`
77
+
78
+ ### Environment Variables
79
+ Set in shell before running:
80
+ ```bash
81
+ export OPENAI_API_KEY=sk-your-key
82
+ export MINIMAX_API_KEY=your-minimax-key # optional
83
+ export BAIDU_API_KEY=your-baidu-key # optional
84
+ ```
85
+
86
+ ### Config Structure
87
+ ```yaml
88
+ project_id: "your-project"
89
+ source_lang: "en"
90
+ target_lang: "zh"
91
+ llm_pool:
92
+ translation:
93
+ - provider: "openai"
94
+ model: "gpt-4o-mini"
95
+ priority: 1
96
+ api_key: "${OPENAI_API_KEY}"
97
+ role: "translation"
98
+ judging:
99
+ - provider: "openai"
100
+ model: "gpt-4o-mini"
101
+ priority: 1
102
+ api_key: "${OPENAI_API_KEY}"
103
+ role: "judging"
104
+ restoration:
105
+ - provider: "openai"
106
+ model: "gpt-4o-mini"
107
+ priority: 1
108
+ api_key: "${OPENAI_API_KEY}"
109
+ role: "restoration"
110
+ ```
111
+
112
+ ---
113
+
114
+ ## Agent Integration
115
+
116
+ ### OpenCode
117
+
118
+ 1. Copy skill to project:
119
+ ```bash
120
+ cp -r src/.opencode/skills/ol-localizer <your-project>/.opencode/skills/
121
+ ```
122
+
123
+ 2. Read `SKILL.md` in that directory for detailed instructions
124
+
125
+ ### Hermes
126
+
127
+ 1. Copy skill to Hermes:
128
+ ```bash
129
+ cp -r src/.hermes/skills/ol-localizer ~/.hermes/skills/
130
+ ```
131
+
132
+ 2. Restart Hermes to activate
133
+
134
+ ---
135
+
136
+ ## Testing
137
+
138
+ ### Run All Skill Tests
139
+ ```bash
140
+ pytest tests/test_opencode_skill.py tests/test_hermes_skill.py tests/test_skill_invocation.py -v
141
+ ```
142
+
143
+ ### Test Individual Skill
144
+ ```bash
145
+ pytest tests/test_opencode_skill.py -v
146
+ pytest tests/test_hermes_skill.py -v
147
+ ```
148
+
149
+ ### Verify JSON Output
150
+ ```bash
151
+ python -m ol_cli translate-md nonexistent.md -o /tmp/out --json
152
+ ```
153
+
154
+ Expected: a JSON error object with `"success": false` and an `"error"` message
155
+
156
+ ---
157
+
158
+ ## Common Scenarios
159
+
160
+ ### Scenario 1: Agent Needs to Translate a File
161
+
162
+ **Agent action:**
163
+ 1. Read SKILL.md for instructions
164
+ 2. Check if API keys are set
165
+ 3. Write source text to temp .md file
166
+ 4. Run CLI with --json flag
167
+ 5. Parse JSON response
168
+ 6. Read translated file from output directory
169
+
170
+ **Example invocation:**
171
+ ```bash
172
+ # Agent writes temp file
173
+ echo "# Hello" > /tmp/test.md
174
+
175
+ # Agent runs translation
176
+ python -m ol_cli translate-md /tmp/test.md -c config/default.yaml -s en -t zh -o /tmp/out/ --json
177
+
178
+ # Agent parses JSON output
179
+ # Reads /tmp/out/test.md for translated content (a separate output directory keeps the source file from being overwritten)
180
+ ```
181
+
182
+ ### Scenario 2: Agent Wants to Verify Skill is Available
183
+
184
+ **Agent action:**
185
+ ```bash
186
+ ls src/.opencode/skills/ol-localizer/SKILL.md
187
+ ls src/.hermes/skills/ol-localizer/SKILL.md
188
+ ```
189
+
190
+ ### Scenario 3: Agent Needs to Configure API Keys
191
+
192
+ **Agent action:**
193
+ 1. Read SKILL.md Configuration section
194
+ 2. Set required environment variables:
195
+ ```bash
196
+ export OPENAI_API_KEY=sk-...
197
+ ```
198
+
199
+ ### Scenario 4: Translation Fails
200
+
201
+ **Agent checks:**
202
+ 1. JSON output for error message:
203
+ ```bash
204
+ python -m ol_cli translate-md input.md -c config/default.yaml -s en -t zh -o output/ --json 2>/dev/null
205
+ ```
206
+
207
+ 2. Common fixes:
208
+ - Missing API key → Set `OPENAI_API_KEY`
209
+ - Invalid config → Check `config/default.yaml` exists
210
+ - File not found → Verify input path
211
+ - Rate limit → Wait and retry
212
+
213
+ ### Scenario 5: Agent Wants to Use Different LLM Provider
214
+
215
+ **Agent action:**
216
+ 1. Modify `config/default.yaml`:
217
+ ```yaml
218
+ llm_pool:
219
+ translation:
220
+ - provider: "openai" # or "anthropic", "deepseek", etc.
221
+ model: "gpt-4o" # or "claude-3-sonnet", etc.
222
+ api_key: "${PROVIDER_API_KEY}"
223
+ ```
224
+
225
+ 2. Set appropriate API key:
226
+ ```bash
227
+ export PROVIDER_API_KEY=sk-...
228
+ ```
229
+
230
+ ---
231
+
232
+ ## Troubleshooting
233
+
234
+ ### "No module named ol_cli"
235
+
236
+ **Cause:** Running from the repo root without `PYTHONPATH=src`
237
+
238
+ **Fix:**
239
+ ```bash
240
+ PYTHONPATH=src python -m ol_cli translate-md ...
241
+ ```
242
+
243
+ ### JSON Output Not Valid
244
+
245
+ **Cause:** Error occurred before JSON could be generated
246
+
247
+ **Fix:** Check stderr for error message, fix issue, retry
248
+
249
+ ### Tests Failing
250
+
251
+ **Agent action:**
252
+ ```bash
253
+ # Run specific failing test
254
+ pytest tests/test_opencode_skill.py::TestOpenCodeSkill::test_opencode_skill_exists -v
255
+
256
+ # Run all tests
257
+ pytest tests/test_opencode_skill.py tests/test_hermes_skill.py -v
258
+ ```
259
+
260
+ ### Skill Not Discovered by Agent
261
+
262
+ **Agent checks:**
263
+ 1. File exists:
264
+ ```bash
265
+ ls src/.opencode/skills/ol-localizer/SKILL.md
266
+ ```
267
+
268
+ 2. YAML frontmatter valid:
269
+ ```bash
270
+ python -c "import yaml; yaml.safe_load(open('src/.opencode/skills/ol-localizer/SKILL.md').read().split('---')[1])"
271
+ ```
272
+
273
+ 3. Required fields present:
274
+ ```bash
275
+ grep -q "name:" src/.opencode/skills/ol-localizer/SKILL.md
276
+ grep -q "description:" src/.opencode/skills/ol-localizer/SKILL.md
277
+ ```
278
+
279
+ ---
280
+
281
+ ## File Locations
282
+
283
+ | Component | Path |
284
+ |-----------|------|
285
+ | OpenCode Skill | `src/.opencode/skills/ol-localizer/SKILL.md` |
286
+ | Hermes Skill | `src/.hermes/skills/ol-localizer/SKILL.md` |
287
+ | CLI Entry | `src/ol_cli.py` |
288
+ | Default Config | `config/default.yaml` |
289
+ | Test Helpers | `tests/skill_helpers.py` |
290
+ | OpenCode Tests | `tests/test_opencode_skill.py` |
291
+ | Hermes Tests | `tests/test_hermes_skill.py` |
292
+ | Invocation Tests | `tests/test_skill_invocation.py` |
293
+
294
+ ---
295
+
296
+ ## Key Design Decisions
297
+
298
+ 1. **SKILL.md format** - Universal skill format supported by OpenCode and Hermes
299
+ 2. **JSON output** - Machine-readable for agent parsing
300
+ 3. **Environment variables** - API keys are read from the environment (referenced as `${VAR}` in config) and are never hard-coded in code or config files
301
+ 4. **Shell invocation** - Agents invoke via `python -m ol_cli` with CLI arguments
302
+ 5. **No daemon/server** - Stateless single-shot invocations
303
+ 6. **Failover** - Multiple LLM providers configured, automatic fallback
304
+
305
+ ---
306
+
307
+ ## Security Notes
308
+
309
+ - API keys stored in environment, never in code
310
+ - `PYTHONPATH=src` required when running from repo root
311
+ - Temp files should be cleaned up after use
312
+ - No persistent state - each invocation is independent
@@ -0,0 +1,21 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2025-01-01
9
+
10
+ ### Added
11
+ - Initial release
12
+ - Translate markdown files using LLM APIs
13
+ - Translate XLIFF files using LLM APIs
14
+ - Model pool failover with LiteLLM router
15
+ - Content shielding for code blocks, links, images
16
+ - 4-layer semantic repair pipeline
17
+ - LLM-based translation quality judging
18
+ - Translation memory integration via hypomnema
19
+ - Span alignment for content preservation
20
+ - Agent skill support for OpenCode and Hermes
21
+ - JSON output mode for machine-readable results
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 1StepMore
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ # Documentation
2
+ include README.md
3
+ include LICENSE
4
+ include CHANGELOG.md
5
+ include AGENT_USAGE.md
6
+
7
+ # Config files
8
+ include config/*.yaml
9
+
10
+ # OpenCode skill files (recursively include all files in skill directories)
11
+ recursive-include src/.opencode *
12
+
13
+ # Hermes skill files (recursively include all files in skill directories)
14
+ recursive-include src/.hermes *
15
+
16
+ # Jinja2 templates in ol_lqa
17
+ recursive-include src/ol_lqa *.j2
18
+
19
+ # Exclude unnecessary files
20
+ exclude .gitignore
21
+ exclude .env
22
+ exclude .env.example
23
+ exclude *.pyc
24
+ exclude __pycache__
@@ -0,0 +1,205 @@
1
+ Metadata-Version: 2.4
2
+ Name: omni-localizer
3
+ Version: 0.1.0
4
+ Summary: AI-native localization pipeline with automated quality control
5
+ Author-email: 1StepMore <renanzai@foxmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/1StepMore/Omni_Localizer
8
+ Project-URL: Documentation, https://github.com/1StepMore/Omni_Localizer#readme
9
+ Project-URL: Repository, https://github.com/1StepMore/Omni_Localizer
10
+ Project-URL: Issues, https://github.com/1StepMore/Omni_Localizer/issues
11
+ Keywords: localization,translation,llm,ai,markdown,xliff,i18n,internationalization
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Software Development :: Localization
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Operating System :: OS Independent
19
+ Requires-Python: >=3.13
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: litellm>=1.84.0
23
+ Requires-Dist: translate-toolkit>=3.19.9
24
+ Requires-Dist: markdown-it-py>=3.0.0
25
+ Requires-Dist: pydantic>=2.0.0
26
+ Requires-Dist: PyYAML>=6.0.0
27
+ Requires-Dist: pytest>=8.0.0
28
+ Requires-Dist: span-aligner>=0.3.2
29
+ Requires-Dist: hypomnema>=0.8
30
+ Requires-Dist: openevalkit>=0.1.7
31
+ Requires-Dist: typer[all]>=0.15.0
32
+ Requires-Dist: jinja2>=3.1.0
33
+ Requires-Dist: rich>=13.0.0
34
+ Provides-Extra: ml
35
+ Requires-Dist: torch>=2.0.0; extra == "ml"
36
+ Requires-Dist: sentence-transformers>=3.0.0; extra == "ml"
37
+ Requires-Dist: transformers>=4.41.0; extra == "ml"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=8.0; extra == "dev"
40
+ Requires-Dist: ruff>=0.9.0; extra == "dev"
41
+ Requires-Dist: mypy>=1.14.0; extra == "dev"
42
+ Requires-Dist: pip-audit>=2.8.0; extra == "dev"
43
+ Dynamic: license-file
44
+
45
+ # Omni-Localizer (OL)
46
+
47
+ AI-native localization pipeline that translates documents through intelligent LLM routing with built-in quality control.
48
+
49
+ ## What It Does
50
+
51
+ - **Translate documents** (Markdown, XLIFF) using LLM APIs
52
+ - **Automatic failover** — switches to backup model if primary fails
53
+ - **Quality preservation** — shields code blocks, links, images during translation
54
+ - **LLM-based judging** — evaluates translation accuracy and fluency
55
+ - **Restoration layer** — uses LLM to restore placeholders after translation
56
+
57
+ ## Quick Start
58
+
59
+ ### 1. Install
60
+
61
+ ```bash
62
+ pip install -e .
63
+ ```
64
+
65
+ ### 2. Configure API Keys
66
+
67
+ Create a `.bat` file (gitignored), for example `test_en_to_zh.bat`, that sets your API keys and wraps the CLI:
68
+
69
+ ```bat
70
+ @echo off
71
+ set OPENAI_API_KEY=your_api_key
72
+ set PYTHONPATH=src
73
+ python -m ol_cli translate-md %* -c config/default.yaml -s en -t zh
74
+ ```
75
+
76
+ ### 3. Run
77
+
78
+ ```cmd
79
+ test_en_to_zh.bat your_document.md -o output/
80
+ ```
81
+
82
+ ## Configuration
83
+
84
+ `config/default.yaml` — Example LLM pool configuration:
85
+
86
+ ```yaml
87
+ llm_pool:
88
+ translation:
89
+ - provider: "openai"
90
+ model: "gpt-4o-mini"
91
+ priority: 1
92
+ api_key: "${OPENAI_API_KEY}"
93
+ role: "translation"
94
+ - provider: "openai"
95
+ model: "gpt-4o"
96
+ priority: 2
97
+ api_key: "${OPENAI_API_KEY}"
98
+ role: "translation"
99
+ judging:
100
+ - provider: "openai"
101
+ model: "gpt-4o-mini"
102
+ priority: 1
103
+ api_key: "${OPENAI_API_KEY}"
104
+ role: "judging"
105
+ restoration:
106
+ - provider: "openai"
107
+ model: "gpt-4o-mini"
108
+ priority: 1
109
+ api_key: "${OPENAI_API_KEY}"
110
+ role: "restoration"
111
+ ```
112
+
113
+ ## CLI Commands
114
+
115
+ ```bash
116
+ # Translate markdown
117
+ ol translate-md <file.md> -c <config.yaml> -s en -t zh -o output/
118
+
119
+ # Translate XLIFF
120
+ ol translate-xliff <file.xlf> -c <config.yaml> -s en -t zh -o output/
121
+
122
+ # Extract warnings from file
123
+ ol extract-warnings <file> -o warnings.md
124
+ ```
125
+
126
+ ## Key Features
127
+
128
+ | Feature | Description |
129
+ |---------|-------------|
130
+ | **Model Pool Failover** | LiteLLM router with primary + backup models per role |
131
+ | **Content Shielding** | Code blocks, links, images preserved during translation |
132
+ | **4-Layer Repair** | Regex → Span alignment → LLM restoration → Safe fallback |
133
+ | **Translation + Judging** | JudgeService evaluates quality (adequacy, fluency, terminology) |
134
+ | **TM Integration** | hypomnema for translation memory lookups |
135
+
136
+ ## Architecture
137
+
138
+ - **MD Channel**: Token Stream + 4-layer semantic repair
139
+ - **XLIFF Channel**: translate-toolkit based
140
+ - **LLM Routing**: LiteLLM with model pool failover
141
+ - **LQA**: openevalkit Scorer→Judge + COMET
142
+ - **TM**: hypomnema (TMX)
143
+ - **Alignment**: span-aligner + VectorAlign
144
+
145
+ ## Agent Usage
146
+
147
+ Omni-Localizer can be used as a **skill** by coding agents (OpenCode, Hermes). Agents read the SKILL.md file to understand how to invoke translation.
148
+
149
+ ### OpenCode
150
+
151
+ 1. Add the skill to your project:
152
+ ```bash
153
+ cp -r src/.opencode/skills/ol-localizer <your-project>/.opencode/skills/
154
+ ```
155
+
156
+ 2. Reference it in your OpenCode configuration if needed
157
+
158
+ For detailed usage, see `src/.opencode/skills/ol-localizer/SKILL.md`
159
+
160
+ ### Hermes
161
+
162
+ 1. Copy or symlink the skill:
163
+ ```bash
164
+ cp -r src/.hermes/skills/ol-localizer ~/.hermes/skills/
165
+ ```
166
+
167
+ 2. Restart Hermes to activate
168
+
169
+ For detailed usage, see `src/.hermes/skills/ol-localizer/SKILL.md`
170
+
171
+ ### Environment Variables
172
+
173
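+ Configure your LLM provider API keys in your shell environment (for example, `export OPENAI_API_KEY=sk-...`); the config file references them as `${OPENAI_API_KEY}`.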
174
+
175
+ ### Testing the Agent Integration
176
+
177
+ **Verify skill files exist:**
178
+ ```bash
179
+ ls src/.opencode/skills/ol-localizer/SKILL.md
180
+ ls src/.hermes/skills/ol-localizer/SKILL.md
181
+ ```
182
+
183
+ **Test JSON output (machine-readable for agents):**
184
+ ```bash
185
+ python -m ol_cli translate-md input.md -c config/default.yaml -s en -t zh -o output/ --json
186
+ ```
187
+
188
+ Expected JSON output:
189
+ ```json
190
+ {"success": true, "input_file": "input.md", "output_file": "output/input.md", "source_lang": "en", "target_lang": "zh"}
191
+ ```
192
+
193
+ **Run skill tests:**
194
+ ```bash
195
+ pytest tests/test_opencode_skill.py tests/test_hermes_skill.py -v
196
+ ```
197
+
198
+ **Verify --json flag in help:**
199
+ ```bash
200
+ python -m ol_cli translate-md --help | grep json
201
+ ```
202
+
203
+ ## License
204
+
205
+ MIT