contextweave 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. context_aware_translation/AGENTS.md +134 -0
  2. context_aware_translation/__init__.py +114 -0
  3. context_aware_translation/adapters/__init__.py +1 -0
  4. context_aware_translation/adapters/files/__init__.py +3 -0
  5. context_aware_translation/adapters/files/glossary_io.py +203 -0
  6. context_aware_translation/adapters/qt/__init__.py +1 -0
  7. context_aware_translation/adapters/qt/application_event_bridge.py +78 -0
  8. context_aware_translation/adapters/qt/task_engine.py +392 -0
  9. context_aware_translation/adapters/qt/workers/__init__.py +1 -0
  10. context_aware_translation/adapters/qt/workers/base_worker.py +61 -0
  11. context_aware_translation/adapters/qt/workers/batch_task_overlap_guard.py +57 -0
  12. context_aware_translation/adapters/qt/workers/batch_translation_task_worker.py +228 -0
  13. context_aware_translation/adapters/qt/workers/chunk_retranslation_task_worker.py +136 -0
  14. context_aware_translation/adapters/qt/workers/export_worker.py +75 -0
  15. context_aware_translation/adapters/qt/workers/glossary_export_task_worker.py +119 -0
  16. context_aware_translation/adapters/qt/workers/glossary_extraction_task_worker.py +102 -0
  17. context_aware_translation/adapters/qt/workers/glossary_review_task_worker.py +98 -0
  18. context_aware_translation/adapters/qt/workers/glossary_translation_task_worker.py +97 -0
  19. context_aware_translation/adapters/qt/workers/image_reembedding_task_worker.py +114 -0
  20. context_aware_translation/adapters/qt/workers/import_worker.py +49 -0
  21. context_aware_translation/adapters/qt/workers/ocr_task_worker.py +149 -0
  22. context_aware_translation/adapters/qt/workers/operation_tracker.py +94 -0
  23. context_aware_translation/adapters/qt/workers/translate_and_export_task_worker.py +394 -0
  24. context_aware_translation/adapters/qt/workers/translation_manga_task_worker.py +133 -0
  25. context_aware_translation/adapters/qt/workers/translation_text_task_worker.py +158 -0
  26. context_aware_translation/app_identity.py +33 -0
  27. context_aware_translation/application/__init__.py +7 -0
  28. context_aware_translation/application/composition.py +104 -0
  29. context_aware_translation/application/contracts/__init__.py +57 -0
  30. context_aware_translation/application/contracts/app_setup.py +188 -0
  31. context_aware_translation/application/contracts/common.py +193 -0
  32. context_aware_translation/application/contracts/document.py +235 -0
  33. context_aware_translation/application/contracts/project_setup.py +19 -0
  34. context_aware_translation/application/contracts/projects.py +36 -0
  35. context_aware_translation/application/contracts/queue.py +35 -0
  36. context_aware_translation/application/contracts/terms.py +148 -0
  37. context_aware_translation/application/contracts/work.py +111 -0
  38. context_aware_translation/application/errors.py +35 -0
  39. context_aware_translation/application/events.py +146 -0
  40. context_aware_translation/application/runtime.py +1302 -0
  41. context_aware_translation/application/services/__init__.py +19 -0
  42. context_aware_translation/application/services/_export_support.py +281 -0
  43. context_aware_translation/application/services/app_setup.py +532 -0
  44. context_aware_translation/application/services/document.py +2309 -0
  45. context_aware_translation/application/services/project_setup.py +161 -0
  46. context_aware_translation/application/services/projects.py +152 -0
  47. context_aware_translation/application/services/queue.py +118 -0
  48. context_aware_translation/application/services/terms.py +582 -0
  49. context_aware_translation/application/services/work.py +525 -0
  50. context_aware_translation/cli/__init__.py +1 -0
  51. context_aware_translation/cli/config_file.py +337 -0
  52. context_aware_translation/cli/main.py +362 -0
  53. context_aware_translation/cli/output.py +91 -0
  54. context_aware_translation/cli/runtime.py +26 -0
  55. context_aware_translation/cli/wait.py +51 -0
  56. context_aware_translation/config.py +1354 -0
  57. context_aware_translation/core/AGENTS.md +133 -0
  58. context_aware_translation/core/__init__.py +1 -0
  59. context_aware_translation/core/cancellation.py +17 -0
  60. context_aware_translation/core/context_extractor.py +68 -0
  61. context_aware_translation/core/context_manager.py +2257 -0
  62. context_aware_translation/core/manga_document_handler.py +253 -0
  63. context_aware_translation/core/models.py +211 -0
  64. context_aware_translation/core/progress.py +42 -0
  65. context_aware_translation/core/term_memory.py +14 -0
  66. context_aware_translation/core/term_memory_builder.py +246 -0
  67. context_aware_translation/core/translation_strategies.py +202 -0
  68. context_aware_translation/documents/AGENTS.md +139 -0
  69. context_aware_translation/documents/__init__.py +0 -0
  70. context_aware_translation/documents/base.py +377 -0
  71. context_aware_translation/documents/content/AGENTS.md +162 -0
  72. context_aware_translation/documents/content/__init__.py +0 -0
  73. context_aware_translation/documents/content/ocr_content.py +235 -0
  74. context_aware_translation/documents/content/ocr_items.py +940 -0
  75. context_aware_translation/documents/epub.py +2669 -0
  76. context_aware_translation/documents/epub_container.py +23 -0
  77. context_aware_translation/documents/epub_support/AGENTS.md +183 -0
  78. context_aware_translation/documents/epub_support/__init__.py +1 -0
  79. context_aware_translation/documents/epub_support/container_model.py +75 -0
  80. context_aware_translation/documents/epub_support/container_patch.py +43 -0
  81. context_aware_translation/documents/epub_support/container_reader.py +579 -0
  82. context_aware_translation/documents/epub_support/container_shared.py +109 -0
  83. context_aware_translation/documents/epub_support/container_writer.py +413 -0
  84. context_aware_translation/documents/epub_support/inline_markers.py +285 -0
  85. context_aware_translation/documents/epub_support/nav_ops.py +384 -0
  86. context_aware_translation/documents/epub_support/slot_lines.py +61 -0
  87. context_aware_translation/documents/epub_support/xml_utils.py +38 -0
  88. context_aware_translation/documents/epub_xhtml_utils.py +1457 -0
  89. context_aware_translation/documents/manga.py +695 -0
  90. context_aware_translation/documents/manga_alignment.py +88 -0
  91. context_aware_translation/documents/manga_reembed_planner.py +476 -0
  92. context_aware_translation/documents/pdf.py +757 -0
  93. context_aware_translation/documents/scanned_book.py +408 -0
  94. context_aware_translation/documents/subtitle.py +334 -0
  95. context_aware_translation/documents/text.py +265 -0
  96. context_aware_translation/llm/AGENTS.md +144 -0
  97. context_aware_translation/llm/__init__.py +1 -0
  98. context_aware_translation/llm/batch_jobs/AGENTS.md +148 -0
  99. context_aware_translation/llm/batch_jobs/__init__.py +21 -0
  100. context_aware_translation/llm/batch_jobs/base.py +101 -0
  101. context_aware_translation/llm/batch_jobs/gemini_gateway.py +668 -0
  102. context_aware_translation/llm/client.py +430 -0
  103. context_aware_translation/llm/epub_ocr.py +131 -0
  104. context_aware_translation/llm/extractor.py +341 -0
  105. context_aware_translation/llm/glossary_translator.py +254 -0
  106. context_aware_translation/llm/image_backend_base.py +94 -0
  107. context_aware_translation/llm/image_backends/AGENTS.md +199 -0
  108. context_aware_translation/llm/image_backends/__init__.py +1 -0
  109. context_aware_translation/llm/image_backends/gemini_backend.py +169 -0
  110. context_aware_translation/llm/image_backends/openai_backend.py +139 -0
  111. context_aware_translation/llm/image_backends/qwen_backend.py +172 -0
  112. context_aware_translation/llm/image_generator.py +109 -0
  113. context_aware_translation/llm/language_detector.py +162 -0
  114. context_aware_translation/llm/manga_ocr.py +302 -0
  115. context_aware_translation/llm/manga_translator.py +127 -0
  116. context_aware_translation/llm/ocr.py +205 -0
  117. context_aware_translation/llm/reviewer.py +173 -0
  118. context_aware_translation/llm/session_trace.py +38 -0
  119. context_aware_translation/llm/summarizor.py +352 -0
  120. context_aware_translation/llm/token_tracker.py +170 -0
  121. context_aware_translation/llm/translation_strategies.py +280 -0
  122. context_aware_translation/llm/translator.py +771 -0
  123. context_aware_translation/resources/opencc/config/hk2s.json +33 -0
  124. context_aware_translation/resources/opencc/config/jp2s.json +33 -0
  125. context_aware_translation/resources/opencc/config/s2hk.json +27 -0
  126. context_aware_translation/resources/opencc/config/s2t.json +22 -0
  127. context_aware_translation/resources/opencc/config/s2tw.json +27 -0
  128. context_aware_translation/resources/opencc/config/s2twp.json +32 -0
  129. context_aware_translation/resources/opencc/config/t2hk.json +16 -0
  130. context_aware_translation/resources/opencc/config/t2s.json +22 -0
  131. context_aware_translation/resources/opencc/config/t2tw.json +16 -0
  132. context_aware_translation/resources/opencc/config/tw2s.json +33 -0
  133. context_aware_translation/resources/opencc/config/tw2sp.json +36 -0
  134. context_aware_translation/resources/opencc/dictionary/HKVariants.txt +63 -0
  135. context_aware_translation/resources/opencc/dictionary/HKVariantsPhrases.txt +17 -0
  136. context_aware_translation/resources/opencc/dictionary/HKVariantsRev.txt +70 -0
  137. context_aware_translation/resources/opencc/dictionary/HKVariantsRevPhrases.txt +156 -0
  138. context_aware_translation/resources/opencc/dictionary/JPVariants.txt +367 -0
  139. context_aware_translation/resources/opencc/dictionary/JPVariantsRev.txt +367 -0
  140. context_aware_translation/resources/opencc/dictionary/STCharacters.txt +3980 -0
  141. context_aware_translation/resources/opencc/dictionary/STPhrases.txt +49051 -0
  142. context_aware_translation/resources/opencc/dictionary/TSCharacters.txt +4113 -0
  143. context_aware_translation/resources/opencc/dictionary/TSPhrases.txt +277 -0
  144. context_aware_translation/resources/opencc/dictionary/TWPhrases.txt +509 -0
  145. context_aware_translation/resources/opencc/dictionary/TWPhrasesRev.txt +518 -0
  146. context_aware_translation/resources/opencc/dictionary/TWVariants.txt +39 -0
  147. context_aware_translation/resources/opencc/dictionary/TWVariantsRev.txt +39 -0
  148. context_aware_translation/resources/opencc/dictionary/TWVariantsRevPhrases.txt +68 -0
  149. context_aware_translation/resources/tokenizers/deepseek-v3/special_tokens_map.json +23 -0
  150. context_aware_translation/resources/tokenizers/deepseek-v3/tokenizer.json +646418 -0
  151. context_aware_translation/resources/tokenizers/deepseek-v3/tokenizer_config.json +6562 -0
  152. context_aware_translation/storage/AGENTS.md +192 -0
  153. context_aware_translation/storage/__init__.py +3 -0
  154. context_aware_translation/storage/library/__init__.py +3 -0
  155. context_aware_translation/storage/library/book_manager.py +670 -0
  156. context_aware_translation/storage/models/__init__.py +3 -0
  157. context_aware_translation/storage/models/book.py +85 -0
  158. context_aware_translation/storage/models/config_profile.py +67 -0
  159. context_aware_translation/storage/models/endpoint_profile.py +97 -0
  160. context_aware_translation/storage/repositories/__init__.py +80 -0
  161. context_aware_translation/storage/repositories/document_repository.py +325 -0
  162. context_aware_translation/storage/repositories/llm_batch_store.py +165 -0
  163. context_aware_translation/storage/repositories/task_store.py +295 -0
  164. context_aware_translation/storage/repositories/term_repository.py +431 -0
  165. context_aware_translation/storage/repositories/translation_batch_task_store.py +315 -0
  166. context_aware_translation/storage/schema/__init__.py +17 -0
  167. context_aware_translation/storage/schema/book_db.py +1958 -0
  168. context_aware_translation/storage/schema/registry_db.py +949 -0
  169. context_aware_translation/storage/sqlite_locking.py +17 -0
  170. context_aware_translation/ui/AGENTS.md +115 -0
  171. context_aware_translation/ui/__init__.py +0 -0
  172. context_aware_translation/ui/chrome_sizing.py +21 -0
  173. context_aware_translation/ui/constants.py +104 -0
  174. context_aware_translation/ui/features/app_settings_pane.py +585 -0
  175. context_aware_translation/ui/features/app_setup_view.py +921 -0
  176. context_aware_translation/ui/features/document_images_view.py +725 -0
  177. context_aware_translation/ui/features/document_ocr_tab.py +1038 -0
  178. context_aware_translation/ui/features/document_translation_view.py +1401 -0
  179. context_aware_translation/ui/features/document_workspace_view.py +806 -0
  180. context_aware_translation/ui/features/library_view.py +401 -0
  181. context_aware_translation/ui/features/project_settings_pane.py +448 -0
  182. context_aware_translation/ui/features/queue_drawer_view.py +472 -0
  183. context_aware_translation/ui/features/terms_table_widget.py +599 -0
  184. context_aware_translation/ui/features/terms_view.py +1149 -0
  185. context_aware_translation/ui/features/work_view.py +697 -0
  186. context_aware_translation/ui/features/workflow_profile_editor.py +1312 -0
  187. context_aware_translation/ui/i18n.py +954 -0
  188. context_aware_translation/ui/json_utils.py +23 -0
  189. context_aware_translation/ui/main.py +140 -0
  190. context_aware_translation/ui/main_window.py +585 -0
  191. context_aware_translation/ui/qml/BootstrapProbe.qml +10 -0
  192. context_aware_translation/ui/qml/app/AppShellChrome.qml +161 -0
  193. context_aware_translation/ui/qml/dialogs/app_settings/AppSettingsDialogChrome.qml +77 -0
  194. context_aware_translation/ui/qml/dialogs/app_settings/AppSettingsPane.qml +131 -0
  195. context_aware_translation/ui/qml/dialogs/project_settings/ProjectSettingsDialogChrome.qml +77 -0
  196. context_aware_translation/ui/qml/dialogs/project_settings/ProjectSettingsPane.qml +205 -0
  197. context_aware_translation/ui/qml/document/DocumentShellChrome.qml +205 -0
  198. context_aware_translation/ui/qml/document/export/DocumentExportPaneChrome.qml +89 -0
  199. context_aware_translation/ui/qml/document/images/DocumentImagesPaneChrome.qml +457 -0
  200. context_aware_translation/ui/qml/document/ocr/DocumentOCRPaneChrome.qml +416 -0
  201. context_aware_translation/ui/qml/document/translation/DocumentTranslationPaneChrome.qml +136 -0
  202. context_aware_translation/ui/qml/project/ProjectShellChrome.qml +175 -0
  203. context_aware_translation/ui/qml/project/terms/TermsPaneChrome.qml +143 -0
  204. context_aware_translation/ui/qml/project/work_home/WorkHomeChrome.qml +331 -0
  205. context_aware_translation/ui/qml/queue/QueueShellChrome.qml +70 -0
  206. context_aware_translation/ui/qml_resources.py +57 -0
  207. context_aware_translation/ui/resources/__init__.py +0 -0
  208. context_aware_translation/ui/resources/styles.qss +283 -0
  209. context_aware_translation/ui/shell_hosts/__init__.py +11 -0
  210. context_aware_translation/ui/shell_hosts/app_settings_dialog_host.py +55 -0
  211. context_aware_translation/ui/shell_hosts/app_shell_host.py +77 -0
  212. context_aware_translation/ui/shell_hosts/document_shell_host.py +190 -0
  213. context_aware_translation/ui/shell_hosts/hybrid.py +156 -0
  214. context_aware_translation/ui/shell_hosts/project_settings_dialog_host.py +55 -0
  215. context_aware_translation/ui/shell_hosts/project_shell_host.py +144 -0
  216. context_aware_translation/ui/shell_hosts/queue_shell_host.py +60 -0
  217. context_aware_translation/ui/sleep_inhibitor.py +135 -0
  218. context_aware_translation/ui/startup.py +78 -0
  219. context_aware_translation/ui/tips.py +16 -0
  220. context_aware_translation/ui/translations/zh_CN.qm +0 -0
  221. context_aware_translation/ui/translations/zh_CN.ts +4351 -0
  222. context_aware_translation/ui/viewmodels/__init__.py +30 -0
  223. context_aware_translation/ui/viewmodels/app_settings_dialog.py +53 -0
  224. context_aware_translation/ui/viewmodels/app_settings_pane.py +63 -0
  225. context_aware_translation/ui/viewmodels/app_shell.py +87 -0
  226. context_aware_translation/ui/viewmodels/base.py +114 -0
  227. context_aware_translation/ui/viewmodels/document_export_pane.py +65 -0
  228. context_aware_translation/ui/viewmodels/document_images_pane.py +338 -0
  229. context_aware_translation/ui/viewmodels/document_ocr_pane.py +261 -0
  230. context_aware_translation/ui/viewmodels/document_shell.py +122 -0
  231. context_aware_translation/ui/viewmodels/document_translation_pane.py +113 -0
  232. context_aware_translation/ui/viewmodels/project_settings_dialog.py +51 -0
  233. context_aware_translation/ui/viewmodels/project_settings_pane.py +194 -0
  234. context_aware_translation/ui/viewmodels/project_shell.py +98 -0
  235. context_aware_translation/ui/viewmodels/queue_shell.py +51 -0
  236. context_aware_translation/ui/viewmodels/router.py +202 -0
  237. context_aware_translation/ui/viewmodels/terms_pane.py +230 -0
  238. context_aware_translation/ui/viewmodels/work_home.py +250 -0
  239. context_aware_translation/ui/widgets/AGENTS.md +90 -0
  240. context_aware_translation/ui/widgets/hybrid_controls.py +153 -0
  241. context_aware_translation/ui/widgets/image_viewer.py +343 -0
  242. context_aware_translation/ui/widgets/progress_widget.py +136 -0
  243. context_aware_translation/ui/widgets/table_support.py +59 -0
  244. context_aware_translation/ui/window_controllers.py +310 -0
  245. context_aware_translation/utils/AGENTS.md +157 -0
  246. context_aware_translation/utils/__init__.py +3 -0
  247. context_aware_translation/utils/chunking.py +147 -0
  248. context_aware_translation/utils/cjk_normalize.py +141 -0
  249. context_aware_translation/utils/compression_marker.py +18 -0
  250. context_aware_translation/utils/file_utils.py +34 -0
  251. context_aware_translation/utils/hard_wrap.py +87 -0
  252. context_aware_translation/utils/hashing.py +20 -0
  253. context_aware_translation/utils/image_utils.py +79 -0
  254. context_aware_translation/utils/llm_json_cleaner.py +91 -0
  255. context_aware_translation/utils/markdown_escape.py +195 -0
  256. context_aware_translation/utils/pandoc_export.py +52 -0
  257. context_aware_translation/utils/semantic_chunker.py +92 -0
  258. context_aware_translation/utils/string_similarity.py +33 -0
  259. context_aware_translation/utils/symbol_check.py +29 -0
  260. context_aware_translation/workflow/AGENTS.md +171 -0
  261. context_aware_translation/workflow/__init__.py +1 -0
  262. context_aware_translation/workflow/bootstrap.py +123 -0
  263. context_aware_translation/workflow/image_fetcher.py +53 -0
  264. context_aware_translation/workflow/ops/__init__.py +1 -0
  265. context_aware_translation/workflow/ops/bootstrap_ops.py +202 -0
  266. context_aware_translation/workflow/ops/export_ops.py +234 -0
  267. context_aware_translation/workflow/ops/glossary_ops.py +116 -0
  268. context_aware_translation/workflow/ops/import_ops.py +108 -0
  269. context_aware_translation/workflow/ops/import_support.py +209 -0
  270. context_aware_translation/workflow/ops/ocr_ops.py +118 -0
  271. context_aware_translation/workflow/ops/translation_ops.py +156 -0
  272. context_aware_translation/workflow/runtime.py +27 -0
  273. context_aware_translation/workflow/session.py +69 -0
  274. context_aware_translation/workflow/task_runtime.py +89 -0
  275. context_aware_translation/workflow/tasks/AGENTS.md +176 -0
  276. context_aware_translation/workflow/tasks/__init__.py +0 -0
  277. context_aware_translation/workflow/tasks/claims.py +83 -0
  278. context_aware_translation/workflow/tasks/engine_core.py +740 -0
  279. context_aware_translation/workflow/tasks/exceptions.py +13 -0
  280. context_aware_translation/workflow/tasks/execution/AGENTS.md +126 -0
  281. context_aware_translation/workflow/tasks/execution/__init__.py +0 -0
  282. context_aware_translation/workflow/tasks/execution/batch_translation_executor.py +819 -0
  283. context_aware_translation/workflow/tasks/execution/batch_translation_ops.py +1190 -0
  284. context_aware_translation/workflow/tasks/glossary_preflight.py +138 -0
  285. context_aware_translation/workflow/tasks/handlers/AGENTS.md +245 -0
  286. context_aware_translation/workflow/tasks/handlers/__init__.py +0 -0
  287. context_aware_translation/workflow/tasks/handlers/base.py +42 -0
  288. context_aware_translation/workflow/tasks/handlers/batch_translation.py +221 -0
  289. context_aware_translation/workflow/tasks/handlers/chunk_retranslation.py +194 -0
  290. context_aware_translation/workflow/tasks/handlers/glossary_export.py +182 -0
  291. context_aware_translation/workflow/tasks/handlers/glossary_extraction.py +255 -0
  292. context_aware_translation/workflow/tasks/handlers/glossary_review.py +196 -0
  293. context_aware_translation/workflow/tasks/handlers/glossary_translation.py +177 -0
  294. context_aware_translation/workflow/tasks/handlers/image_reembedding.py +371 -0
  295. context_aware_translation/workflow/tasks/handlers/ocr.py +333 -0
  296. context_aware_translation/workflow/tasks/handlers/translate_and_export.py +183 -0
  297. context_aware_translation/workflow/tasks/handlers/translation_manga.py +252 -0
  298. context_aware_translation/workflow/tasks/handlers/translation_text.py +207 -0
  299. context_aware_translation/workflow/tasks/models.py +70 -0
  300. context_aware_translation/workflow/tasks/translate_and_export_support.py +410 -0
  301. context_aware_translation/workflow/tasks/worker_deps.py +28 -0
  302. contextweave-0.2.0.dist-info/METADATA +185 -0
  303. contextweave-0.2.0.dist-info/RECORD +306 -0
  304. contextweave-0.2.0.dist-info/WHEEL +4 -0
  305. contextweave-0.2.0.dist-info/entry_points.txt +3 -0
  306. contextweave-0.2.0.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,134 @@
1
+ <!-- Parent: ../AGENTS.md -->
2
+ <!-- Generated: 2026-02-26 -->
3
+
4
+ # context_aware_translation
5
+
6
+ ## Purpose
7
+ Core package for LLM-powered document translation with context-aware glossary management. Implements hierarchical context trees (LSM-tree-like), multi-pass glossary extraction, SQLite storage with WAL mode, and PySide6 GUI. Supports text, PDF, EPUB, subtitle, scanned-book, and manga documents.
8
+
9
+ ## Key Files
10
+
11
+ | File | Description |
12
+ |------|-------------|
13
+ | `config.py` | All config dataclasses: `LLMConfig`, `ExtractorConfig`, `SummarizerConfig`, `TranslatorConfig`, `GlossaryConfig`, `ReviewConfig`, `OCRConfig`, `ImageReembeddingConfig`, `MangaTranslatorConfig`, `EndpointProfile`. Central config hub; note: `num_of_chunks_per_llm_call` must NOT exceed 10. |
14
+ | `adapters/files/glossary_io.py` | Glossary import/export to/from JSON files; handles term consolidation and validation at the file boundary. |
15
+ | `__init__.py` | Package initialization and logging configuration via `configure_logging()`. |
16
+
17
+ ## Subdirectories
18
+
19
+ | Directory | Purpose |
20
+ |-----------|---------|
21
+ | `core/` | Context tree and translation strategies: `context_tree.py` (hierarchical summarization), `context_manager.py` (context lifecycle), `translation_strategies.py` (strategy patterns), `context_extractor.py`, `progress.py`, `models.py`. |
22
+ | `documents/` | Document type implementations: `text.py`, `pdf.py`, `scanned_book.py`, `manga.py`, `base.py` (abstract Document class), plus EPUB support and alignment utilities. |
23
+ | `llm/` | LLM integration layer: `client.py` (OpenAI client with retry/timeout), `translator.py`, `extractor.py`, `glossary_translator.py`, `summarizor.py`, `reviewer.py`, `ocr.py`, `manga_ocr.py`, `language_detector.py`, `token_tracker.py`, `image_backends/` (Gemini, OpenAI, and Qwen image backends), `batch_jobs/` (batch processing). |
24
+ | `storage/` | SQLite persistence layer with `schema/` (raw DB/schema owners), `repositories/` (query/update services), `models/` (persisted records), `library/` (book lifecycle), and batch/task storage. |
25
+ | `adapters/` | Boundary adapters: Qt event bridge, Qt task engine, and Qt workers live under `adapters/qt/`; file import/export adapters live under `adapters/files/`. |
26
+ | `ui/` | PySide6 GUI surfaces: `main_window.py`, `features/`, `widgets/`, `resources/`, `translations/`, `i18n.py`. |
27
+ | `workflow/` | Task orchestration and execution: `service.py` (WorkflowService), `runtime.py`, `bootstrap.py`, `session.py`, `tasks/` (EngineCore, task handlers, claims, execution), `__init__.py` (exports entry points). |
28
+ | `utils/` | Helper utilities: `chunking.py`, `cjk_normalize.py`, `markdown_escape.py`, `image_utils.py`, `hashing.py`, `file_utils.py`, semantic chunking, string similarity, symbol checking. |
29
+ | `resources/` | Static assets: tokenizer data (`tokenizers/`) and OpenCC conversion configs and dictionaries (`opencc/`). |
30
+
31
+ ## For AI Agents
32
+
33
+ ### Working In This Directory
34
+
35
+ **Config Management:**
36
+ - Central config hub is `config.py` (all dataclasses for 5 required + 3 optional steps)
37
+ - `EndpointProfile` allows reusable API endpoint configs; step configs can reference profiles
38
+ - **Critical:** `num_of_chunks_per_llm_call` must NOT exceed 10 (causes hallucinations; default 5)
39
+ - `noise_filtering_threshold`: 0.5 default (0=lenient, 1=strict)
40
+ - `max_gleaning`: 3 default (multi-pass extraction; more = thorough but costlier)
41
+ - `ocr_dpi`: 150 default (72-300 range)
42
+
43
+ **Architecture:**
44
+ - `workflow/ops/*.py` contains workflow domain operations (translation/glossary/ocr/export/bootstrap)
45
+ - `workflow/tasks/engine_core.py` is the pure-Python task scheduling engine (no Qt)
46
+ - Task handlers in `workflow/tasks/handlers/` implement task_type-specific logic
47
+ - Qt adapter workers live in `adapters/qt/workers/` and bridge UI signals to workflow operations
48
+ - SQLite with WAL mode everywhere (registry.db global, book.db per-book, context_tree.db)
49
+
50
+ **Type Safety:**
51
+ - mypy strict mode enabled (excludes ui/ and tests/)
52
+ - All core logic is type-checked
53
+ - Config models are dataclass-based with validation in `__post_init__`
54
+
55
+ **Entry Points:**
56
+ - UI: `ui.main:main` (PySide6 GUI application)
57
+ - Workflow ops: `workflow/ops/translation_ops.py` (programmatic translation operations)
58
+
59
+ ### Testing Requirements
60
+
61
+ - Tests mirror main package structure in `tests/`
62
+ - Run `uv run pytest tests/` or `make test`
63
+ - Tests run in parallel via pytest-xdist
64
+ - Async tests use pytest-asyncio with auto mode
65
+ - Key test files: `test_context_tree.py`, `test_book_db.py`, `test_translation_view.py`, `test_glossary_view.py`, task worker tests
66
+
67
+ ### Common Patterns
68
+
69
+ **Config and Dataclasses:**
70
+ - All config classes inherit from `LLMConfig` or extend it
71
+ - Use `to_dict()` / `from_dict()` for serialization
72
+ - Profile references are resolved at config load time
73
+
74
+ **SQLite Storage:**
75
+ - All databases use WAL mode for concurrent access (WAL is a per-database journal mode, not a per-table setting)
76
+ - Use transaction context managers (`with db.conn:`)
77
+ - Term records include descriptions (multi-pass gleaning), occurrence counts, votes, timestamps
78
+
79
+ **Document Abstraction:**
80
+ - `Document` base class in `documents/base.py`
81
+ - Subclasses: `TextDocument`, `PDFDocument`, `ScannedBookDocument`, `MangaDocument`
82
+ - Document type determines available operations (OCR, image extraction, etc.)
83
+
84
+ **Context Trees:**
85
+ - Hierarchical summaries stored in `context_tree_db`
86
+ - Reduces token usage by 99%+ via LSM-tree-like compression
87
+ - Managed by `ContextManager` and `ContextTree` in core/
88
+
89
+ **LLM Integration:**
90
+ - `LLMClient` wraps OpenAI API with retry logic (tenacity)
91
+ - All LLM calls go through `client.py` (single source of truth for API config)
92
+ - Async execution with configurable concurrency
93
+ - Token tracking via `TokenTracker`
94
+
95
+ **PySide6 UI:**
96
+ - App/project/document chrome is migrating to hybrid QML hosts under `ui/qml/`, `ui/viewmodels/`, and `ui/shell_hosts/`
97
+ - `MainWindow` is moving toward composition and lifetime management; shell policy should live in hosts/viewmodels instead of widget-local navigation code
98
+ - Translation strings live in `.ts` / `.qm` files (zh_CN), and QML-backed chrome still depends on the same Qt translation pipeline
99
+ - Workers handle long-running operations (glossary extraction, OCR, translation)
100
+ - Task status monitoring still flows through `TaskStatusCard`, `TaskActivityPanel`, and the queue drawer surfaces
101
+
102
+ **Task Execution:**
103
+ - Pure-Python `EngineCore` in `workflow/tasks/engine_core.py`
104
+ - Task handlers implement `TaskTypeHandler` interface
105
+ - Claims-based resource arbitration to prevent concurrent conflicts
106
+ - Config snapshot capture for fault tolerance
107
+
108
+ ## Dependencies
109
+
110
+ ### Internal
111
+ - `context_aware_translation.config` - central config models
112
+ - `context_aware_translation.core.*` - context tree, strategies, progress tracking
113
+ - `context_aware_translation.documents.*` - document type implementations
114
+ - `context_aware_translation.llm.*` - LLM client, translator, extractor, OCR
115
+ - `context_aware_translation.storage.*` - SQLite repos, book manager
116
+ - `context_aware_translation.workflow.*` - task orchestration
117
+ - `context_aware_translation.ui.*` - PySide6 GUI (type-checked separately)
118
+ - `context_aware_translation.utils.*` - text/image processing utilities
119
+
120
+ ### External (Top-Level)
121
+ - `pyside6` - Qt GUI framework (mypy strict excluded)
122
+ - `openai` - OpenAI API client
123
+ - `google-genai` - Google Gemini API client
124
+ - `tenacity` - Exponential backoff retry logic
125
+ - `torch`, `transformers` - ML models for embeddings, tokenization
126
+ - `faiss-cpu` - Vector similarity for context relevance
127
+ - `pikepdf`, `pypdfium2` - PDF parsing and extraction
128
+ - `pypandoc-binary` - Document format conversion (Markdown, DOCX)
129
+ - `semchunk` - Semantic text chunking
130
+ - `yaml` - Config file parsing
131
+ - `pillow` - Image processing
132
+ - `pydantic` - Config validation (used selectively)
133
+
134
+ <!-- MANUAL: -->
@@ -0,0 +1,114 @@
1
+ """
2
+ Term disambiguation package.
3
+ """
4
+
5
+ import logging
6
+ import logging.handlers
7
+ import os
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from context_aware_translation.config import Config
14
+
15
+ # Track if we've already configured logging to avoid duplicate handlers
16
+ _logging_configured = False
17
+
18
+ # Default log file location
19
+ DEFAULT_LOG_MAX_BYTES = 10 * 1024 * 1024 # 10MB
20
+ DEFAULT_LOG_BACKUP_COUNT = 50
21
+
22
+
23
class SafeRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """Rotating file handler that tolerates disappearing parent directories.

    Book directories can be removed while background threads still emit logs.
    In that case, silently drop file writes instead of surfacing noisy
    logging tracebacks to users.
    """

    # Failures that mean "the log file is no longer usable" (path removed,
    # stream already closed, rollover reopen failed). FileNotFoundError and
    # PermissionError are OSError subclasses, so they are covered as well.
    _IGNORED_ERRORS = (OSError, ValueError)

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record* to the log file, dropping it if the file is unusable.

        Args:
            record: The log record to write.
        """
        # Fast path: when the directory is already gone, do not even try to
        # reopen or roll over the file.
        if not Path(self.baseFilename).parent.exists():
            return
        try:
            super().emit(record)
        except self._IGNORED_ERRORS:
            # Defensive only: the standard library's emit() normally routes
            # failures to handleError() instead of raising them.
            return

    def handleError(self, record: logging.LogRecord) -> None:
        """Suppress file-related failures; delegate everything else.

        The standard library's ``emit()`` catches exceptions itself and calls
        ``handleError()``, which prints a traceback to ``sys.stderr`` while
        ``logging.raiseExceptions`` is true. Overriding it here is what
        actually keeps the directory-removal race quiet.

        Args:
            record: The log record that was being processed when the error
                occurred.
        """
        # handleError() is invoked from inside the ``except`` block of emit(),
        # so the active exception is available through sys.exc_info().
        active_error = sys.exc_info()[1]
        if isinstance(active_error, self._IGNORED_ERRORS):
            return
        super().handleError(record)
41
+
42
+
43
def configure_logging(config: "Config") -> None:
    """
    Set up root-logger handlers from a Config.

    A console handler on stderr (INFO and above) is added once; a rotating
    file handler writing ``app.log`` inside ``config.log_dir`` (DEBUG and
    above) is recreated on every call, so calling this again moves the log
    file while keeping the console handler.

    Nothing is configured when running under pytest.

    Args:
        config: Config instance with log_dir set
    """
    global _logging_configured

    # Under pytest the test runner owns logging; only record that we were called.
    running_under_pytest = (
        "pytest" in sys.modules
        or "_pytest" in sys.modules
        or any("pytest" in arg.lower() for arg in sys.argv)
        or os.environ.get("PYTEST_CURRENT_TEST") is not None
    )
    if running_under_pytest:
        _logging_configured = True
        return

    root = logging.root

    assert config.log_dir is not None, "log_dir should be set after Config.__post_init__"
    target = config.log_dir / "app.log"

    file_format = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(name)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    console_format = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s", datefmt="%H:%M:%S")

    # Add the stderr console handler only when no such handler is installed yet.
    for installed in root.handlers:
        if isinstance(installed, logging.StreamHandler) and installed.stream is sys.stderr:
            break
    else:
        stderr_handler = logging.StreamHandler(sys.stderr)
        stderr_handler.setFormatter(console_format)
        stderr_handler.setLevel(logging.INFO)  # console shows INFO and above
        root.addHandler(stderr_handler)

    # Drop any previous rotating file handler so the new location takes effect.
    for installed in list(root.handlers):
        if isinstance(installed, logging.handlers.RotatingFileHandler):
            root.removeHandler(installed)
            installed.close()

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        rotating = SafeRotatingFileHandler(
            filename=str(target),
            maxBytes=DEFAULT_LOG_MAX_BYTES,
            backupCount=DEFAULT_LOG_BACKUP_COUNT,
            encoding="utf-8",
        )
        rotating.setFormatter(file_format)
        rotating.setLevel(logging.DEBUG)  # the file receives every level

        root.setLevel(logging.DEBUG)
        root.addHandler(rotating)
        root.info(f"Logging to file: {target.absolute()}")
    except OSError as e:  # PermissionError is an OSError subclass
        root.warning(f"Could not create log file at {target}: {e}")
        root.warning("Continuing with console logging only")

    _logging_configured = True
@@ -0,0 +1 @@
1
+ """Qt adapter layer for UI-facing orchestration and workers."""
@@ -0,0 +1,3 @@
1
+ """File-format adapters."""
2
+
3
+ __all__ = ["glossary_io"]
@@ -0,0 +1,203 @@
1
+ """Glossary import/export functionality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, cast
9
+
10
+ from context_aware_translation.core.models import normalize_term_type, ordered_description_values
11
+
12
+ if TYPE_CHECKING:
13
+ from context_aware_translation.storage.schema.book_db import SQLiteBookDB
14
+
15
+ from context_aware_translation.storage.schema.book_db import TermRecord
16
+
17
+
18
def _consolidate_description(descriptions: dict[str, str]) -> str:
    """Join the values returned by ``ordered_description_values`` with single spaces."""
    ordered_values = ordered_description_values(descriptions)
    return " ".join(ordered_values)
20
+
21
+
22
def export_glossary(
    db: SQLiteBookDB,
    output_path: Path,
    summarized_descriptions: dict[str, str] | None = None,
) -> int:
    """Write every term from ``db.list_terms()`` to a JSON file.

    Args:
        db: Book database providing ``list_terms()``.
        output_path: Destination file; it is opened for writing as UTF-8.
        summarized_descriptions: Optional per-key description overrides. A
            term whose key is present uses the override; any other term uses
            its consolidated stored descriptions.

    Returns:
        The number of terms exported.
    """
    overrides = {} if summarized_descriptions is None else summarized_descriptions

    def _description_for(term):
        # An override wins whenever the key is present, whatever its value.
        if term.key in overrides:
            return overrides.get(term.key)
        return _consolidate_description(term.descriptions)

    entries = [
        {
            "key": term.key,
            "translated_name": term.translated_name,
            "description": _description_for(term),
            "term_type": term.term_type,
            "ignored": term.ignored,
            "is_reviewed": term.is_reviewed,
        }
        for term in db.list_terms()
    ]

    payload = {"version": 1, "terms": entries}
    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)

    return len(entries)
55
+
56
+
57
+ def _validate_glossary_json(data: object) -> None:
58
+ """Validate glossary JSON structure. Raises ValueError on invalid data."""
59
+ if not isinstance(data, dict):
60
+ raise ValueError("Invalid glossary file: expected a JSON object")
61
+
62
+ version = data.get("version")
63
+ if version is not None and version > 1:
64
+ raise ValueError(
65
+ f"Unsupported glossary format version: {version}. This version of the application supports version 1."
66
+ )
67
+
68
+ if "terms" not in data:
69
+ raise ValueError("Invalid glossary file: missing 'terms' key")
70
+
71
+ terms = data["terms"]
72
+ if not isinstance(terms, list):
73
+ raise ValueError("Invalid glossary file: 'terms' must be a list")
74
+
75
+ for i, entry in enumerate(terms):
76
+ if not isinstance(entry, dict):
77
+ raise ValueError(f"Invalid glossary file: entry {i} must be a dict")
78
+ if "key" not in entry or not entry["key"]:
79
+ raise ValueError(f"Invalid glossary file: entry {i} missing or empty 'key'")
80
+
81
+
82
+ def _validate_simple_glossary_json(data: object) -> None:
83
+ """Validate flat term-to-translation JSON mappings."""
84
+ if not isinstance(data, dict):
85
+ raise ValueError("Invalid glossary file: expected a JSON object")
86
+ for key, value in data.items():
87
+ if not isinstance(key, str) or not key.strip():
88
+ raise ValueError("Invalid glossary file: flat mapping keys must be non-empty strings")
89
+ if not isinstance(value, str) or not value.strip():
90
+ raise ValueError("Invalid glossary file: flat mapping values must be non-empty strings")
91
+
92
+
93
def _build_import_term_record(
    key: str,
    *,
    translated_name: str | None = None,
    description: str = "",
    term_type: str | None = None,
    ignored: bool = False,
    is_reviewed: bool = False,
) -> TermRecord:
    """Create a ``TermRecord`` for a term that comes from an imported file.

    A non-empty *description* is stored under the ``"imported"`` key; an empty
    one leaves the descriptions mapping empty. ``created_at`` and
    ``updated_at`` both receive the current time, and *term_type* is passed
    through ``normalize_term_type``.
    """
    timestamp = time.time()

    imported_descriptions: dict[str, str] = {}
    if description:
        imported_descriptions["imported"] = description

    return TermRecord(
        key=key,
        descriptions=imported_descriptions,
        occurrence={},
        votes=1,
        total_api_calls=1,
        term_type=normalize_term_type(term_type),
        new_translation=None,
        translated_name=translated_name,
        ignored=ignored,
        is_reviewed=is_reviewed,
        created_at=timestamp,
        updated_at=timestamp,
    )
117
+
118
+
119
+ def _import_structured_glossary(
120
+ db: SQLiteBookDB,
121
+ data: dict[str, object],
122
+ *,
123
+ include_translations: bool,
124
+ ) -> int:
125
+ # Wipe existing data
126
+ db.delete_all_term_memory(auto_commit=False)
127
+ db.conn.execute("DELETE FROM terms")
128
+
129
+ term_records = []
130
+ for entry in cast(list[object], data["terms"]):
131
+ if not isinstance(entry, dict):
132
+ continue
133
+ term_records.append(
134
+ _build_import_term_record(
135
+ str(entry["key"]),
136
+ translated_name=entry.get("translated_name") if include_translations else None,
137
+ description=str(entry.get("description", "")),
138
+ term_type=entry.get("term_type") if isinstance(entry.get("term_type"), str) else None,
139
+ ignored=bool(entry.get("ignored", False)),
140
+ is_reviewed=bool(entry.get("is_reviewed", False)),
141
+ )
142
+ )
143
+
144
+ db.upsert_terms(term_records)
145
+ return len(term_records)
146
+
147
+
148
+ def _import_simple_glossary(
149
+ db: SQLiteBookDB,
150
+ data: dict[str, object],
151
+ *,
152
+ include_translations: bool,
153
+ ) -> int:
154
+ term_records: list[TermRecord] = []
155
+ now = time.time()
156
+ for raw_key, raw_value in data.items():
157
+ key = raw_key.strip()
158
+ translated_name = raw_value.strip() if isinstance(raw_value, str) else ""
159
+ if not key:
160
+ raise ValueError("Invalid glossary file: flat mapping keys must be non-empty strings")
161
+ existing = db.get_term(key)
162
+ if existing is not None:
163
+ existing.translated_name = translated_name if include_translations else None
164
+ existing.ignored = False
165
+ existing.is_reviewed = True
166
+ existing.updated_at = now
167
+ term_records.append(existing)
168
+ continue
169
+ term_records.append(
170
+ _build_import_term_record(
171
+ key,
172
+ translated_name=translated_name if include_translations else None,
173
+ ignored=False,
174
+ is_reviewed=True,
175
+ )
176
+ )
177
+
178
+ if term_records:
179
+ db.upsert_terms(term_records)
180
+ return len(term_records)
181
+
182
+
183
def import_glossary(
    db: SQLiteBookDB,
    input_path: Path,
    include_translations: bool = True,
) -> int:
    """Load a glossary JSON file into *db*.

    A JSON object containing a ``terms`` or ``version`` key is treated as a
    structured glossary and replaces the existing glossary. Anything else is
    validated as a flat term-to-translation mapping and merged into the
    existing glossary.

    Args:
        db: Target book database.
        input_path: JSON file to read (UTF-8).
        include_translations: Forwarded to the importer; when false,
            translations from the file are not stored.

    Returns:
        The number of terms processed.
    """
    with open(input_path, encoding="utf-8") as handle:
        payload = json.load(handle)

    is_structured = isinstance(payload, dict) and ("terms" in payload or "version" in payload)
    if not is_structured:
        _validate_simple_glossary_json(payload)
        return _import_simple_glossary(db, payload, include_translations=include_translations)

    _validate_glossary_json(payload)
    return _import_structured_glossary(db, payload, include_translations=include_translations)
@@ -0,0 +1 @@
1
+ """Qt-specific adapters used by the desktop UI."""
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from PySide6.QtCore import QObject, Qt, QThread, Signal, Slot
4
+
5
+ from context_aware_translation.application.events import (
6
+ ApplicationEventKind,
7
+ ApplicationEventPayload,
8
+ ApplicationEventSubscriber,
9
+ DocumentInvalidatedEvent,
10
+ ProjectsInvalidatedEvent,
11
+ QueueChangedEvent,
12
+ SetupInvalidatedEvent,
13
+ TermsInvalidatedEvent,
14
+ WorkboardInvalidatedEvent,
15
+ )
16
+
17
+
18
class QtApplicationEventBridge(QObject):
    """Re-emit application events as Qt signals.

    The application event bus remains the source of truth; this object only
    forwards each event it receives. Every event goes out on
    ``event_received`` and, when its kind matches, on one kind-specific signal.
    """

    # Emitted for every event, whatever its kind.
    event_received = Signal(object)
    # One signal per event kind.
    projects_invalidated = Signal(object)
    queue_changed = Signal(object)
    workboard_invalidated = Signal(object)
    document_invalidated = Signal(object)
    terms_invalidated = Signal(object)
    setup_invalidated = Signal(object)

    # Internal hand-off used when an event arrives on a foreign thread.
    _enqueue_event = Signal(object)

    def __init__(self, events: ApplicationEventSubscriber, parent: QObject | None = None) -> None:
        """Subscribe to *events* and wire up the cross-thread hand-off."""
        super().__init__(parent)
        self._subscription = events.subscribe(self._on_event_from_any_thread)
        # Queued connection: the slot is invoked via the event loop of this
        # object's thread rather than directly in the emitting thread.
        self._enqueue_event.connect(self._dispatch_event, Qt.ConnectionType.QueuedConnection)

    def close(self) -> None:
        """Close the subscription created in ``__init__``."""
        self._subscription.close()

    def __del__(self) -> None:
        self.close()

    def _on_event_from_any_thread(self, event: ApplicationEventPayload) -> None:
        """Subscriber callback: dispatch directly on this object's thread, else queue."""
        called_on_own_thread = QThread.currentThread() is self.thread()
        if not called_on_own_thread:
            self._enqueue_event.emit(event)
            return
        self._dispatch_event(event)

    @Slot(object)
    def _dispatch_event(self, event: ApplicationEventPayload) -> None:
        """Emit ``event_received`` and then the signal matching ``event.kind``, if any."""
        self.event_received.emit(event)

        routes = (
            (ApplicationEventKind.PROJECTS_INVALIDATED, self.projects_invalidated),
            (ApplicationEventKind.QUEUE_CHANGED, self.queue_changed),
            (ApplicationEventKind.WORKBOARD_INVALIDATED, self.workboard_invalidated),
            (ApplicationEventKind.DOCUMENT_INVALIDATED, self.document_invalidated),
            (ApplicationEventKind.TERMS_INVALIDATED, self.terms_invalidated),
            (ApplicationEventKind.SETUP_INVALIDATED, self.setup_invalidated),
        )
        # Identity comparison, first match wins; unknown kinds emit nothing further.
        for kind, signal in routes:
            if event.kind is kind:
                signal.emit(event)
                break
67
+
68
+
69
+ __all__ = [
70
+ "QtApplicationEventBridge",
71
+ "ApplicationEventPayload",
72
+ "ProjectsInvalidatedEvent",
73
+ "QueueChangedEvent",
74
+ "WorkboardInvalidatedEvent",
75
+ "DocumentInvalidatedEvent",
76
+ "TermsInvalidatedEvent",
77
+ "SetupInvalidatedEvent",
78
+ ]