scitex 2.7.0__py3-none-any.whl → 2.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. scitex/__init__.py +6 -2
  2. scitex/__version__.py +1 -1
  3. scitex/audio/README.md +52 -0
  4. scitex/audio/__init__.py +384 -0
  5. scitex/audio/__main__.py +129 -0
  6. scitex/audio/_tts.py +334 -0
  7. scitex/audio/engines/__init__.py +44 -0
  8. scitex/audio/engines/base.py +275 -0
  9. scitex/audio/engines/elevenlabs_engine.py +143 -0
  10. scitex/audio/engines/gtts_engine.py +162 -0
  11. scitex/audio/engines/pyttsx3_engine.py +131 -0
  12. scitex/audio/mcp_server.py +757 -0
  13. scitex/bridge/_helpers.py +1 -1
  14. scitex/bridge/_plt_vis.py +1 -1
  15. scitex/bridge/_stats_vis.py +1 -1
  16. scitex/dev/plt/__init__.py +272 -0
  17. scitex/dev/plt/plot_mpl_axhline.py +28 -0
  18. scitex/dev/plt/plot_mpl_axhspan.py +28 -0
  19. scitex/dev/plt/plot_mpl_axvline.py +28 -0
  20. scitex/dev/plt/plot_mpl_axvspan.py +28 -0
  21. scitex/dev/plt/plot_mpl_bar.py +29 -0
  22. scitex/dev/plt/plot_mpl_barh.py +29 -0
  23. scitex/dev/plt/plot_mpl_boxplot.py +28 -0
  24. scitex/dev/plt/plot_mpl_contour.py +31 -0
  25. scitex/dev/plt/plot_mpl_contourf.py +31 -0
  26. scitex/dev/plt/plot_mpl_errorbar.py +30 -0
  27. scitex/dev/plt/plot_mpl_eventplot.py +28 -0
  28. scitex/dev/plt/plot_mpl_fill.py +30 -0
  29. scitex/dev/plt/plot_mpl_fill_between.py +31 -0
  30. scitex/dev/plt/plot_mpl_hexbin.py +28 -0
  31. scitex/dev/plt/plot_mpl_hist.py +28 -0
  32. scitex/dev/plt/plot_mpl_hist2d.py +28 -0
  33. scitex/dev/plt/plot_mpl_imshow.py +29 -0
  34. scitex/dev/plt/plot_mpl_pcolormesh.py +31 -0
  35. scitex/dev/plt/plot_mpl_pie.py +29 -0
  36. scitex/dev/plt/plot_mpl_plot.py +29 -0
  37. scitex/dev/plt/plot_mpl_quiver.py +31 -0
  38. scitex/dev/plt/plot_mpl_scatter.py +28 -0
  39. scitex/dev/plt/plot_mpl_stackplot.py +31 -0
  40. scitex/dev/plt/plot_mpl_stem.py +29 -0
  41. scitex/dev/plt/plot_mpl_step.py +29 -0
  42. scitex/dev/plt/plot_mpl_violinplot.py +28 -0
  43. scitex/dev/plt/plot_sns_barplot.py +29 -0
  44. scitex/dev/plt/plot_sns_boxplot.py +29 -0
  45. scitex/dev/plt/plot_sns_heatmap.py +28 -0
  46. scitex/dev/plt/plot_sns_histplot.py +29 -0
  47. scitex/dev/plt/plot_sns_kdeplot.py +29 -0
  48. scitex/dev/plt/plot_sns_lineplot.py +31 -0
  49. scitex/dev/plt/plot_sns_scatterplot.py +29 -0
  50. scitex/dev/plt/plot_sns_stripplot.py +29 -0
  51. scitex/dev/plt/plot_sns_swarmplot.py +29 -0
  52. scitex/dev/plt/plot_sns_violinplot.py +29 -0
  53. scitex/dev/plt/plot_stx_bar.py +29 -0
  54. scitex/dev/plt/plot_stx_barh.py +29 -0
  55. scitex/dev/plt/plot_stx_box.py +28 -0
  56. scitex/dev/plt/plot_stx_boxplot.py +28 -0
  57. scitex/dev/plt/plot_stx_conf_mat.py +28 -0
  58. scitex/dev/plt/plot_stx_contour.py +31 -0
  59. scitex/dev/plt/plot_stx_ecdf.py +28 -0
  60. scitex/dev/plt/plot_stx_errorbar.py +30 -0
  61. scitex/dev/plt/plot_stx_fill_between.py +31 -0
  62. scitex/dev/plt/plot_stx_fillv.py +28 -0
  63. scitex/dev/plt/plot_stx_heatmap.py +28 -0
  64. scitex/dev/plt/plot_stx_image.py +28 -0
  65. scitex/dev/plt/plot_stx_imshow.py +28 -0
  66. scitex/dev/plt/plot_stx_joyplot.py +28 -0
  67. scitex/dev/plt/plot_stx_kde.py +28 -0
  68. scitex/dev/plt/plot_stx_line.py +28 -0
  69. scitex/dev/plt/plot_stx_mean_ci.py +28 -0
  70. scitex/dev/plt/plot_stx_mean_std.py +28 -0
  71. scitex/dev/plt/plot_stx_median_iqr.py +28 -0
  72. scitex/dev/plt/plot_stx_raster.py +28 -0
  73. scitex/dev/plt/plot_stx_rectangle.py +28 -0
  74. scitex/dev/plt/plot_stx_scatter.py +29 -0
  75. scitex/dev/plt/plot_stx_shaded_line.py +29 -0
  76. scitex/dev/plt/plot_stx_violin.py +28 -0
  77. scitex/dev/plt/plot_stx_violinplot.py +28 -0
  78. scitex/diagram/README.md +197 -0
  79. scitex/diagram/__init__.py +48 -0
  80. scitex/diagram/_compile.py +312 -0
  81. scitex/diagram/_diagram.py +355 -0
  82. scitex/diagram/_presets.py +173 -0
  83. scitex/diagram/_schema.py +182 -0
  84. scitex/diagram/_split.py +278 -0
  85. scitex/fig/__init__.py +352 -0
  86. scitex/{vis → fig}/backend/_parser.py +1 -1
  87. scitex/{vis → fig}/canvas.py +1 -1
  88. scitex/{vis → fig}/editor/__init__.py +5 -2
  89. scitex/{vis → fig}/editor/_dearpygui_editor.py +1 -1
  90. scitex/{vis → fig}/editor/_defaults.py +70 -5
  91. scitex/{vis → fig}/editor/_mpl_editor.py +1 -1
  92. scitex/{vis → fig}/editor/_qt_editor.py +182 -2
  93. scitex/{vis → fig}/editor/_tkinter_editor.py +1 -1
  94. scitex/fig/editor/edit/__init__.py +50 -0
  95. scitex/fig/editor/edit/backend_detector.py +109 -0
  96. scitex/fig/editor/edit/bundle_resolver.py +240 -0
  97. scitex/fig/editor/edit/editor_launcher.py +239 -0
  98. scitex/fig/editor/edit/manual_handler.py +53 -0
  99. scitex/fig/editor/edit/panel_loader.py +232 -0
  100. scitex/fig/editor/edit/path_resolver.py +67 -0
  101. scitex/fig/editor/flask_editor/_bbox.py +1299 -0
  102. scitex/fig/editor/flask_editor/_core.py +1429 -0
  103. scitex/{vis → fig}/editor/flask_editor/_plotter.py +38 -4
  104. scitex/fig/editor/flask_editor/_renderer.py +813 -0
  105. scitex/fig/editor/flask_editor/static/css/base/reset.css +41 -0
  106. scitex/fig/editor/flask_editor/static/css/base/typography.css +16 -0
  107. scitex/fig/editor/flask_editor/static/css/base/variables.css +85 -0
  108. scitex/fig/editor/flask_editor/static/css/components/buttons.css +217 -0
  109. scitex/fig/editor/flask_editor/static/css/components/context-menu.css +93 -0
  110. scitex/fig/editor/flask_editor/static/css/components/dropdown.css +57 -0
  111. scitex/fig/editor/flask_editor/static/css/components/forms.css +112 -0
  112. scitex/fig/editor/flask_editor/static/css/components/modal.css +59 -0
  113. scitex/fig/editor/flask_editor/static/css/components/sections.css +212 -0
  114. scitex/fig/editor/flask_editor/static/css/features/canvas.css +176 -0
  115. scitex/fig/editor/flask_editor/static/css/features/element-inspector.css +190 -0
  116. scitex/fig/editor/flask_editor/static/css/features/loading.css +59 -0
  117. scitex/fig/editor/flask_editor/static/css/features/overlay.css +45 -0
  118. scitex/fig/editor/flask_editor/static/css/features/panel-grid.css +95 -0
  119. scitex/fig/editor/flask_editor/static/css/features/selection.css +101 -0
  120. scitex/fig/editor/flask_editor/static/css/features/statistics.css +138 -0
  121. scitex/fig/editor/flask_editor/static/css/index.css +31 -0
  122. scitex/fig/editor/flask_editor/static/css/layout/container.css +7 -0
  123. scitex/fig/editor/flask_editor/static/css/layout/controls.css +56 -0
  124. scitex/fig/editor/flask_editor/static/css/layout/preview.css +78 -0
  125. scitex/fig/editor/flask_editor/static/js/alignment/axis.js +314 -0
  126. scitex/fig/editor/flask_editor/static/js/alignment/basic.js +107 -0
  127. scitex/fig/editor/flask_editor/static/js/alignment/distribute.js +54 -0
  128. scitex/fig/editor/flask_editor/static/js/canvas/canvas.js +172 -0
  129. scitex/fig/editor/flask_editor/static/js/canvas/dragging.js +258 -0
  130. scitex/fig/editor/flask_editor/static/js/canvas/resize.js +48 -0
  131. scitex/fig/editor/flask_editor/static/js/canvas/selection.js +71 -0
  132. scitex/fig/editor/flask_editor/static/js/core/api.js +288 -0
  133. scitex/fig/editor/flask_editor/static/js/core/state.js +143 -0
  134. scitex/fig/editor/flask_editor/static/js/core/utils.js +245 -0
  135. scitex/fig/editor/flask_editor/static/js/dev/element-inspector.js +992 -0
  136. scitex/fig/editor/flask_editor/static/js/editor/bbox.js +339 -0
  137. scitex/fig/editor/flask_editor/static/js/editor/element-drag.js +286 -0
  138. scitex/fig/editor/flask_editor/static/js/editor/overlay.js +371 -0
  139. scitex/fig/editor/flask_editor/static/js/editor/preview.js +293 -0
  140. scitex/fig/editor/flask_editor/static/js/main.js +426 -0
  141. scitex/fig/editor/flask_editor/static/js/shortcuts/context-menu.js +152 -0
  142. scitex/fig/editor/flask_editor/static/js/shortcuts/keyboard.js +265 -0
  143. scitex/fig/editor/flask_editor/static/js/ui/controls.js +184 -0
  144. scitex/fig/editor/flask_editor/static/js/ui/download.js +57 -0
  145. scitex/fig/editor/flask_editor/static/js/ui/help.js +100 -0
  146. scitex/fig/editor/flask_editor/static/js/ui/theme.js +34 -0
  147. scitex/fig/editor/flask_editor/templates/__init__.py +123 -0
  148. scitex/fig/editor/flask_editor/templates/_html.py +852 -0
  149. scitex/fig/editor/flask_editor/templates/_scripts.py +4933 -0
  150. scitex/fig/editor/flask_editor/templates/_styles.py +1658 -0
  151. scitex/{vis → fig}/io/__init__.py +13 -1
  152. scitex/fig/io/_bundle.py +1058 -0
  153. scitex/{vis → fig}/io/_canvas.py +1 -1
  154. scitex/{vis → fig}/io/_data.py +1 -1
  155. scitex/{vis → fig}/io/_export.py +1 -1
  156. scitex/{vis → fig}/io/_load.py +1 -1
  157. scitex/{vis → fig}/io/_panel.py +1 -1
  158. scitex/{vis → fig}/io/_save.py +1 -1
  159. scitex/{vis → fig}/model/__init__.py +1 -1
  160. scitex/{vis → fig}/model/_annotations.py +1 -1
  161. scitex/{vis → fig}/model/_axes.py +1 -1
  162. scitex/{vis → fig}/model/_figure.py +1 -1
  163. scitex/{vis → fig}/model/_guides.py +1 -1
  164. scitex/{vis → fig}/model/_plot.py +1 -1
  165. scitex/{vis → fig}/model/_styles.py +1 -1
  166. scitex/{vis → fig}/utils/__init__.py +1 -1
  167. scitex/io/__init__.py +22 -26
  168. scitex/io/_bundle.py +493 -0
  169. scitex/io/_flush.py +5 -2
  170. scitex/io/_load.py +98 -0
  171. scitex/io/_load_modules/_H5Explorer.py +5 -2
  172. scitex/io/_load_modules/_canvas.py +2 -2
  173. scitex/io/_load_modules/_image.py +3 -4
  174. scitex/io/_load_modules/_txt.py +4 -2
  175. scitex/io/_metadata.py +34 -324
  176. scitex/io/_metadata_modules/__init__.py +46 -0
  177. scitex/io/_metadata_modules/_embed.py +70 -0
  178. scitex/io/_metadata_modules/_read.py +64 -0
  179. scitex/io/_metadata_modules/_utils.py +79 -0
  180. scitex/io/_metadata_modules/embed_metadata_jpeg.py +74 -0
  181. scitex/io/_metadata_modules/embed_metadata_pdf.py +53 -0
  182. scitex/io/_metadata_modules/embed_metadata_png.py +26 -0
  183. scitex/io/_metadata_modules/embed_metadata_svg.py +62 -0
  184. scitex/io/_metadata_modules/read_metadata_jpeg.py +57 -0
  185. scitex/io/_metadata_modules/read_metadata_pdf.py +51 -0
  186. scitex/io/_metadata_modules/read_metadata_png.py +39 -0
  187. scitex/io/_metadata_modules/read_metadata_svg.py +44 -0
  188. scitex/io/_qr_utils.py +5 -3
  189. scitex/io/_save.py +548 -30
  190. scitex/io/_save_modules/_canvas.py +3 -3
  191. scitex/io/_save_modules/_image.py +5 -9
  192. scitex/io/_save_modules/_tex.py +7 -4
  193. scitex/io/_zip_bundle.py +439 -0
  194. scitex/io/utils/h5_to_zarr.py +11 -9
  195. scitex/msword/__init__.py +255 -0
  196. scitex/msword/profiles.py +357 -0
  197. scitex/msword/reader.py +753 -0
  198. scitex/msword/utils.py +289 -0
  199. scitex/msword/writer.py +362 -0
  200. scitex/plt/__init__.py +5 -2
  201. scitex/plt/_subplots/_AxesWrapper.py +6 -6
  202. scitex/plt/_subplots/_AxisWrapper.py +15 -9
  203. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/__init__.py +36 -0
  204. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_labels.py +264 -0
  205. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_metadata.py +213 -0
  206. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_visual.py +128 -0
  207. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/__init__.py +59 -0
  208. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_base.py +34 -0
  209. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_scientific.py +593 -0
  210. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_statistical.py +654 -0
  211. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_stx_aliases.py +527 -0
  212. scitex/plt/_subplots/_AxisWrapperMixins/_RawMatplotlibMixin.py +321 -0
  213. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/__init__.py +33 -0
  214. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_base.py +152 -0
  215. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +600 -0
  216. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +79 -5
  217. scitex/plt/_subplots/_FigWrapper.py +6 -6
  218. scitex/plt/_subplots/_SubplotsWrapper.py +28 -18
  219. scitex/plt/_subplots/_export_as_csv.py +35 -5
  220. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +8 -0
  221. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +10 -21
  222. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +18 -7
  223. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +28 -12
  224. scitex/plt/_subplots/_export_as_csv_formatters/_format_matshow.py +10 -4
  225. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_imshow.py +13 -1
  226. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +12 -2
  227. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +10 -3
  228. scitex/plt/_subplots/_export_as_csv_formatters/_format_quiver.py +10 -4
  229. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +18 -3
  230. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +44 -36
  231. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +14 -2
  232. scitex/plt/_subplots/_export_as_csv_formatters/_format_streamplot.py +11 -5
  233. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_bar.py +84 -0
  234. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_barh.py +85 -0
  235. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_conf_mat.py +14 -3
  236. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_contour.py +54 -0
  237. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_ecdf.py +14 -2
  238. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_errorbar.py +120 -0
  239. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_heatmap.py +16 -6
  240. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_image.py +29 -19
  241. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_imshow.py +63 -0
  242. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_joyplot.py +22 -5
  243. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_ci.py +18 -14
  244. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_std.py +18 -14
  245. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_median_iqr.py +18 -14
  246. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_raster.py +10 -2
  247. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_scatter.py +51 -0
  248. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_scatter_hist.py +18 -9
  249. scitex/plt/ax/_plot/_stx_ecdf.py +4 -2
  250. scitex/plt/gallery/_generate.py +421 -14
  251. scitex/plt/io/__init__.py +53 -0
  252. scitex/plt/io/_bundle.py +490 -0
  253. scitex/plt/io/_layered_bundle.py +1343 -0
  254. scitex/plt/styles/SCITEX_STYLE.yaml +26 -0
  255. scitex/plt/styles/__init__.py +14 -0
  256. scitex/plt/styles/presets.py +78 -0
  257. scitex/plt/utils/__init__.py +13 -1
  258. scitex/plt/utils/_collect_figure_metadata.py +10 -14
  259. scitex/plt/utils/_configure_mpl.py +6 -18
  260. scitex/plt/utils/_crop.py +32 -14
  261. scitex/plt/utils/_csv_column_naming.py +54 -0
  262. scitex/plt/utils/_figure_mm.py +116 -1
  263. scitex/plt/utils/_hitmap.py +1643 -0
  264. scitex/plt/utils/metadata/__init__.py +25 -0
  265. scitex/plt/utils/metadata/_core.py +9 -10
  266. scitex/plt/utils/metadata/_dimensions.py +6 -3
  267. scitex/plt/utils/metadata/_editable_export.py +405 -0
  268. scitex/plt/utils/metadata/_geometry_extraction.py +570 -0
  269. scitex/schema/__init__.py +109 -16
  270. scitex/schema/_canvas.py +1 -1
  271. scitex/schema/_plot.py +1015 -0
  272. scitex/schema/_stats.py +2 -2
  273. scitex/stats/__init__.py +117 -0
  274. scitex/stats/io/__init__.py +29 -0
  275. scitex/stats/io/_bundle.py +156 -0
  276. scitex/tex/__init__.py +4 -0
  277. scitex/tex/_export.py +890 -0
  278. {scitex-2.7.0.dist-info → scitex-2.8.1.dist-info}/METADATA +11 -1
  279. {scitex-2.7.0.dist-info → scitex-2.8.1.dist-info}/RECORD +294 -170
  280. scitex/io/memo.md +0 -2827
  281. scitex/plt/REQUESTS.md +0 -191
  282. scitex/plt/_subplots/TODO.md +0 -53
  283. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +0 -559
  284. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +0 -1609
  285. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +0 -447
  286. scitex/plt/templates/research-master/scitex/vis/gallery/area/fill_between.json +0 -110
  287. scitex/plt/templates/research-master/scitex/vis/gallery/area/fill_betweenx.json +0 -88
  288. scitex/plt/templates/research-master/scitex/vis/gallery/area/stx_fill_between.json +0 -103
  289. scitex/plt/templates/research-master/scitex/vis/gallery/area/stx_fillv.json +0 -106
  290. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/bar.json +0 -92
  291. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/barh.json +0 -92
  292. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/boxplot.json +0 -92
  293. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_bar.json +0 -84
  294. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_barh.json +0 -84
  295. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_box.json +0 -83
  296. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_boxplot.json +0 -93
  297. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_violin.json +0 -91
  298. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/stx_violinplot.json +0 -91
  299. scitex/plt/templates/research-master/scitex/vis/gallery/categorical/violinplot.json +0 -91
  300. scitex/plt/templates/research-master/scitex/vis/gallery/contour/contour.json +0 -97
  301. scitex/plt/templates/research-master/scitex/vis/gallery/contour/contourf.json +0 -98
  302. scitex/plt/templates/research-master/scitex/vis/gallery/contour/stx_contour.json +0 -84
  303. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/hist.json +0 -101
  304. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/hist2d.json +0 -96
  305. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_ecdf.json +0 -95
  306. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_joyplot.json +0 -95
  307. scitex/plt/templates/research-master/scitex/vis/gallery/distribution/stx_kde.json +0 -93
  308. scitex/plt/templates/research-master/scitex/vis/gallery/grid/imshow.json +0 -95
  309. scitex/plt/templates/research-master/scitex/vis/gallery/grid/matshow.json +0 -95
  310. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_conf_mat.json +0 -83
  311. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_heatmap.json +0 -92
  312. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_image.json +0 -121
  313. scitex/plt/templates/research-master/scitex/vis/gallery/grid/stx_imshow.json +0 -84
  314. scitex/plt/templates/research-master/scitex/vis/gallery/line/plot.json +0 -110
  315. scitex/plt/templates/research-master/scitex/vis/gallery/line/step.json +0 -92
  316. scitex/plt/templates/research-master/scitex/vis/gallery/line/stx_line.json +0 -95
  317. scitex/plt/templates/research-master/scitex/vis/gallery/line/stx_shaded_line.json +0 -96
  318. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/hexbin.json +0 -95
  319. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/scatter.json +0 -95
  320. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/stem.json +0 -92
  321. scitex/plt/templates/research-master/scitex/vis/gallery/scatter/stx_scatter.json +0 -84
  322. scitex/plt/templates/research-master/scitex/vis/gallery/special/pie.json +0 -94
  323. scitex/plt/templates/research-master/scitex/vis/gallery/special/stx_raster.json +0 -109
  324. scitex/plt/templates/research-master/scitex/vis/gallery/special/stx_rectangle.json +0 -108
  325. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/errorbar.json +0 -93
  326. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_errorbar.json +0 -84
  327. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_mean_ci.json +0 -96
  328. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_mean_std.json +0 -96
  329. scitex/plt/templates/research-master/scitex/vis/gallery/statistical/stx_median_iqr.json +0 -96
  330. scitex/plt/templates/research-master/scitex/vis/gallery/vector/quiver.json +0 -99
  331. scitex/plt/templates/research-master/scitex/vis/gallery/vector/streamplot.json +0 -100
  332. scitex/vis/__init__.py +0 -177
  333. scitex/vis/editor/_edit.py +0 -390
  334. scitex/vis/editor/flask_editor/_bbox.py +0 -529
  335. scitex/vis/editor/flask_editor/_core.py +0 -168
  336. scitex/vis/editor/flask_editor/_renderer.py +0 -393
  337. scitex/vis/editor/flask_editor/templates/__init__.py +0 -33
  338. scitex/vis/editor/flask_editor/templates/_html.py +0 -513
  339. scitex/vis/editor/flask_editor/templates/_scripts.py +0 -1261
  340. scitex/vis/editor/flask_editor/templates/_styles.py +0 -739
  341. /scitex/{vis → fig}/README.md +0 -0
  342. /scitex/{vis → fig}/backend/__init__.py +0 -0
  343. /scitex/{vis → fig}/backend/_export.py +0 -0
  344. /scitex/{vis → fig}/backend/_render.py +0 -0
  345. /scitex/{vis → fig}/docs/CANVAS_ARCHITECTURE.md +0 -0
  346. /scitex/{vis → fig}/editor/_flask_editor.py +0 -0
  347. /scitex/{vis → fig}/editor/flask_editor/__init__.py +0 -0
  348. /scitex/{vis → fig}/editor/flask_editor/_utils.py +0 -0
  349. /scitex/{vis → fig}/io/_directory.py +0 -0
  350. /scitex/{vis → fig}/model/_plot_types.py +0 -0
  351. /scitex/{vis → fig}/utils/_defaults.py +0 -0
  352. /scitex/{vis → fig}/utils/_validate.py +0 -0
  353. {scitex-2.7.0.dist-info → scitex-2.8.1.dist-info}/WHEEL +0 -0
  354. {scitex-2.7.0.dist-info → scitex-2.8.1.dist-info}/entry_points.txt +0 -0
  355. {scitex-2.7.0.dist-info → scitex-2.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,753 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: 2025-12-11 15:15:00
4
+ # File: /home/ywatanabe/proj/scitex-code/src/scitex/msword/reader.py
5
+
6
+ """
7
+ DOCX -> SciTeX writer document converter.
8
+
9
+ This module reads MS Word .docx files and converts them into
10
+ SciTeX's intermediate document format for further processing.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import hashlib
16
+ import re
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional, Tuple
19
+ from datetime import datetime
20
+
21
+ from .profiles import BaseWordProfile
22
+
23
+ # Optional import of python-docx (availability is recorded in DOCX_AVAILABLE)
24
+ try:
25
+ import docx
26
+ from docx.document import Document as DocxDocument
27
+ from docx.oxml.ns import qn
28
+ from docx.shared import Inches, Pt
29
+
30
+ DOCX_AVAILABLE = True
31
+ _DOCX_IMPORT_ERROR = None
32
+ except ImportError as exc:
33
+ DOCX_AVAILABLE = False
34
+ _DOCX_IMPORT_ERROR = exc
35
+ DocxDocument = None
36
+
37
# Lower-case titles of sections that commonly appear in academic manuscripts.
# Used for heuristic heading detection when no heading style is available.
COMMON_SECTION_HEADINGS = {
    # Front matter / opening
    "abstract",
    "introduction",
    "background",
    "literature review",
    # Methods
    "methods",
    "methodology",
    "materials and methods",
    "experimental",
    # Results
    "results",
    "findings",
    "analysis",
    # Closing
    "discussion",
    "conclusions",
    "conclusion",
    "summary",
    # Acknowledgements (spelling variants)
    "acknowledgements",
    "acknowledgments",
    "acknowledgement",
    # Bibliography
    "references",
    "bibliography",
    "works cited",
    # Back matter
    "appendix",
    "appendices",
    "supplementary",
    "supplementary material",
}
47
+
48
# Caption patterns for robust detection.
# Each entry is (regex, caption kind). Every regex has three groups:
#   1. the label word, 2. the number, 3. the remaining caption text.
# The label alternatives are lower-case only.
# NOTE(review): presumably matched case-insensitively or against lowered text
# by the caller -- confirm at the use site.
CAPTION_PATTERNS = [
    (rf"^({label})\s*(\d+)[\.:\s]*(.*)$", kind)
    for label, kind in (
        # Figure-like captions
        (r"figure|fig\.?", "figure"),
        (r"scheme", "scheme"),
        (r"chart", "chart"),
        (r"graph", "graph"),
        (r"plate", "plate"),
        (r"illustration", "illustration"),
        # Tables
        (r"table|tbl\.?", "table"),
        # Equations
        (r"equation|eq\.?", "equation"),
        # Listings / code
        (r"listing|code", "listing"),
        # Algorithms
        (r"algorithm|alg\.?", "algorithm"),
    )
]
66
+
67
+
68
+ class WordReader:
69
+ """
70
+ Read a DOCX file and convert it into a SciTeX writer document.
71
+
72
+ This reader focuses on:
73
+ - Sections (via heading styles)
74
+ - Plain paragraphs
75
+ - Figure/table captions (via caption style)
76
+ - Embedded images extraction
77
+ - References section boundary detection
78
+ - Basic formatting (bold, italic)
79
+
80
+ The output is a structured intermediate representation that can be
81
+ easily fed into `scitex.writer` or exported to LaTeX/other formats.
82
+ """
83
+
84
def __init__(
    self,
    profile: BaseWordProfile,
    extract_images: bool = True,
):
    """
    Bind the reader to a Word style profile.

    Parameters
    ----------
    profile : BaseWordProfile
        Mapping between Word styles and SciTeX writer semantics.
    extract_images : bool
        Whether to extract embedded images from the document.

    Raises
    ------
    ImportError
        If python-docx could not be imported when this module was loaded;
        the original import failure is attached as the cause.
    """
    if not DOCX_AVAILABLE:
        message = (
            "python-docx is required for scitex.msword.WordReader. "
            "Install it via `pip install python-docx`."
        )
        raise ImportError(message) from _DOCX_IMPORT_ERROR

    self.profile, self.extract_images = profile, extract_images
104
+
105
def read(self, path: Path) -> Dict[str, Any]:
    """
    Load a DOCX file and convert it into a SciTeX writer document.

    Parameters
    ----------
    path : Path
        Path to the DOCX file.

    Returns
    -------
    dict
        SciTeX writer document structure with keys:
        - blocks: List of document blocks
        - metadata: Profile name, source path, import timestamp and any
          core document properties that were found
        - images: Extracted image data (only filled if extract_images=True)
        - references: Parsed reference entries
        - warnings: List of conversion warnings

        If the profile defines post-import hooks, the value returned by the
        last hook is what this method returns.
    """
    doc = docx.Document(str(path))

    # Source information first, then whatever core properties the file has.
    metadata: Dict[str, Any] = {
        "profile": self.profile.name,
        "source_file": str(path),
        "import_timestamp": datetime.now().isoformat(),
    }
    result: Dict[str, Any] = {
        "blocks": [],
        "metadata": metadata,
        "images": [],
        "references": [],
        "warnings": [],
    }
    metadata.update(self._extract_metadata(doc))

    # Paragraphs and tables, in document order.
    blocks = self._process_body(doc, result)
    result["blocks"] = blocks

    if self.extract_images:
        result["images"] = self._extract_images(doc, path)

    result["references"] = self._parse_references(blocks)

    # Each hook receives the current document and returns its replacement.
    for hook in self.profile.post_import_hooks:
        result = hook(result)

    return result
158
+
159
+ def _extract_metadata(self, doc: DocxDocument) -> Dict[str, Any]:
160
+ """Extract document metadata (title, author, etc.)."""
161
+ metadata = {}
162
+ try:
163
+ core_props = doc.core_properties
164
+ if core_props.title:
165
+ metadata["title"] = core_props.title
166
+ if core_props.author:
167
+ metadata["author"] = core_props.author
168
+ if core_props.subject:
169
+ metadata["subject"] = core_props.subject
170
+ if core_props.keywords:
171
+ metadata["keywords"] = core_props.keywords
172
+ if core_props.created:
173
+ metadata["created"] = core_props.created.isoformat()
174
+ if core_props.modified:
175
+ metadata["modified"] = core_props.modified.isoformat()
176
+ except Exception:
177
+ pass # Metadata extraction is optional
178
+ return metadata
179
+
180
+ def _process_body(
181
+ self,
182
+ doc: DocxDocument,
183
+ result: Dict[str, Any],
184
+ ) -> List[Dict[str, Any]]:
185
+ """Process document body: paragraphs and tables."""
186
+ blocks: List[Dict[str, Any]] = []
187
+ in_reference_section = False
188
+ block_index = 0
189
+
190
+ # Build rel_id -> hash map for image detection
191
+ rel_to_hash = {}
192
+ if self.extract_images:
193
+ for rel_id, rel in doc.part.rels.items():
194
+ if "image" in rel.reltype:
195
+ image_bytes = rel.target_part.blob
196
+ image_hash = hashlib.md5(image_bytes).hexdigest()[:12]
197
+ rel_to_hash[rel_id] = image_hash
198
+
199
+ # Namespace for picture detection
200
+ pic_ns = {"pic": "http://schemas.openxmlformats.org/drawingml/2006/picture"}
201
+ a_ns = {"a": "http://schemas.openxmlformats.org/drawingml/2006/main"}
202
+ r_ns = {"r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships"}
203
+
204
+ for element in doc.element.body:
205
+ tag = element.tag.split("}")[-1] # Remove namespace
206
+
207
+ if tag == "p":
208
+ # Process paragraph
209
+ para = docx.text.paragraph.Paragraph(element, doc)
210
+
211
+ # Detect inline images in this paragraph
212
+ if self.extract_images:
213
+ for run in para.runs:
214
+ # Check for drawing elements containing pictures
215
+ drawings = run.element.findall(".//a:blip", namespaces=a_ns)
216
+ for blip in drawings:
217
+ embed_attr = qn("r:embed")
218
+ rel_id = blip.get(embed_attr)
219
+ if rel_id and rel_id in rel_to_hash:
220
+ blocks.append({
221
+ "index": block_index,
222
+ "type": "image",
223
+ "image_hash": rel_to_hash[rel_id],
224
+ "rel_id": rel_id,
225
+ })
226
+ block_index += 1
227
+
228
+ block = self._process_paragraph(
229
+ para, in_reference_section, block_index
230
+ )
231
+ if block:
232
+ # Check if entering references section
233
+ if block["type"] == "heading" and block["text"] in (
234
+ self.profile.reference_section_titles
235
+ ):
236
+ in_reference_section = True
237
+ block["is_reference_header"] = True
238
+
239
+ blocks.append(block)
240
+ block_index += 1
241
+
242
+ elif tag == "tbl":
243
+ # Process table
244
+ table = docx.table.Table(element, doc)
245
+ block = self._process_table(table, block_index)
246
+ blocks.append(block)
247
+ block_index += 1
248
+
249
+ return blocks
250
+
251
+ def _process_paragraph(
252
+ self,
253
+ para,
254
+ in_reference_section: bool,
255
+ block_index: int,
256
+ ) -> Optional[Dict[str, Any]]:
257
+ """Process a single paragraph."""
258
+ style_name = (para.style.name or "").strip() if para.style else ""
259
+ text = para.text.strip()
260
+
261
+ if not text:
262
+ return None
263
+
264
+ # Extract runs with formatting info
265
+ runs = self._extract_runs(para)
266
+
267
+ # Base block structure
268
+ block: Dict[str, Any] = {
269
+ "index": block_index,
270
+ "text": text,
271
+ "style": style_name,
272
+ "runs": runs,
273
+ }
274
+
275
+ # Check for equations (OMML)
276
+ equation_latex = self._extract_equation(para)
277
+ if equation_latex:
278
+ block["type"] = "equation"
279
+ block["latex"] = equation_latex
280
+ return block
281
+
282
+ # Detect heading (style-based first, then heuristic)
283
+ level = self._detect_heading(para, style_name, text, runs)
284
+ if level is not None:
285
+ block["type"] = "heading"
286
+ block["level"] = level
287
+ block["detection_method"] = "style" if self._heading_level_from_style(style_name) else "heuristic"
288
+ return block
289
+
290
+ # Detect caption (improved pattern matching)
291
+ caption_info = self._detect_caption(style_name, text)
292
+ if caption_info:
293
+ block["type"] = "caption"
294
+ block.update(caption_info)
295
+ return block
296
+
297
+ # Reference paragraph
298
+ if in_reference_section:
299
+ block["type"] = "reference-paragraph"
300
+ ref_info = self._parse_reference_entry(text)
301
+ block.update(ref_info)
302
+ return block
303
+
304
+ # List item detection
305
+ if self._is_list_item(para):
306
+ block["type"] = "list-item"
307
+ list_info = self._parse_list_item(para)
308
+ block.update(list_info)
309
+ return block
310
+
311
+ # Normal paragraph
312
+ block["type"] = "paragraph"
313
+ return block
314
+
315
+ def _detect_heading(
316
+ self,
317
+ para,
318
+ style_name: str,
319
+ text: str,
320
+ runs: List[Dict[str, Any]],
321
+ ) -> Optional[int]:
322
+ """
323
+ Detect heading using multiple strategies:
324
+ 1. Style-based (most reliable)
325
+ 2. Font-based heuristics (bold, larger size)
326
+ 3. Content-based (known section titles)
327
+ """
328
+ # Strategy 1: Style-based detection
329
+ level = self._heading_level_from_style(style_name)
330
+ if level is not None:
331
+ return level
332
+
333
+ # Strategy 2: Font-based heuristics
334
+ # Check if entire paragraph is bold and short
335
+ text_clean = text.strip()
336
+ if len(text_clean) < 100: # Headings are typically short
337
+ all_bold = all(r.get("bold") for r in runs if r.get("text", "").strip())
338
+ if all_bold and runs:
339
+ # Check font size - headings often larger
340
+ avg_size = self._get_average_font_size(runs)
341
+ if avg_size and avg_size >= 12:
342
+ # Check if it looks like a section heading
343
+ if self._looks_like_heading(text_clean):
344
+ return 1 if avg_size >= 14 else 2
345
+
346
+ # Strategy 3: Content-based detection (common section titles)
347
+ text_lower = text_clean.lower().rstrip(".:;")
348
+ # Check numbered sections: "1. Introduction", "2.1 Methods"
349
+ numbered_match = re.match(r"^(\d+(?:\.\d+)*)[\.:\s]+(.+)$", text_clean)
350
+ if numbered_match:
351
+ section_text = numbered_match.group(2).lower().strip()
352
+ if section_text in COMMON_SECTION_HEADINGS:
353
+ depth = numbered_match.group(1).count(".")
354
+ return min(depth + 1, 4)
355
+
356
+ # Check unnumbered common headings (if bold or all caps)
357
+ if text_lower in COMMON_SECTION_HEADINGS:
358
+ is_bold = all(r.get("bold") for r in runs if r.get("text", "").strip())
359
+ is_all_caps = text_clean.isupper() and len(text_clean) > 3
360
+ if is_bold or is_all_caps:
361
+ return 1
362
+
363
+ return None
364
+
365
def _looks_like_heading(self, text: str) -> bool:
    """Tell whether the wording of ``text`` resembles a section heading.

    True when any of these holds: the lower-cased text (trailing ``.:;``
    removed) is in COMMON_SECTION_HEADINGS; the text starts with a section
    number followed by whitespace and a word character; or the text is all
    upper case and between 4 and 49 characters long.
    """
    return (
        text.lower().rstrip(".:;") in COMMON_SECTION_HEADINGS
        or re.match(r"^\d+(?:\.\d+)*\s+\w", text) is not None
        or (text.isupper() and 3 < len(text) < 50)
    )
382
+
383
+ def _get_average_font_size(self, runs: List[Dict[str, Any]]) -> Optional[float]:
384
+ """Get average font size from runs."""
385
+ sizes = [r["font_size"] for r in runs if r.get("font_size")]
386
+ return sum(sizes) / len(sizes) if sizes else None
387
+
388
+ def _detect_caption(self, style_name: str, text: str) -> Optional[Dict[str, Any]]:
389
+ """
390
+ Detect and parse captions using multiple patterns.
391
+ Returns caption info dict or None.
392
+ """
393
+ # Check by style first
394
+ if style_name == self.profile.caption_style:
395
+ return self._parse_caption(text)
396
+
397
+ # Check using comprehensive patterns
398
+ text_stripped = text.strip()
399
+ for pattern, caption_type in CAPTION_PATTERNS:
400
+ match = re.match(pattern, text_stripped, re.IGNORECASE)
401
+ if match:
402
+ return {
403
+ "caption_type": caption_type,
404
+ "number": int(match.group(2)),
405
+ "caption_text": match.group(3).strip(),
406
+ }
407
+
408
+ # Check profile-specific prefixes
409
+ if self._is_caption(style_name, text):
410
+ return self._parse_caption(text)
411
+
412
+ return None
413
+
414
+ def _extract_equation(self, para) -> Optional[str]:
415
+ """
416
+ Extract equation from paragraph if it contains OMML (Office Math Markup).
417
+ Returns LaTeX representation or None.
418
+ """
419
+ try:
420
+ # Check for oMath elements
421
+ omml_ns = {"m": "http://schemas.openxmlformats.org/officeDocument/2006/math"}
422
+ math_elements = para._element.findall(".//m:oMath", namespaces=omml_ns)
423
+
424
+ if not math_elements:
425
+ return None
426
+
427
+ # Basic OMML to LaTeX conversion
428
+ latex_parts = []
429
+ for math_elem in math_elements:
430
+ latex = self._omml_to_latex(math_elem)
431
+ if latex:
432
+ latex_parts.append(latex)
433
+
434
+ return " ".join(latex_parts) if latex_parts else None
435
+ except Exception:
436
+ return None
437
+
438
+ def _omml_to_latex(self, math_elem) -> str:
439
+ """
440
+ Convert OMML element to LaTeX string.
441
+ This is a basic converter - handles common cases.
442
+ """
443
+ omml_ns = {"m": "http://schemas.openxmlformats.org/officeDocument/2006/math"}
444
+
445
+ def get_text(elem) -> str:
446
+ """Recursively get text from element."""
447
+ texts = []
448
+ if elem.text:
449
+ texts.append(elem.text)
450
+ for child in elem:
451
+ texts.append(get_text(child))
452
+ if child.tail:
453
+ texts.append(child.tail)
454
+ return "".join(texts)
455
+
456
+ def convert_element(elem) -> str:
457
+ """Convert a single OMML element to LaTeX."""
458
+ tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
459
+
460
+ if tag == "r": # Run (text)
461
+ return get_text(elem)
462
+ elif tag == "f": # Fraction
463
+ num = elem.find("m:num", namespaces=omml_ns)
464
+ den = elem.find("m:den", namespaces=omml_ns)
465
+ num_tex = convert_children(num) if num is not None else ""
466
+ den_tex = convert_children(den) if den is not None else ""
467
+ return f"\\frac{{{num_tex}}}{{{den_tex}}}"
468
+ elif tag == "rad": # Radical/root
469
+ deg = elem.find("m:deg", namespaces=omml_ns)
470
+ content = elem.find("m:e", namespaces=omml_ns)
471
+ content_tex = convert_children(content) if content is not None else ""
472
+ if deg is not None and get_text(deg).strip():
473
+ deg_tex = convert_children(deg)
474
+ return f"\\sqrt[{deg_tex}]{{{content_tex}}}"
475
+ return f"\\sqrt{{{content_tex}}}"
476
+ elif tag == "sSup": # Superscript
477
+ base = elem.find("m:e", namespaces=omml_ns)
478
+ sup = elem.find("m:sup", namespaces=omml_ns)
479
+ base_tex = convert_children(base) if base is not None else ""
480
+ sup_tex = convert_children(sup) if sup is not None else ""
481
+ return f"{base_tex}^{{{sup_tex}}}"
482
+ elif tag == "sSub": # Subscript
483
+ base = elem.find("m:e", namespaces=omml_ns)
484
+ sub = elem.find("m:sub", namespaces=omml_ns)
485
+ base_tex = convert_children(base) if base is not None else ""
486
+ sub_tex = convert_children(sub) if sub is not None else ""
487
+ return f"{base_tex}_{{{sub_tex}}}"
488
+ elif tag == "sSubSup": # Sub-superscript
489
+ base = elem.find("m:e", namespaces=omml_ns)
490
+ sub = elem.find("m:sub", namespaces=omml_ns)
491
+ sup = elem.find("m:sup", namespaces=omml_ns)
492
+ base_tex = convert_children(base) if base is not None else ""
493
+ sub_tex = convert_children(sub) if sub is not None else ""
494
+ sup_tex = convert_children(sup) if sup is not None else ""
495
+ return f"{base_tex}_{{{sub_tex}}}^{{{sup_tex}}}"
496
+ elif tag == "nary": # N-ary (sum, product, integral)
497
+ chr_elem = elem.find(".//m:chr", namespaces=omml_ns)
498
+ symbol = chr_elem.get(qn("m:val")) if chr_elem is not None else "∑"
499
+ symbol_map = {"∑": "\\sum", "∏": "\\prod", "∫": "\\int", "∮": "\\oint"}
500
+ latex_sym = symbol_map.get(symbol, symbol)
501
+ sub = elem.find("m:sub", namespaces=omml_ns)
502
+ sup = elem.find("m:sup", namespaces=omml_ns)
503
+ content = elem.find("m:e", namespaces=omml_ns)
504
+ result = latex_sym
505
+ if sub is not None:
506
+ result += f"_{{{convert_children(sub)}}}"
507
+ if sup is not None:
508
+ result += f"^{{{convert_children(sup)}}}"
509
+ if content is not None:
510
+ result += f" {convert_children(content)}"
511
+ return result
512
+ elif tag == "d": # Delimiter (parentheses, brackets)
513
+ content = elem.find("m:e", namespaces=omml_ns)
514
+ content_tex = convert_children(content) if content is not None else ""
515
+ beg = elem.find(".//m:begChr", namespaces=omml_ns)
516
+ end = elem.find(".//m:endChr", namespaces=omml_ns)
517
+ left = beg.get(qn("m:val")) if beg is not None else "("
518
+ right = end.get(qn("m:val")) if end is not None else ")"
519
+ return f"\\left{left}{content_tex}\\right{right}"
520
+ elif tag in ("e", "num", "den", "sub", "sup", "deg"):
521
+ # Container elements - just process children
522
+ return convert_children(elem)
523
+ else:
524
+ # Unknown element - try to get text
525
+ return convert_children(elem)
526
+
527
+ def convert_children(elem) -> str:
528
+ """Convert all children of an element."""
529
+ if elem is None:
530
+ return ""
531
+ parts = []
532
+ for child in elem:
533
+ parts.append(convert_element(child))
534
+ return "".join(parts)
535
+
536
+ return convert_element(math_elem)
537
+
538
def _is_list_item(self, para) -> bool:
    """Tell whether the paragraph is a list item.

    A paragraph counts as a list item when it has Word numbering properties
    (``w:pPr/w:numPr``) or when its text starts with a bullet character or
    an enumeration marker such as ``1.``, ``(a)``, ``(iv)`` or ``b)``.
    Any error while inspecting the paragraph yields False.
    """
    try:
        # Structural evidence: numbering properties on the paragraph
        props = para._element.find(qn("w:pPr"))
        if props is not None and props.find(qn("w:numPr")) is not None:
            return True

        # Textual evidence: leading bullet or enumeration marker
        leading = para.text.strip()
        return (
            re.match(r"^[\u2022\u2023\u25E6\u2043\u2219•‣◦⁃∙]\s", leading)
            is not None
            or re.match(
                r"^(\d+[\.\):]|\([a-z]\)|\([ivxlc]+\)|[a-z][\.\)])\s",
                leading,
                re.IGNORECASE,
            )
            is not None
        )
    except Exception:
        return False
558
+
559
def _parse_list_item(self, para) -> Dict[str, Any]:
    """Describe a list item as ``{"list_type": ..., "level": ...}``.

    ``level`` comes from ``w:pPr/w:numPr/w:ilvl`` when present (default 0).
    ``list_type`` becomes ``"ordered"`` when the text starts with a number
    followed by ``.``, ``)`` or ``:``; otherwise it stays ``"unordered"``.
    Errors are swallowed and whatever was gathered so far is returned.
    """
    info: Dict[str, Any] = {"list_type": "unordered", "level": 0}

    try:
        # Walk pPr -> numPr -> ilvl, stopping at the first missing element
        node = para._element.find(qn("w:pPr"))
        for child_name in ("w:numPr", "w:ilvl"):
            if node is None:
                break
            node = node.find(qn(child_name))
        if node is not None:
            info["level"] = int(node.get(qn("w:val"), 0))

        # Detect ordered vs unordered from the visible marker
        leading = para.text.strip()
        if re.match(r"^\d+[\.\):]\s", leading) is not None:
            info["list_type"] = "ordered"
    except Exception:
        pass

    return info
580
+
581
+ def _extract_runs(self, para) -> List[Dict[str, Any]]:
582
+ """Extract formatted runs from a paragraph."""
583
+ runs = []
584
+ for run in para.runs:
585
+ if not run.text:
586
+ continue
587
+ run_data = {
588
+ "text": run.text,
589
+ "bold": run.bold,
590
+ "italic": run.italic,
591
+ "underline": run.underline is not None,
592
+ }
593
+ if run.font.size:
594
+ run_data["font_size"] = run.font.size.pt
595
+ if run.font.name:
596
+ run_data["font_name"] = run.font.name
597
+ runs.append(run_data)
598
+ return runs
599
+
600
+ def _heading_level_from_style(self, style_name: str) -> Optional[int]:
601
+ """Return heading level for a given Word style, or None."""
602
+ for level, expected_style in self.profile.heading_styles.items():
603
+ if style_name == expected_style:
604
+ return level
605
+ return None
606
+
607
+ def _is_caption(self, style_name: str, text: str) -> bool:
608
+ """Check if paragraph is a caption."""
609
+ if style_name == self.profile.caption_style:
610
+ return True
611
+
612
+ # Check by prefix
613
+ text_lower = text.lower()
614
+ prefixes = (
615
+ self.profile.figure_caption_prefixes
616
+ + self.profile.table_caption_prefixes
617
+ )
618
+ for prefix in prefixes:
619
+ if text_lower.startswith(prefix.lower()):
620
+ return True
621
+ return False
622
+
623
+ def _parse_caption(self, text: str) -> Dict[str, Any]:
624
+ """Parse caption text to extract figure/table number."""
625
+ info: Dict[str, Any] = {}
626
+
627
+ # Check figure
628
+ for prefix in self.profile.figure_caption_prefixes:
629
+ pattern = rf"^{re.escape(prefix)}\.?\s*(\d+)[\.:]?\s*(.*)$"
630
+ match = re.match(pattern, text, re.IGNORECASE)
631
+ if match:
632
+ info["caption_type"] = "figure"
633
+ info["number"] = int(match.group(1))
634
+ info["caption_text"] = match.group(2).strip()
635
+ return info
636
+
637
+ # Check table
638
+ for prefix in self.profile.table_caption_prefixes:
639
+ pattern = rf"^{re.escape(prefix)}\.?\s*(\d+)[\.:]?\s*(.*)$"
640
+ match = re.match(pattern, text, re.IGNORECASE)
641
+ if match:
642
+ info["caption_type"] = "table"
643
+ info["number"] = int(match.group(1))
644
+ info["caption_text"] = match.group(2).strip()
645
+ return info
646
+
647
+ info["caption_type"] = "unknown"
648
+ info["caption_text"] = text
649
+ return info
650
+
651
+ def _parse_reference_entry(self, text: str) -> Dict[str, Any]:
652
+ """Parse a reference entry to extract citation number."""
653
+ info: Dict[str, Any] = {}
654
+
655
+ # Try to extract numbered reference: [1], 1., (1), etc.
656
+ patterns = [
657
+ r"^\[(\d+)\]", # [1] Author...
658
+ r"^(\d+)\.", # 1. Author...
659
+ r"^\((\d+)\)", # (1) Author...
660
+ ]
661
+ for pattern in patterns:
662
+ match = re.match(pattern, text)
663
+ if match:
664
+ info["ref_number"] = int(match.group(1))
665
+ info["ref_text"] = re.sub(pattern, "", text).strip()
666
+ break
667
+ else:
668
+ info["ref_text"] = text
669
+
670
+ return info
671
+
672
+ def _process_table(
673
+ self,
674
+ table,
675
+ block_index: int,
676
+ ) -> Dict[str, Any]:
677
+ """Process a table."""
678
+ rows = []
679
+ for row in table.rows:
680
+ cells = []
681
+ for cell in row.cells:
682
+ cells.append(cell.text.strip())
683
+ rows.append(cells)
684
+
685
+ return {
686
+ "index": block_index,
687
+ "type": "table",
688
+ "rows": rows,
689
+ "num_rows": len(rows),
690
+ "num_cols": len(rows[0]) if rows else 0,
691
+ }
692
+
693
+ def _extract_images(
694
+ self,
695
+ doc: DocxDocument,
696
+ source_path: Path,
697
+ ) -> List[Dict[str, Any]]:
698
+ """Extract embedded images from the document."""
699
+ images = []
700
+
701
+ try:
702
+ for rel_id, rel in doc.part.rels.items():
703
+ if "image" in rel.reltype:
704
+ image_part = rel.target_part
705
+ image_bytes = image_part.blob
706
+
707
+ # Generate hash for deduplication
708
+ image_hash = hashlib.md5(image_bytes).hexdigest()[:12]
709
+
710
+ # Determine extension from content type
711
+ content_type = image_part.content_type
712
+ ext_map = {
713
+ "image/png": ".png",
714
+ "image/jpeg": ".jpg",
715
+ "image/gif": ".gif",
716
+ "image/tiff": ".tiff",
717
+ "image/bmp": ".bmp",
718
+ }
719
+ ext = ext_map.get(content_type, ".png")
720
+
721
+ images.append(
722
+ {
723
+ "rel_id": rel_id,
724
+ "hash": image_hash,
725
+ "content_type": content_type,
726
+ "extension": ext,
727
+ "size_bytes": len(image_bytes),
728
+ "data": image_bytes, # Raw bytes
729
+ }
730
+ )
731
+ except Exception as e:
732
+ pass # Image extraction is optional
733
+
734
+ return images
735
+
736
+ def _parse_references(
737
+ self,
738
+ blocks: List[Dict[str, Any]],
739
+ ) -> List[Dict[str, Any]]:
740
+ """Extract and structure references from blocks."""
741
+ references = []
742
+ for block in blocks:
743
+ if block.get("type") == "reference-paragraph":
744
+ ref_entry = {
745
+ "number": block.get("ref_number"),
746
+ "text": block.get("ref_text", block.get("text", "")),
747
+ "raw": block.get("text", ""),
748
+ }
749
+ references.append(ref_entry)
750
+ return references
751
+
752
+
753
+ __all__ = ["WordReader"]