scitex 2.5.0__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1179)
  1. scitex/__init__.py +25 -10
  2. scitex/__main__.py +2 -1
  3. scitex/__version__.py +1 -1
  4. scitex/_optional_deps.py +13 -20
  5. scitex/ai/__init__.py +5 -0
  6. scitex/ai/_gen_ai/_Anthropic.py +3 -1
  7. scitex/ai/_gen_ai/_BaseGenAI.py +3 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +1 -1
  9. scitex/ai/_gen_ai/_Google.py +3 -2
  10. scitex/ai/_gen_ai/_Llama.py +4 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +3 -1
  12. scitex/ai/_gen_ai/_PARAMS.py +1 -0
  13. scitex/ai/_gen_ai/_Perplexity.py +3 -1
  14. scitex/ai/_gen_ai/__init__.py +1 -0
  15. scitex/ai/_gen_ai/_format_output_func.py +3 -1
  16. scitex/ai/classification/CrossValidationExperiment.py +8 -14
  17. scitex/ai/classification/examples/timeseries_cv_demo.py +128 -112
  18. scitex/ai/classification/reporters/_BaseClassificationReporter.py +2 -0
  19. scitex/ai/classification/reporters/_ClassificationReporter.py +30 -45
  20. scitex/ai/classification/reporters/_MultiClassificationReporter.py +8 -11
  21. scitex/ai/classification/reporters/_SingleClassificationReporter.py +126 -182
  22. scitex/ai/classification/reporters/__init__.py +1 -1
  23. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +213 -119
  24. scitex/ai/classification/reporters/reporter_utils/__init__.py +28 -36
  25. scitex/ai/classification/reporters/reporter_utils/aggregation.py +125 -143
  26. scitex/ai/classification/reporters/reporter_utils/data_models.py +128 -120
  27. scitex/ai/classification/reporters/reporter_utils/reporting.py +507 -340
  28. scitex/ai/classification/reporters/reporter_utils/storage.py +4 -1
  29. scitex/ai/classification/reporters/reporter_utils/validation.py +141 -154
  30. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +204 -129
  31. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +215 -171
  32. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +17 -17
  33. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +67 -143
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +67 -143
  35. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +12 -13
  36. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +231 -144
  37. scitex/ai/classification/timeseries/__init__.py +2 -4
  38. scitex/ai/classification/timeseries/_normalize_timestamp.py +3 -0
  39. scitex/ai/clustering/_pca.py +0 -1
  40. scitex/ai/clustering/_umap.py +1 -2
  41. scitex/ai/feature_extraction/__init__.py +10 -8
  42. scitex/ai/feature_extraction/vit.py +0 -1
  43. scitex/ai/feature_selection/feature_selection.py +3 -8
  44. scitex/ai/metrics/_calc_conf_mat.py +2 -0
  45. scitex/ai/metrics/_calc_feature_importance.py +3 -7
  46. scitex/ai/metrics/_calc_pre_rec_auc.py +5 -5
  47. scitex/ai/metrics/_calc_roc_auc.py +4 -2
  48. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +35 -20
  49. scitex/ai/metrics/_calc_silhouette_score.py +1 -3
  50. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +0 -3
  51. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +0 -3
  52. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +0 -3
  53. scitex/ai/optim/_optimizers.py +1 -1
  54. scitex/ai/plt/__init__.py +6 -1
  55. scitex/ai/plt/_plot_feature_importance.py +1 -3
  56. scitex/ai/plt/_plot_learning_curve.py +9 -24
  57. scitex/ai/plt/_plot_optuna_study.py +4 -3
  58. scitex/ai/plt/_plot_pre_rec_curve.py +9 -15
  59. scitex/ai/plt/_plot_roc_curve.py +6 -8
  60. scitex/ai/plt/_stx_conf_mat.py +121 -122
  61. scitex/ai/sampling/undersample.py +3 -2
  62. scitex/ai/sklearn/__init__.py +2 -2
  63. scitex/ai/training/_LearningCurveLogger.py +23 -10
  64. scitex/ai/utils/_check_params.py +0 -1
  65. scitex/audio/README.md +52 -0
  66. scitex/audio/__init__.py +384 -0
  67. scitex/audio/__main__.py +129 -0
  68. scitex/audio/_tts.py +334 -0
  69. scitex/audio/engines/__init__.py +44 -0
  70. scitex/audio/engines/base.py +275 -0
  71. scitex/audio/engines/elevenlabs_engine.py +143 -0
  72. scitex/audio/engines/gtts_engine.py +162 -0
  73. scitex/audio/engines/pyttsx3_engine.py +131 -0
  74. scitex/audio/mcp_server.py +757 -0
  75. scitex/benchmark/__init__.py +15 -25
  76. scitex/benchmark/benchmark.py +124 -117
  77. scitex/benchmark/monitor.py +117 -107
  78. scitex/benchmark/profiler.py +61 -58
  79. scitex/bridge/__init__.py +110 -0
  80. scitex/bridge/_helpers.py +149 -0
  81. scitex/bridge/_plt_vis.py +529 -0
  82. scitex/bridge/_protocol.py +283 -0
  83. scitex/bridge/_stats_plt.py +261 -0
  84. scitex/bridge/_stats_vis.py +265 -0
  85. scitex/browser/__init__.py +0 -2
  86. scitex/browser/auth/__init__.py +0 -0
  87. scitex/browser/auth/google.py +16 -11
  88. scitex/browser/automation/CookieHandler.py +2 -3
  89. scitex/browser/collaboration/__init__.py +3 -0
  90. scitex/browser/collaboration/auth_helpers.py +3 -1
  91. scitex/browser/collaboration/collaborative_agent.py +2 -0
  92. scitex/browser/collaboration/interactive_panel.py +2 -2
  93. scitex/browser/collaboration/shared_session.py +20 -11
  94. scitex/browser/collaboration/standard_interactions.py +1 -0
  95. scitex/browser/core/BrowserMixin.py +12 -30
  96. scitex/browser/core/ChromeProfileManager.py +9 -24
  97. scitex/browser/debugging/_browser_logger.py +15 -25
  98. scitex/browser/debugging/_failure_capture.py +9 -2
  99. scitex/browser/debugging/_highlight_element.py +15 -6
  100. scitex/browser/debugging/_show_grid.py +5 -6
  101. scitex/browser/debugging/_sync_session.py +4 -3
  102. scitex/browser/debugging/_test_monitor.py +14 -5
  103. scitex/browser/debugging/_visual_cursor.py +46 -35
  104. scitex/browser/interaction/click_center.py +4 -3
  105. scitex/browser/interaction/click_with_fallbacks.py +7 -10
  106. scitex/browser/interaction/close_popups.py +79 -66
  107. scitex/browser/interaction/fill_with_fallbacks.py +8 -8
  108. scitex/browser/pdf/__init__.py +3 -1
  109. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +11 -10
  110. scitex/browser/pdf/detect_chrome_pdf_viewer.py +3 -6
  111. scitex/browser/remote/CaptchaHandler.py +109 -96
  112. scitex/browser/remote/ZenRowsAPIClient.py +91 -97
  113. scitex/browser/remote/ZenRowsBrowserManager.py +138 -112
  114. scitex/browser/stealth/HumanBehavior.py +4 -9
  115. scitex/browser/stealth/StealthManager.py +11 -26
  116. scitex/capture/__init__.py +17 -17
  117. scitex/capture/__main__.py +2 -3
  118. scitex/capture/capture.py +23 -51
  119. scitex/capture/cli.py +14 -39
  120. scitex/capture/gif.py +5 -9
  121. scitex/capture/mcp_server.py +7 -20
  122. scitex/capture/session.py +4 -3
  123. scitex/capture/utils.py +18 -53
  124. scitex/cli/__init__.py +1 -1
  125. scitex/cli/cloud.py +158 -116
  126. scitex/cli/config.py +224 -0
  127. scitex/cli/main.py +41 -40
  128. scitex/cli/scholar.py +60 -27
  129. scitex/cli/security.py +14 -20
  130. scitex/cli/web.py +87 -90
  131. scitex/cli/writer.py +51 -45
  132. scitex/cloud/__init__.py +14 -11
  133. scitex/cloud/_matplotlib_hook.py +6 -6
  134. scitex/config/README.md +313 -0
  135. scitex/config/{PriorityConfig.py → _PriorityConfig.py} +114 -17
  136. scitex/config/_ScitexConfig.py +319 -0
  137. scitex/config/__init__.py +41 -9
  138. scitex/config/_paths.py +325 -0
  139. scitex/config/default.yaml +81 -0
  140. scitex/context/_suppress_output.py +2 -3
  141. scitex/db/_BaseMixins/_BaseBackupMixin.py +3 -1
  142. scitex/db/_BaseMixins/_BaseBatchMixin.py +3 -1
  143. scitex/db/_BaseMixins/_BaseBlobMixin.py +3 -1
  144. scitex/db/_BaseMixins/_BaseImportExportMixin.py +1 -3
  145. scitex/db/_BaseMixins/_BaseIndexMixin.py +3 -1
  146. scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +1 -3
  147. scitex/db/_BaseMixins/_BaseQueryMixin.py +3 -1
  148. scitex/db/_BaseMixins/_BaseRowMixin.py +3 -1
  149. scitex/db/_BaseMixins/_BaseTableMixin.py +3 -1
  150. scitex/db/_BaseMixins/_BaseTransactionMixin.py +1 -3
  151. scitex/db/_BaseMixins/__init__.py +1 -1
  152. scitex/db/__init__.py +9 -1
  153. scitex/db/__main__.py +8 -21
  154. scitex/db/_check_health.py +15 -31
  155. scitex/db/_delete_duplicates.py +7 -4
  156. scitex/db/_inspect.py +22 -38
  157. scitex/db/_inspect_optimized.py +89 -85
  158. scitex/db/_postgresql/_PostgreSQL.py +0 -1
  159. scitex/db/_postgresql/_PostgreSQLMixins/_BlobMixin.py +3 -1
  160. scitex/db/_postgresql/_PostgreSQLMixins/_ConnectionMixin.py +1 -3
  161. scitex/db/_postgresql/_PostgreSQLMixins/_ImportExportMixin.py +1 -3
  162. scitex/db/_postgresql/_PostgreSQLMixins/_MaintenanceMixin.py +1 -4
  163. scitex/db/_postgresql/_PostgreSQLMixins/_QueryMixin.py +3 -3
  164. scitex/db/_postgresql/_PostgreSQLMixins/_RowMixin.py +3 -1
  165. scitex/db/_postgresql/_PostgreSQLMixins/_TransactionMixin.py +1 -3
  166. scitex/db/_postgresql/__init__.py +1 -1
  167. scitex/db/_sqlite3/_SQLite3.py +2 -4
  168. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +11 -12
  169. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +19 -14
  170. scitex/db/_sqlite3/_SQLite3Mixins/_BatchMixin.py +3 -1
  171. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +7 -7
  172. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +118 -111
  173. scitex/db/_sqlite3/_SQLite3Mixins/_ConnectionMixin.py +8 -10
  174. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +17 -45
  175. scitex/db/_sqlite3/_SQLite3Mixins/_ImportExportMixin.py +1 -3
  176. scitex/db/_sqlite3/_SQLite3Mixins/_IndexMixin.py +3 -1
  177. scitex/db/_sqlite3/_SQLite3Mixins/_QueryMixin.py +3 -4
  178. scitex/db/_sqlite3/_SQLite3Mixins/_RowMixin.py +9 -9
  179. scitex/db/_sqlite3/_SQLite3Mixins/_TableMixin.py +18 -11
  180. scitex/db/_sqlite3/_SQLite3Mixins/__init__.py +1 -0
  181. scitex/db/_sqlite3/__init__.py +1 -1
  182. scitex/db/_sqlite3/_delete_duplicates.py +13 -11
  183. scitex/decorators/__init__.py +29 -4
  184. scitex/decorators/_auto_order.py +43 -43
  185. scitex/decorators/_batch_fn.py +12 -6
  186. scitex/decorators/_cache_disk.py +8 -9
  187. scitex/decorators/_cache_disk_async.py +8 -7
  188. scitex/decorators/_combined.py +19 -13
  189. scitex/decorators/_converters.py +16 -3
  190. scitex/decorators/_deprecated.py +32 -22
  191. scitex/decorators/_numpy_fn.py +18 -4
  192. scitex/decorators/_pandas_fn.py +17 -5
  193. scitex/decorators/_signal_fn.py +17 -3
  194. scitex/decorators/_torch_fn.py +32 -15
  195. scitex/decorators/_xarray_fn.py +23 -9
  196. scitex/dev/_analyze_code_flow.py +0 -2
  197. scitex/dev/plt/__init__.py +272 -0
  198. scitex/dev/plt/plot_mpl_axhline.py +28 -0
  199. scitex/dev/plt/plot_mpl_axhspan.py +28 -0
  200. scitex/dev/plt/plot_mpl_axvline.py +28 -0
  201. scitex/dev/plt/plot_mpl_axvspan.py +28 -0
  202. scitex/dev/plt/plot_mpl_bar.py +29 -0
  203. scitex/dev/plt/plot_mpl_barh.py +29 -0
  204. scitex/dev/plt/plot_mpl_boxplot.py +28 -0
  205. scitex/dev/plt/plot_mpl_contour.py +31 -0
  206. scitex/dev/plt/plot_mpl_contourf.py +31 -0
  207. scitex/dev/plt/plot_mpl_errorbar.py +30 -0
  208. scitex/dev/plt/plot_mpl_eventplot.py +28 -0
  209. scitex/dev/plt/plot_mpl_fill.py +30 -0
  210. scitex/dev/plt/plot_mpl_fill_between.py +31 -0
  211. scitex/dev/plt/plot_mpl_hexbin.py +28 -0
  212. scitex/dev/plt/plot_mpl_hist.py +28 -0
  213. scitex/dev/plt/plot_mpl_hist2d.py +28 -0
  214. scitex/dev/plt/plot_mpl_imshow.py +29 -0
  215. scitex/dev/plt/plot_mpl_pcolormesh.py +31 -0
  216. scitex/dev/plt/plot_mpl_pie.py +29 -0
  217. scitex/dev/plt/plot_mpl_plot.py +29 -0
  218. scitex/dev/plt/plot_mpl_quiver.py +31 -0
  219. scitex/dev/plt/plot_mpl_scatter.py +28 -0
  220. scitex/dev/plt/plot_mpl_stackplot.py +31 -0
  221. scitex/dev/plt/plot_mpl_stem.py +29 -0
  222. scitex/dev/plt/plot_mpl_step.py +29 -0
  223. scitex/dev/plt/plot_mpl_violinplot.py +28 -0
  224. scitex/dev/plt/plot_sns_barplot.py +29 -0
  225. scitex/dev/plt/plot_sns_boxplot.py +29 -0
  226. scitex/dev/plt/plot_sns_heatmap.py +28 -0
  227. scitex/dev/plt/plot_sns_histplot.py +29 -0
  228. scitex/dev/plt/plot_sns_kdeplot.py +29 -0
  229. scitex/dev/plt/plot_sns_lineplot.py +31 -0
  230. scitex/dev/plt/plot_sns_scatterplot.py +29 -0
  231. scitex/dev/plt/plot_sns_stripplot.py +29 -0
  232. scitex/dev/plt/plot_sns_swarmplot.py +29 -0
  233. scitex/dev/plt/plot_sns_violinplot.py +29 -0
  234. scitex/dev/plt/plot_stx_bar.py +29 -0
  235. scitex/dev/plt/plot_stx_barh.py +29 -0
  236. scitex/dev/plt/plot_stx_box.py +28 -0
  237. scitex/dev/plt/plot_stx_boxplot.py +28 -0
  238. scitex/dev/plt/plot_stx_conf_mat.py +28 -0
  239. scitex/dev/plt/plot_stx_contour.py +31 -0
  240. scitex/dev/plt/plot_stx_ecdf.py +28 -0
  241. scitex/dev/plt/plot_stx_errorbar.py +30 -0
  242. scitex/dev/plt/plot_stx_fill_between.py +31 -0
  243. scitex/dev/plt/plot_stx_fillv.py +28 -0
  244. scitex/dev/plt/plot_stx_heatmap.py +28 -0
  245. scitex/dev/plt/plot_stx_image.py +28 -0
  246. scitex/dev/plt/plot_stx_imshow.py +28 -0
  247. scitex/dev/plt/plot_stx_joyplot.py +28 -0
  248. scitex/dev/plt/plot_stx_kde.py +28 -0
  249. scitex/dev/plt/plot_stx_line.py +28 -0
  250. scitex/dev/plt/plot_stx_mean_ci.py +28 -0
  251. scitex/dev/plt/plot_stx_mean_std.py +28 -0
  252. scitex/dev/plt/plot_stx_median_iqr.py +28 -0
  253. scitex/dev/plt/plot_stx_raster.py +28 -0
  254. scitex/dev/plt/plot_stx_rectangle.py +28 -0
  255. scitex/dev/plt/plot_stx_scatter.py +29 -0
  256. scitex/dev/plt/plot_stx_shaded_line.py +29 -0
  257. scitex/dev/plt/plot_stx_violin.py +28 -0
  258. scitex/dev/plt/plot_stx_violinplot.py +28 -0
  259. scitex/dict/_DotDict.py +15 -19
  260. scitex/dict/_flatten.py +1 -0
  261. scitex/dict/_listed_dict.py +1 -0
  262. scitex/dict/_pop_keys.py +1 -0
  263. scitex/dict/_replace.py +1 -0
  264. scitex/dict/_safe_merge.py +1 -0
  265. scitex/dict/_to_str.py +2 -3
  266. scitex/dsp/__init__.py +13 -4
  267. scitex/dsp/_crop.py +3 -1
  268. scitex/dsp/_detect_ripples.py +3 -1
  269. scitex/dsp/_modulation_index.py +3 -1
  270. scitex/dsp/_time.py +3 -1
  271. scitex/dsp/_wavelet.py +0 -1
  272. scitex/dsp/example.py +0 -5
  273. scitex/dsp/filt.py +4 -0
  274. scitex/dsp/utils/__init__.py +4 -1
  275. scitex/dsp/utils/pac.py +3 -3
  276. scitex/dt/_normalize_timestamp.py +4 -1
  277. scitex/errors.py +3 -6
  278. scitex/etc/__init__.py +1 -1
  279. scitex/fig/__init__.py +352 -0
  280. scitex/{vis → fig}/backend/__init__.py +3 -3
  281. scitex/{vis/backend/export.py → fig/backend/_export.py} +1 -1
  282. scitex/{vis/backend/parser.py → fig/backend/_parser.py} +2 -4
  283. scitex/{vis/backend/render.py → fig/backend/_render.py} +1 -1
  284. scitex/{vis → fig}/canvas.py +16 -4
  285. scitex/{vis → fig}/editor/__init__.py +0 -0
  286. scitex/{vis → fig}/editor/_dearpygui_editor.py +450 -304
  287. scitex/fig/editor/_defaults.py +300 -0
  288. scitex/fig/editor/_edit.py +751 -0
  289. scitex/{vis → fig}/editor/_flask_editor.py +8 -8
  290. scitex/{vis → fig}/editor/_mpl_editor.py +63 -48
  291. scitex/{vis → fig}/editor/_qt_editor.py +391 -160
  292. scitex/{vis → fig}/editor/_tkinter_editor.py +146 -89
  293. scitex/fig/editor/flask_editor/__init__.py +21 -0
  294. scitex/fig/editor/flask_editor/_bbox.py +1276 -0
  295. scitex/fig/editor/flask_editor/_core.py +624 -0
  296. scitex/fig/editor/flask_editor/_plotter.py +601 -0
  297. scitex/fig/editor/flask_editor/_renderer.py +739 -0
  298. scitex/{vis/editor/flask_editor/utils.py → fig/editor/flask_editor/_utils.py} +13 -14
  299. scitex/{vis → fig}/editor/flask_editor/templates/__init__.py +6 -6
  300. scitex/fig/editor/flask_editor/templates/_html.py +834 -0
  301. scitex/fig/editor/flask_editor/templates/_scripts.py +3136 -0
  302. scitex/fig/editor/flask_editor/templates/_styles.py +1346 -0
  303. scitex/{vis → fig}/io/__init__.py +18 -6
  304. scitex/fig/io/_bundle.py +973 -0
  305. scitex/{vis/io/canvas.py → fig/io/_canvas.py} +9 -5
  306. scitex/{vis/io/data.py → fig/io/_data.py} +14 -10
  307. scitex/{vis/io/directory.py → fig/io/_directory.py} +7 -4
  308. scitex/{vis/io/export.py → fig/io/_export.py} +16 -13
  309. scitex/{vis/io/load.py → fig/io/_load.py} +2 -2
  310. scitex/{vis/io/panel.py → fig/io/_panel.py} +22 -14
  311. scitex/{vis/io/save.py → fig/io/_save.py} +1 -1
  312. scitex/{vis → fig}/model/__init__.py +8 -8
  313. scitex/{vis/model/annotations.py → fig/model/_annotations.py} +3 -5
  314. scitex/{vis/model/axes.py → fig/model/_axes.py} +2 -2
  315. scitex/{vis/model/figure.py → fig/model/_figure.py} +1 -1
  316. scitex/{vis/model/guides.py → fig/model/_guides.py} +2 -2
  317. scitex/{vis/model/plot.py → fig/model/_plot.py} +3 -5
  318. scitex/{vis/model/plot_types.py → fig/model/_plot_types.py} +0 -0
  319. scitex/{vis/model/styles.py → fig/model/_styles.py} +1 -1
  320. scitex/{vis → fig}/utils/__init__.py +3 -3
  321. scitex/{vis/utils/defaults.py → fig/utils/_defaults.py} +1 -2
  322. scitex/{vis/utils/validate.py → fig/utils/_validate.py} +3 -9
  323. scitex/gen/_DimHandler.py +6 -6
  324. scitex/gen/__init__.py +5 -1
  325. scitex/gen/_deprecated_close.py +1 -0
  326. scitex/gen/_deprecated_start.py +5 -3
  327. scitex/gen/_detect_environment.py +44 -41
  328. scitex/gen/_detect_notebook_path.py +51 -47
  329. scitex/gen/_embed.py +1 -1
  330. scitex/gen/_get_notebook_path.py +81 -62
  331. scitex/gen/_inspect_module.py +0 -1
  332. scitex/gen/_norm.py +16 -7
  333. scitex/gen/_norm_cache.py +78 -65
  334. scitex/gen/_print_config.py +0 -3
  335. scitex/gen/_src.py +2 -3
  336. scitex/gen/_title_case.py +3 -2
  337. scitex/gen/_to_even.py +8 -8
  338. scitex/gen/_transpose.py +3 -3
  339. scitex/gen/misc.py +0 -3
  340. scitex/gists/_SigMacro_processFigure_S.py +2 -2
  341. scitex/gists/_SigMacro_toBlue.py +2 -2
  342. scitex/gists/__init__.py +4 -1
  343. scitex/git/_branch.py +19 -11
  344. scitex/git/_clone.py +23 -15
  345. scitex/git/_commit.py +10 -12
  346. scitex/git/_init.py +15 -38
  347. scitex/git/_remote.py +9 -3
  348. scitex/git/_result.py +3 -0
  349. scitex/git/_retry.py +2 -5
  350. scitex/git/_types.py +4 -0
  351. scitex/git/_validation.py +8 -8
  352. scitex/git/_workflow.py +4 -4
  353. scitex/io/__init__.py +12 -27
  354. scitex/io/_bundle.py +434 -0
  355. scitex/io/_flush.py +5 -2
  356. scitex/io/_glob.py +2 -2
  357. scitex/io/_json2md.py +3 -3
  358. scitex/io/_load.py +104 -8
  359. scitex/io/_load_cache.py +71 -71
  360. scitex/io/_load_configs.py +2 -3
  361. scitex/io/_load_modules/_H5Explorer.py +11 -14
  362. scitex/io/_load_modules/_ZarrExplorer.py +3 -3
  363. scitex/io/_load_modules/_bibtex.py +62 -63
  364. scitex/io/_load_modules/_canvas.py +6 -11
  365. scitex/io/_load_modules/_catboost.py +7 -2
  366. scitex/io/_load_modules/_hdf5.py +2 -0
  367. scitex/io/_load_modules/_image.py +7 -4
  368. scitex/io/_load_modules/_matlab.py +3 -1
  369. scitex/io/_load_modules/_optuna.py +0 -1
  370. scitex/io/_load_modules/_pdf.py +38 -29
  371. scitex/io/_load_modules/_sqlite3.py +1 -0
  372. scitex/io/_load_modules/_txt.py +6 -2
  373. scitex/io/_load_modules/_xml.py +9 -9
  374. scitex/io/_load_modules/_zarr.py +12 -10
  375. scitex/io/_metadata.py +34 -285
  376. scitex/io/_metadata_modules/__init__.py +46 -0
  377. scitex/io/_metadata_modules/_embed.py +70 -0
  378. scitex/io/_metadata_modules/_read.py +64 -0
  379. scitex/io/_metadata_modules/_utils.py +79 -0
  380. scitex/io/_metadata_modules/embed_metadata_jpeg.py +74 -0
  381. scitex/io/_metadata_modules/embed_metadata_pdf.py +53 -0
  382. scitex/io/_metadata_modules/embed_metadata_png.py +26 -0
  383. scitex/io/_metadata_modules/embed_metadata_svg.py +62 -0
  384. scitex/io/_metadata_modules/read_metadata_jpeg.py +57 -0
  385. scitex/io/_metadata_modules/read_metadata_pdf.py +51 -0
  386. scitex/io/_metadata_modules/read_metadata_png.py +39 -0
  387. scitex/io/_metadata_modules/read_metadata_svg.py +44 -0
  388. scitex/io/_qr_utils.py +21 -14
  389. scitex/io/_save.py +755 -80
  390. scitex/io/_save_modules/__init__.py +7 -2
  391. scitex/io/_save_modules/_bibtex.py +66 -61
  392. scitex/io/_save_modules/_canvas.py +8 -9
  393. scitex/io/_save_modules/_catboost.py +2 -2
  394. scitex/io/_save_modules/_csv.py +4 -4
  395. scitex/io/_save_modules/_excel.py +5 -9
  396. scitex/io/_save_modules/_hdf5.py +9 -21
  397. scitex/io/_save_modules/_html.py +5 -5
  398. scitex/io/_save_modules/_image.py +107 -14
  399. scitex/io/_save_modules/_joblib.py +2 -2
  400. scitex/io/_save_modules/_json.py +51 -6
  401. scitex/io/_save_modules/_listed_dfs_as_csv.py +2 -1
  402. scitex/io/_save_modules/_listed_scalars_as_csv.py +2 -1
  403. scitex/io/_save_modules/_matlab.py +2 -2
  404. scitex/io/_save_modules/_numpy.py +6 -8
  405. scitex/io/_save_modules/_pickle.py +4 -4
  406. scitex/io/_save_modules/_plotly.py +3 -3
  407. scitex/io/_save_modules/_tex.py +30 -29
  408. scitex/io/_save_modules/_text.py +2 -2
  409. scitex/io/_save_modules/_yaml.py +9 -9
  410. scitex/io/_save_modules/_zarr.py +15 -15
  411. scitex/io/utils/__init__.py +2 -1
  412. scitex/io/utils/h5_to_zarr.py +183 -163
  413. scitex/linalg/__init__.py +1 -1
  414. scitex/linalg/_geometric_median.py +4 -3
  415. scitex/logging/_Tee.py +5 -7
  416. scitex/logging/__init__.py +18 -19
  417. scitex/logging/_config.py +4 -1
  418. scitex/logging/_context.py +6 -5
  419. scitex/logging/_formatters.py +2 -3
  420. scitex/logging/_handlers.py +19 -20
  421. scitex/logging/_levels.py +9 -17
  422. scitex/logging/_logger.py +74 -15
  423. scitex/logging/_print_capture.py +17 -17
  424. scitex/msword/__init__.py +255 -0
  425. scitex/msword/profiles.py +357 -0
  426. scitex/msword/reader.py +753 -0
  427. scitex/msword/utils.py +289 -0
  428. scitex/msword/writer.py +362 -0
  429. scitex/nn/_BNet.py +1 -3
  430. scitex/nn/_Filters.py +6 -2
  431. scitex/nn/_ModulationIndex.py +3 -1
  432. scitex/nn/_PAC.py +3 -2
  433. scitex/nn/_PSD.py +0 -1
  434. scitex/nn/__init__.py +16 -3
  435. scitex/path/_clean.py +10 -8
  436. scitex/path/_find.py +1 -1
  437. scitex/path/_get_spath.py +1 -2
  438. scitex/path/_mk_spath.py +1 -1
  439. scitex/path/_symlink.py +5 -10
  440. scitex/pd/__init__.py +4 -1
  441. scitex/pd/_force_df.py +24 -24
  442. scitex/pd/_get_unique.py +1 -0
  443. scitex/pd/_merge_columns.py +1 -1
  444. scitex/pd/_round.py +11 -7
  445. scitex/pd/_to_xy.py +0 -1
  446. scitex/plt/__init__.py +190 -89
  447. scitex/plt/_subplots/_AxesWrapper.py +28 -12
  448. scitex/plt/_subplots/_AxisWrapper.py +114 -47
  449. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/__init__.py +36 -0
  450. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_labels.py +264 -0
  451. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_metadata.py +213 -0
  452. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin/_visual.py +128 -0
  453. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/__init__.py +59 -0
  454. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_base.py +34 -0
  455. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_scientific.py +593 -0
  456. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_statistical.py +654 -0
  457. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin/_stx_aliases.py +527 -0
  458. scitex/plt/_subplots/_AxisWrapperMixins/_RawMatplotlibMixin.py +321 -0
  459. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/__init__.py +33 -0
  460. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_base.py +152 -0
  461. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin/_wrappers.py +600 -0
  462. scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +26 -14
  463. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +80 -73
  464. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +79 -5
  465. scitex/plt/_subplots/_FigWrapper.py +97 -64
  466. scitex/plt/_subplots/_SubplotsWrapper.py +161 -84
  467. scitex/plt/_subplots/__init__.py +10 -0
  468. scitex/plt/_subplots/_export_as_csv.py +124 -52
  469. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +9 -0
  470. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +14 -23
  471. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +88 -38
  472. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +25 -31
  473. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +53 -23
  474. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +38 -25
  475. scitex/plt/_subplots/_export_as_csv_formatters/_format_contourf.py +17 -9
  476. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +70 -124
  477. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +30 -17
  478. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +31 -17
  479. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +33 -21
  480. scitex/plt/_subplots/_export_as_csv_formatters/_format_hexbin.py +14 -4
  481. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +43 -29
  482. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist2d.py +14 -4
  483. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +27 -11
  484. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +34 -16
  485. scitex/plt/_subplots/_export_as_csv_formatters/_format_matshow.py +16 -8
  486. scitex/plt/_subplots/_export_as_csv_formatters/_format_pie.py +15 -6
  487. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +85 -46
  488. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +52 -27
  489. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_imshow.py +14 -1
  490. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +27 -18
  491. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +14 -5
  492. scitex/plt/_subplots/_export_as_csv_formatters/_format_quiver.py +16 -8
  493. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +17 -6
  494. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +43 -26
  495. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +68 -47
  496. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +52 -64
  497. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +55 -50
  498. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +23 -10
  499. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +63 -29
  500. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +48 -40
  501. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +20 -6
  502. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +44 -40
  503. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +46 -39
  504. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +46 -39
  505. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +75 -94
  506. scitex/plt/_subplots/_export_as_csv_formatters/_format_stem.py +12 -3
  507. scitex/plt/_subplots/_export_as_csv_formatters/_format_step.py +12 -3
  508. scitex/plt/_subplots/_export_as_csv_formatters/_format_streamplot.py +17 -9
  509. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_bar.py +84 -0
  510. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_barh.py +85 -0
  511. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_conf_mat.py +31 -18
  512. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_contour.py +54 -0
  513. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_ecdf.py +24 -11
  514. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_errorbar.py +120 -0
  515. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_fillv.py +35 -31
  516. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_heatmap.py +33 -23
  517. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_image.py +44 -28
  518. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_imshow.py +63 -0
  519. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_joyplot.py +31 -12
  520. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_line.py +34 -23
  521. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_ci.py +32 -26
  522. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_mean_std.py +29 -23
  523. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_median_iqr.py +32 -26
  524. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_raster.py +21 -11
  525. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_rectangle.py +84 -56
  526. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_scatter.py +51 -0
  527. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_scatter_hist.py +46 -34
  528. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_shaded_line.py +46 -30
  529. scitex/plt/_subplots/_export_as_csv_formatters/_format_stx_violin.py +51 -51
  530. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +32 -31
  531. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +34 -31
  532. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +44 -37
  533. scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +91 -74
  534. scitex/plt/_tpl.py +6 -5
  535. scitex/plt/ax/_plot/__init__.py +24 -0
  536. scitex/plt/ax/_plot/_add_fitted_line.py +12 -11
  537. scitex/plt/ax/_plot/_plot_circular_hist.py +3 -1
  538. scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +25 -19
  539. scitex/plt/ax/_plot/_stx_conf_mat.py +6 -3
  540. scitex/plt/ax/_plot/_stx_ecdf.py +9 -5
  541. scitex/plt/ax/_plot/_stx_fillv.py +4 -2
  542. scitex/plt/ax/_plot/_stx_heatmap.py +7 -4
  543. scitex/plt/ax/_plot/_stx_image.py +7 -5
  544. scitex/plt/ax/_plot/_stx_joyplot.py +32 -10
  545. scitex/plt/ax/_plot/_stx_raster.py +26 -11
  546. scitex/plt/ax/_plot/_stx_rectangle.py +2 -2
  547. scitex/plt/ax/_plot/_stx_shaded_line.py +15 -11
  548. scitex/plt/ax/_plot/_stx_violin.py +3 -1
  549. scitex/plt/ax/_style/_add_marginal_ax.py +6 -4
  550. scitex/plt/ax/_style/_auto_scale_axis.py +14 -10
  551. scitex/plt/ax/_style/_extend.py +3 -1
  552. scitex/plt/ax/_style/_force_aspect.py +5 -3
  553. scitex/plt/ax/_style/_format_units.py +2 -2
  554. scitex/plt/ax/_style/_hide_spines.py +5 -1
  555. scitex/plt/ax/_style/_map_ticks.py +5 -3
  556. scitex/plt/ax/_style/_rotate_labels.py +5 -4
  557. scitex/plt/ax/_style/_rotate_labels_v01.py +73 -63
  558. scitex/plt/ax/_style/_set_log_scale.py +120 -85
  559. scitex/plt/ax/_style/_set_meta.py +99 -76
  560. scitex/plt/ax/_style/_set_supxyt.py +33 -16
  561. scitex/plt/ax/_style/_set_xyt.py +27 -18
  562. scitex/plt/ax/_style/_share_axes.py +15 -5
  563. scitex/plt/ax/_style/_show_spines.py +58 -57
  564. scitex/plt/ax/_style/_style_barplot.py +1 -1
  565. scitex/plt/ax/_style/_style_boxplot.py +25 -14
  566. scitex/plt/ax/_style/_style_errorbar.py +0 -0
  567. scitex/plt/ax/_style/_style_scatter.py +1 -1
  568. scitex/plt/ax/_style/_style_suptitles.py +3 -3
  569. scitex/plt/ax/_style/_style_violinplot.py +8 -2
  570. scitex/plt/color/__init__.py +34 -2
  571. scitex/plt/color/_add_hue_col.py +1 -0
  572. scitex/plt/color/_colors.py +0 -1
  573. scitex/plt/color/_get_colors_from_conf_matap.py +3 -1
  574. scitex/plt/color/_vizualize_colors.py +0 -1
  575. scitex/plt/docs/FIGURE_ARCHITECTURE.md +155 -97
  576. scitex/plt/gallery/README.md +75 -0
  577. scitex/plt/gallery/__init__.py +29 -0
  578. scitex/plt/gallery/_generate.py +560 -0
  579. scitex/plt/gallery/_plots.py +594 -0
  580. scitex/plt/gallery/_registry.py +153 -0
  581. scitex/plt/io/__init__.py +53 -0
  582. scitex/plt/io/_bundle.py +490 -0
  583. scitex/plt/io/_layered_bundle.py +1343 -0
  584. scitex/plt/styles/SCITEX_STYLE.yaml +26 -0
  585. scitex/plt/styles/__init__.py +23 -9
  586. scitex/plt/styles/_plot_defaults.py +62 -61
  587. scitex/plt/styles/_plot_postprocess.py +126 -77
  588. scitex/plt/styles/_style_loader.py +0 -0
  589. scitex/plt/styles/presets.py +121 -18
  590. scitex/plt/utils/__init__.py +42 -3
  591. scitex/plt/utils/_close.py +8 -3
  592. scitex/plt/utils/_collect_figure_metadata.py +3033 -271
  593. scitex/plt/utils/_colorbar.py +15 -17
  594. scitex/plt/utils/_configure_mpl.py +26 -30
  595. scitex/plt/utils/_crop.py +87 -36
  596. scitex/plt/utils/_csv_column_naming.py +177 -72
  597. scitex/plt/utils/_dimension_viewer.py +7 -19
  598. scitex/plt/utils/_figure_from_axes_mm.py +70 -16
  599. scitex/plt/utils/_figure_mm.py +119 -3
  600. scitex/plt/utils/_get_actual_font.py +5 -4
  601. scitex/plt/utils/_histogram_utils.py +52 -48
  602. scitex/plt/utils/_hitmap.py +1643 -0
  603. scitex/plt/utils/_is_valid_axis.py +19 -13
  604. scitex/plt/utils/_mk_colorbar.py +3 -3
  605. scitex/plt/utils/_scientific_captions.py +202 -139
  606. scitex/plt/utils/_scitex_config.py +98 -98
  607. scitex/plt/utils/_units.py +0 -0
  608. scitex/plt/utils/metadata/__init__.py +61 -0
  609. scitex/plt/utils/metadata/_artist_extraction.py +119 -0
  610. scitex/plt/utils/metadata/_axes_metadata.py +93 -0
  611. scitex/plt/utils/metadata/_collection_artists.py +292 -0
  612. scitex/plt/utils/metadata/_core.py +207 -0
  613. scitex/plt/utils/metadata/_csv_column_extraction.py +186 -0
  614. scitex/plt/utils/metadata/_csv_hash.py +115 -0
  615. scitex/plt/utils/metadata/_csv_verification.py +95 -0
  616. scitex/plt/utils/metadata/_data_linkage.py +263 -0
  617. scitex/plt/utils/metadata/_dimensions.py +242 -0
  618. scitex/plt/utils/metadata/_editable_export.py +405 -0
  619. scitex/plt/utils/metadata/_figure_metadata.py +58 -0
  620. scitex/plt/utils/metadata/_geometry_extraction.py +570 -0
  621. scitex/plt/utils/metadata/_image_text_artists.py +168 -0
  622. scitex/plt/utils/metadata/_label_parsing.py +82 -0
  623. scitex/plt/utils/metadata/_legend_extraction.py +120 -0
  624. scitex/plt/utils/metadata/_line_artists.py +367 -0
  625. scitex/plt/utils/metadata/_line_semantic_handling.py +173 -0
  626. scitex/plt/utils/metadata/_patch_artists.py +211 -0
  627. scitex/plt/utils/metadata/_plot_content.py +26 -0
  628. scitex/plt/utils/metadata/_plot_type_detection.py +184 -0
  629. scitex/plt/utils/metadata/_precision.py +134 -0
  630. scitex/plt/utils/metadata/_precision_config.py +68 -0
  631. scitex/plt/utils/metadata/_precision_sections.py +211 -0
  632. scitex/plt/utils/metadata/_recipe_extraction.py +267 -0
  633. scitex/plt/utils/metadata/_style_parsing.py +174 -0
  634. scitex/repro/_RandomStateManager.py +33 -38
  635. scitex/repro/__init__.py +16 -7
  636. scitex/repro/_gen_ID.py +7 -9
  637. scitex/repro/_gen_timestamp.py +7 -6
  638. scitex/repro/_hash_array.py +8 -12
  639. scitex/reproduce/__init__.py +1 -1
  640. scitex/resource/_get_processor_usages.py +3 -1
  641. scitex/resource/_log_processor_usages.py +3 -1
  642. scitex/rng/__init__.py +1 -1
  643. scitex/schema/README.md +178 -0
  644. scitex/schema/__init__.py +237 -0
  645. scitex/schema/_canvas.py +444 -0
  646. scitex/schema/_plot.py +1015 -0
  647. scitex/schema/_stats.py +762 -0
  648. scitex/schema/_validation.py +590 -0
  649. scitex/scholar/.legacy/Scholar.py +5 -12
  650. scitex/scholar/.legacy/_Scholar.py +66 -99
  651. scitex/scholar/.legacy/_ScholarAPI.py +75 -66
  652. scitex/scholar/.legacy/_tmp/search_engine/_BaseSearchEngine.py +3 -3
  653. scitex/scholar/.legacy/_tmp/search_engine/_UnifiedSearcher.py +4 -9
  654. scitex/scholar/.legacy/_tmp/search_engine/__init__.py +14 -21
  655. scitex/scholar/.legacy/_tmp/search_engine/local/_LocalSearchEngine.py +40 -37
  656. scitex/scholar/.legacy/_tmp/search_engine/local/_VectorSearchEngine.py +31 -28
  657. scitex/scholar/.legacy/_tmp/search_engine/web/_ArxivSearchEngine.py +74 -65
  658. scitex/scholar/.legacy/_tmp/search_engine/web/_CrossRefSearchEngine.py +122 -116
  659. scitex/scholar/.legacy/_tmp/search_engine/web/_GoogleScholarSearchEngine.py +65 -59
  660. scitex/scholar/.legacy/_tmp/search_engine/web/_PubMedSearchEngine.py +121 -107
  661. scitex/scholar/.legacy/_tmp/search_engine/web/_SemanticScholarSearchEngine.py +5 -12
  662. scitex/scholar/.legacy/database/_DatabaseEntry.py +49 -45
  663. scitex/scholar/.legacy/database/_DatabaseIndex.py +131 -94
  664. scitex/scholar/.legacy/database/_LibraryManager.py +65 -63
  665. scitex/scholar/.legacy/database/_PaperDatabase.py +138 -124
  666. scitex/scholar/.legacy/database/_ScholarDatabaseIntegration.py +14 -36
  667. scitex/scholar/.legacy/database/_StorageIntegratedDB.py +192 -156
  668. scitex/scholar/.legacy/database/_ZoteroCompatibleDB.py +300 -237
  669. scitex/scholar/.legacy/database/__init__.py +2 -1
  670. scitex/scholar/.legacy/database/manage.py +92 -84
  671. scitex/scholar/.legacy/lookup/_LookupIndex.py +157 -101
  672. scitex/scholar/.legacy/lookup/__init__.py +2 -1
  673. scitex/scholar/.legacy/metadata/doi/batch/_MetadataHandlerForBatchDOIResolution.py +4 -9
  674. scitex/scholar/.legacy/metadata/doi/batch/_ProgressManagerForBatchDOIResolution.py +10 -23
  675. scitex/scholar/.legacy/metadata/doi/batch/_SourceStatsManagerForBatchDOIResolution.py +4 -9
  676. scitex/scholar/.legacy/metadata/doi/batch/__init__.py +3 -1
  677. scitex/scholar/.legacy/metadata/doi/resolvers/_BatchDOIResolver.py +10 -25
  678. scitex/scholar/.legacy/metadata/doi/resolvers/_BibTeXDOIResolver.py +19 -49
  679. scitex/scholar/.legacy/metadata/doi/resolvers/_DOIResolver.py +1 -0
  680. scitex/scholar/.legacy/metadata/doi/resolvers/_SingleDOIResolver.py +8 -20
  681. scitex/scholar/.legacy/metadata/doi/sources/.combined-SemanticScholarSource/_SemanticScholarSource.py +37 -35
  682. scitex/scholar/.legacy/metadata/doi/sources/.combined-SemanticScholarSource/_SemanticScholarSourceEnhanced.py +49 -37
  683. scitex/scholar/.legacy/metadata/doi/sources/_ArXivSource.py +11 -30
  684. scitex/scholar/.legacy/metadata/doi/sources/_BaseDOISource.py +19 -47
  685. scitex/scholar/.legacy/metadata/doi/sources/_CrossRefLocalSource.py +1 -0
  686. scitex/scholar/.legacy/metadata/doi/sources/_CrossRefSource.py +12 -33
  687. scitex/scholar/.legacy/metadata/doi/sources/_OpenAlexSource.py +8 -20
  688. scitex/scholar/.legacy/metadata/doi/sources/_PubMedSource.py +10 -27
  689. scitex/scholar/.legacy/metadata/doi/sources/_SemanticScholarSource.py +11 -29
  690. scitex/scholar/.legacy/metadata/doi/sources/_SourceManager.py +8 -21
  691. scitex/scholar/.legacy/metadata/doi/sources/_SourceResolutionStrategy.py +24 -55
  692. scitex/scholar/.legacy/metadata/doi/sources/_SourceRotationManager.py +8 -21
  693. scitex/scholar/.legacy/metadata/doi/sources/_URLDOISource.py +9 -16
  694. scitex/scholar/.legacy/metadata/doi/sources/_UnifiedSource.py +8 -22
  695. scitex/scholar/.legacy/metadata/doi/sources/__init__.py +1 -0
  696. scitex/scholar/.legacy/metadata/doi/utils/_PubMedConverter.py +4 -8
  697. scitex/scholar/.legacy/metadata/doi/utils/_RateLimitHandler.py +17 -43
  698. scitex/scholar/.legacy/metadata/doi/utils/_TextNormalizer.py +8 -18
  699. scitex/scholar/.legacy/metadata/doi/utils/_URLDOIExtractor.py +4 -8
  700. scitex/scholar/.legacy/metadata/doi/utils/__init__.py +1 -0
  701. scitex/scholar/.legacy/metadata/doi/utils/_to_complete_metadata_structure.py +1 -0
  702. scitex/scholar/.legacy/metadata/enrichment/_LibraryEnricher.py +2 -3
  703. scitex/scholar/.legacy/metadata/enrichment/enrichers/_ImpactFactorEnricher.py +6 -12
  704. scitex/scholar/.legacy/metadata/enrichment/enrichers/_SmartEnricher.py +5 -10
  705. scitex/scholar/.legacy/metadata/enrichment/sources/_UnifiedMetadataSource.py +4 -5
  706. scitex/scholar/.legacy/metadata/query_to_full_meta_json.py +8 -12
  707. scitex/scholar/.legacy/metadata/urls/_URLMetadataHandler.py +3 -3
  708. scitex/scholar/.legacy/metadata/urls/_ZoteroTranslatorRunner.py +15 -21
  709. scitex/scholar/.legacy/metadata/urls/__init__.py +3 -3
  710. scitex/scholar/.legacy/metadata/urls/_finder.py +4 -6
  711. scitex/scholar/.legacy/metadata/urls/_handler.py +7 -15
  712. scitex/scholar/.legacy/metadata/urls/_resolver.py +6 -12
  713. scitex/scholar/.legacy/search/_Embedder.py +74 -69
  714. scitex/scholar/.legacy/search/_SemanticSearch.py +91 -90
  715. scitex/scholar/.legacy/search/_SemanticSearchEngine.py +104 -109
  716. scitex/scholar/.legacy/search/_UnifiedSearcher.py +530 -471
  717. scitex/scholar/.legacy/search/_VectorDatabase.py +111 -92
  718. scitex/scholar/.legacy/search/__init__.py +1 -0
  719. scitex/scholar/.legacy/storage/_EnhancedStorageManager.py +182 -154
  720. scitex/scholar/.legacy/storage/__init__.py +2 -1
  721. scitex/scholar/__init__.py +0 -2
  722. scitex/scholar/__main__.py +1 -3
  723. scitex/scholar/auth/ScholarAuthManager.py +13 -36
  724. scitex/scholar/auth/core/AuthenticationGateway.py +15 -29
  725. scitex/scholar/auth/core/BrowserAuthenticator.py +22 -57
  726. scitex/scholar/auth/core/StrategyResolver.py +10 -27
  727. scitex/scholar/auth/core/__init__.py +5 -1
  728. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +11 -21
  729. scitex/scholar/auth/gateway/_OpenURLResolver.py +10 -18
  730. scitex/scholar/auth/gateway/_resolve_functions.py +3 -3
  731. scitex/scholar/auth/providers/BaseAuthenticator.py +1 -0
  732. scitex/scholar/auth/providers/EZProxyAuthenticator.py +7 -14
  733. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +29 -57
  734. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +87 -73
  735. scitex/scholar/auth/session/AuthCacheManager.py +12 -22
  736. scitex/scholar/auth/session/SessionManager.py +4 -6
  737. scitex/scholar/auth/sso/BaseSSOAutomator.py +13 -19
  738. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +16 -45
  739. scitex/scholar/auth/sso/SSOAutomator.py +8 -15
  740. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +13 -23
  741. scitex/scholar/browser/ScholarBrowserManager.py +31 -56
  742. scitex/scholar/browser/__init__.py +1 -0
  743. scitex/scholar/browser/utils/click_and_wait.py +3 -4
  744. scitex/scholar/browser/utils/close_unwanted_pages.py +4 -7
  745. scitex/scholar/browser/utils/wait_redirects.py +15 -40
  746. scitex/scholar/citation_graph/__init__.py +0 -0
  747. scitex/scholar/citation_graph/builder.py +3 -7
  748. scitex/scholar/citation_graph/database.py +4 -11
  749. scitex/scholar/citation_graph/example.py +5 -10
  750. scitex/scholar/citation_graph/models.py +0 -0
  751. scitex/scholar/cli/_url_utils.py +1 -1
  752. scitex/scholar/cli/chrome.py +5 -3
  753. scitex/scholar/cli/download_pdf.py +13 -14
  754. scitex/scholar/cli/handlers/bibtex_handler.py +4 -12
  755. scitex/scholar/cli/handlers/doi_handler.py +1 -3
  756. scitex/scholar/cli/handlers/project_handler.py +6 -20
  757. scitex/scholar/cli/open_browser.py +41 -39
  758. scitex/scholar/cli/open_browser_auto.py +31 -39
  759. scitex/scholar/cli/open_browser_monitored.py +27 -24
  760. scitex/scholar/config/ScholarConfig.py +5 -8
  761. scitex/scholar/config/__init__.py +1 -0
  762. scitex/scholar/config/core/_CascadeConfig.py +3 -3
  763. scitex/scholar/config/core/_PathManager.py +16 -28
  764. scitex/scholar/core/Paper.py +79 -78
  765. scitex/scholar/core/Papers.py +16 -27
  766. scitex/scholar/core/Scholar.py +98 -229
  767. scitex/scholar/core/journal_normalizer.py +52 -49
  768. scitex/scholar/core/oa_cache.py +27 -23
  769. scitex/scholar/core/open_access.py +17 -8
  770. scitex/scholar/docs/template.py +4 -3
  771. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/clf_svm.py +0 -0
  772. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/download.py +0 -0
  773. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_conf_mat.py +0 -0
  774. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_digits.py +0 -0
  775. scitex/scholar/docs/to_claude/examples/example-python-project-scitex/scripts/mnist/plot_umap_space.py +0 -0
  776. scitex/scholar/examples/00_config.py +10 -9
  777. scitex/scholar/examples/01_auth.py +3 -0
  778. scitex/scholar/examples/02_browser.py +14 -10
  779. scitex/scholar/examples/03_01-engine.py +3 -0
  780. scitex/scholar/examples/03_02-engine-for-bibtex.py +4 -3
  781. scitex/scholar/examples/04_01-url.py +9 -9
  782. scitex/scholar/examples/04_02-url-for-bibtex.py +7 -3
  783. scitex/scholar/examples/04_02-url-for-dois.py +87 -97
  784. scitex/scholar/examples/05_download_pdf.py +10 -4
  785. scitex/scholar/examples/06_find_and_download.py +6 -6
  786. scitex/scholar/examples/06_parse_bibtex.py +17 -17
  787. scitex/scholar/examples/07_storage_integration.py +6 -9
  788. scitex/scholar/examples/99_fullpipeline-for-bibtex.py +14 -15
  789. scitex/scholar/examples/99_fullpipeline-for-one-entry.py +31 -23
  790. scitex/scholar/examples/99_maintenance.py +3 -0
  791. scitex/scholar/examples/dev.py +2 -3
  792. scitex/scholar/examples/zotero_integration.py +11 -18
  793. scitex/scholar/impact_factor/ImpactFactorEngine.py +7 -9
  794. scitex/scholar/impact_factor/estimation/__init__.py +4 -4
  795. scitex/scholar/impact_factor/estimation/core/__init__.py +3 -7
  796. scitex/scholar/impact_factor/estimation/core/cache_manager.py +223 -211
  797. scitex/scholar/impact_factor/estimation/core/calculator.py +165 -131
  798. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +217 -172
  799. scitex/scholar/impact_factor/jcr/ImpactFactorJCREngine.py +6 -14
  800. scitex/scholar/impact_factor/jcr/build_database.py +4 -3
  801. scitex/scholar/integration/base.py +9 -17
  802. scitex/scholar/integration/mendeley/exporter.py +2 -4
  803. scitex/scholar/integration/mendeley/importer.py +3 -3
  804. scitex/scholar/integration/mendeley/linker.py +3 -3
  805. scitex/scholar/integration/mendeley/mapper.py +9 -6
  806. scitex/scholar/integration/zotero/__main__.py +26 -43
  807. scitex/scholar/integration/zotero/exporter.py +15 -11
  808. scitex/scholar/integration/zotero/importer.py +12 -10
  809. scitex/scholar/integration/zotero/linker.py +8 -12
  810. scitex/scholar/integration/zotero/mapper.py +17 -12
  811. scitex/scholar/metadata_engines/.combined-SemanticScholarSource/_SemanticScholarSource.py +37 -35
  812. scitex/scholar/metadata_engines/.combined-SemanticScholarSource/_SemanticScholarSourceEnhanced.py +47 -35
  813. scitex/scholar/metadata_engines/ScholarEngine.py +21 -43
  814. scitex/scholar/metadata_engines/__init__.py +1 -0
  815. scitex/scholar/metadata_engines/individual/ArXivEngine.py +15 -37
  816. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +15 -42
  817. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +24 -45
  818. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +11 -21
  819. scitex/scholar/metadata_engines/individual/PubMedEngine.py +10 -27
  820. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +28 -35
  821. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +11 -22
  822. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +20 -49
  823. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +4 -8
  824. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +5 -10
  825. scitex/scholar/metadata_engines/utils/__init__.py +2 -0
  826. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +3 -0
  827. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +2 -3
  828. scitex/scholar/pdf_download/ScholarPDFDownloader.py +25 -37
  829. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +11 -19
  830. scitex/scholar/pdf_download/strategies/direct_download.py +5 -9
  831. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +3 -3
  832. scitex/scholar/pdf_download/strategies/manual_download_utils.py +6 -13
  833. scitex/scholar/pdf_download/strategies/open_access_download.py +49 -31
  834. scitex/scholar/pdf_download/strategies/response_body.py +8 -19
  835. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +9 -18
  836. scitex/scholar/pipelines/ScholarPipelineMetadataParallel.py +25 -26
  837. scitex/scholar/pipelines/ScholarPipelineMetadataSingle.py +62 -23
  838. scitex/scholar/pipelines/ScholarPipelineParallel.py +13 -30
  839. scitex/scholar/pipelines/ScholarPipelineSearchParallel.py +299 -220
  840. scitex/scholar/pipelines/ScholarPipelineSearchSingle.py +202 -165
  841. scitex/scholar/pipelines/ScholarPipelineSingle.py +25 -51
  842. scitex/scholar/pipelines/SearchQueryParser.py +55 -55
  843. scitex/scholar/search_engines/ScholarSearchEngine.py +31 -27
  844. scitex/scholar/search_engines/_BaseSearchEngine.py +20 -23
  845. scitex/scholar/search_engines/individual/ArXivSearchEngine.py +53 -35
  846. scitex/scholar/search_engines/individual/CrossRefSearchEngine.py +47 -40
  847. scitex/scholar/search_engines/individual/OpenAlexSearchEngine.py +55 -50
  848. scitex/scholar/search_engines/individual/PubMedSearchEngine.py +8 -10
  849. scitex/scholar/search_engines/individual/SemanticScholarSearchEngine.py +55 -49
  850. scitex/scholar/storage/BibTeXHandler.py +150 -95
  851. scitex/scholar/storage/PaperIO.py +3 -6
  852. scitex/scholar/storage/ScholarLibrary.py +70 -49
  853. scitex/scholar/storage/_DeduplicationManager.py +52 -25
  854. scitex/scholar/storage/_LibraryCacheManager.py +19 -46
  855. scitex/scholar/storage/_LibraryManager.py +65 -175
  856. scitex/scholar/url_finder/ScholarURLFinder.py +9 -25
  857. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +1 -1
  858. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +6 -10
  859. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +4 -6
  860. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +8 -15
  861. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +3 -3
  862. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +3 -3
  863. scitex/scholar/url_finder/translators/core/patterns.py +6 -4
  864. scitex/scholar/url_finder/translators/core/registry.py +6 -9
  865. scitex/scholar/url_finder/translators/individual/BOFiP_Impots.py +60 -52
  866. scitex/scholar/url_finder/translators/individual/Baidu_Scholar.py +54 -62
  867. scitex/scholar/url_finder/translators/individual/Bangkok_Post.py +38 -44
  868. scitex/scholar/url_finder/translators/individual/Baruch_Foundation.py +43 -47
  869. scitex/scholar/url_finder/translators/individual/Beobachter.py +46 -50
  870. scitex/scholar/url_finder/translators/individual/Bezneng_Gajit.py +37 -41
  871. scitex/scholar/url_finder/translators/individual/BibLaTeX.py +59 -52
  872. scitex/scholar/url_finder/translators/individual/BibTeX.py +83 -79
  873. scitex/scholar/url_finder/translators/individual/Biblio_com.py +48 -51
  874. scitex/scholar/url_finder/translators/individual/Bibliontology_RDF.py +58 -56
  875. scitex/scholar/url_finder/translators/individual/Camara_Brasileira_do_Livro_ISBN.py +102 -99
  876. scitex/scholar/url_finder/translators/individual/CanLII.py +49 -43
  877. scitex/scholar/url_finder/translators/individual/Canada_com.py +36 -40
  878. scitex/scholar/url_finder/translators/individual/Canadian_Letters_and_Images.py +43 -43
  879. scitex/scholar/url_finder/translators/individual/Canadiana_ca.py +77 -66
  880. scitex/scholar/url_finder/translators/individual/Cascadilla_Proceedings_Project.py +68 -62
  881. scitex/scholar/url_finder/translators/individual/Central_and_Eastern_European_Online_Library_Journals.py +60 -60
  882. scitex/scholar/url_finder/translators/individual/Champlain_Society_Collection.py +63 -61
  883. scitex/scholar/url_finder/translators/individual/Chicago_Journal_of_Theoretical_Computer_Science.py +74 -58
  884. scitex/scholar/url_finder/translators/individual/Christian_Science_Monitor.py +32 -38
  885. scitex/scholar/url_finder/translators/individual/Columbia_University_Press.py +51 -47
  886. scitex/scholar/url_finder/translators/individual/Common_Place.py +66 -57
  887. scitex/scholar/url_finder/translators/individual/Cornell_LII.py +66 -62
  888. scitex/scholar/url_finder/translators/individual/Cornell_University_Press.py +38 -45
  889. scitex/scholar/url_finder/translators/individual/CourtListener.py +52 -56
  890. scitex/scholar/url_finder/translators/individual/DAI_Zenon.py +53 -54
  891. scitex/scholar/url_finder/translators/individual/access_medicine.py +27 -33
  892. scitex/scholar/url_finder/translators/individual/acm.py +1 -1
  893. scitex/scholar/url_finder/translators/individual/acm_digital_library.py +93 -63
  894. scitex/scholar/url_finder/translators/individual/airiti.py +3 -1
  895. scitex/scholar/url_finder/translators/individual/aosic.py +3 -1
  896. scitex/scholar/url_finder/translators/individual/archive_ouverte_aosic.py +3 -1
  897. scitex/scholar/url_finder/translators/individual/archive_ouverte_en_sciences_de_l_information_et_de_la_communication___aosic_.py +6 -2
  898. scitex/scholar/url_finder/translators/individual/artforum.py +35 -27
  899. scitex/scholar/url_finder/translators/individual/arxiv.py +1 -1
  900. scitex/scholar/url_finder/translators/individual/arxiv_org.py +8 -4
  901. scitex/scholar/url_finder/translators/individual/atlanta_journal_constitution.py +22 -18
  902. scitex/scholar/url_finder/translators/individual/atypon_journals.py +19 -11
  903. scitex/scholar/url_finder/translators/individual/austlii_and_nzlii.py +48 -44
  904. scitex/scholar/url_finder/translators/individual/australian_dictionary_of_biography.py +21 -17
  905. scitex/scholar/url_finder/translators/individual/bailii.py +22 -19
  906. scitex/scholar/url_finder/translators/individual/bbc.py +46 -42
  907. scitex/scholar/url_finder/translators/individual/bbc_genome.py +37 -25
  908. scitex/scholar/url_finder/translators/individual/biblioteca_nacional_de_maestros.py +24 -20
  909. scitex/scholar/url_finder/translators/individual/bibliotheque_archives_nationale_quebec_pistard.py +42 -43
  910. scitex/scholar/url_finder/translators/individual/bibliotheque_archives_nationales_quebec.py +87 -81
  911. scitex/scholar/url_finder/translators/individual/bibliotheque_nationale_france.py +39 -37
  912. scitex/scholar/url_finder/translators/individual/bibsys.py +32 -28
  913. scitex/scholar/url_finder/translators/individual/bioconductor.py +58 -52
  914. scitex/scholar/url_finder/translators/individual/biomed_central.py +23 -15
  915. scitex/scholar/url_finder/translators/individual/biorxiv.py +26 -13
  916. scitex/scholar/url_finder/translators/individual/blogger.py +39 -43
  917. scitex/scholar/url_finder/translators/individual/bloomberg.py +48 -52
  918. scitex/scholar/url_finder/translators/individual/bloomsbury_food_library.py +37 -37
  919. scitex/scholar/url_finder/translators/individual/bluesky.py +30 -28
  920. scitex/scholar/url_finder/translators/individual/bnf_isbn.py +1 -1
  921. scitex/scholar/url_finder/translators/individual/bocc.py +66 -60
  922. scitex/scholar/url_finder/translators/individual/boe.py +52 -52
  923. scitex/scholar/url_finder/translators/individual/brill.py +3 -1
  924. scitex/scholar/url_finder/translators/individual/business_standard.py +36 -38
  925. scitex/scholar/url_finder/translators/individual/cabi_cab_abstracts.py +39 -41
  926. scitex/scholar/url_finder/translators/individual/cambridge.py +3 -1
  927. scitex/scholar/url_finder/translators/individual/cambridge_core.py +30 -24
  928. scitex/scholar/url_finder/translators/individual/caod.py +50 -46
  929. scitex/scholar/url_finder/translators/individual/cbc.py +91 -67
  930. scitex/scholar/url_finder/translators/individual/ccfr_bnf.py +49 -53
  931. scitex/scholar/url_finder/translators/individual/cia_world_factbook.py +43 -33
  932. scitex/scholar/url_finder/translators/individual/crossref_rest.py +208 -174
  933. scitex/scholar/url_finder/translators/individual/current_affairs.py +29 -35
  934. scitex/scholar/url_finder/translators/individual/dabi.py +70 -66
  935. scitex/scholar/url_finder/translators/individual/dagens_nyheter.py +3 -1
  936. scitex/scholar/url_finder/translators/individual/dagstuhl.py +10 -15
  937. scitex/scholar/url_finder/translators/individual/dar_almandumah.py +13 -9
  938. scitex/scholar/url_finder/translators/individual/dart_europe.py +19 -22
  939. scitex/scholar/url_finder/translators/individual/data_gov.py +2 -2
  940. scitex/scholar/url_finder/translators/individual/databrary.py +27 -28
  941. scitex/scholar/url_finder/translators/individual/datacite_json.py +152 -137
  942. scitex/scholar/url_finder/translators/individual/dataverse.py +68 -64
  943. scitex/scholar/url_finder/translators/individual/daum_news.py +38 -38
  944. scitex/scholar/url_finder/translators/individual/dblp.py +4 -8
  945. scitex/scholar/url_finder/translators/individual/dblp_computer_science_bibliography.py +8 -3
  946. scitex/scholar/url_finder/translators/individual/dbpia.py +5 -3
  947. scitex/scholar/url_finder/translators/individual/defense_technical_information_center.py +30 -28
  948. scitex/scholar/url_finder/translators/individual/delpher.py +102 -79
  949. scitex/scholar/url_finder/translators/individual/demographic_research.py +35 -31
  950. scitex/scholar/url_finder/translators/individual/denik_cz.py +58 -54
  951. scitex/scholar/url_finder/translators/individual/depatisnet.py +7 -10
  952. scitex/scholar/url_finder/translators/individual/der_freitag.py +81 -66
  953. scitex/scholar/url_finder/translators/individual/der_spiegel.py +56 -54
  954. scitex/scholar/url_finder/translators/individual/digibib_net.py +3 -1
  955. scitex/scholar/url_finder/translators/individual/digizeitschriften.py +3 -1
  956. scitex/scholar/url_finder/translators/individual/dpla.py +13 -14
  957. scitex/scholar/url_finder/translators/individual/dspace.py +2 -2
  958. scitex/scholar/url_finder/translators/individual/ebrary.py +3 -1
  959. scitex/scholar/url_finder/translators/individual/ebscohost.py +3 -1
  960. scitex/scholar/url_finder/translators/individual/electronic_colloquium_on_computational_complexity.py +3 -1
  961. scitex/scholar/url_finder/translators/individual/elife.py +3 -1
  962. scitex/scholar/url_finder/translators/individual/elsevier_health_journals.py +3 -1
  963. scitex/scholar/url_finder/translators/individual/emerald.py +3 -1
  964. scitex/scholar/url_finder/translators/individual/emerald_insight.py +3 -1
  965. scitex/scholar/url_finder/translators/individual/epicurious.py +3 -1
  966. scitex/scholar/url_finder/translators/individual/eurogamerusgamer.py +3 -1
  967. scitex/scholar/url_finder/translators/individual/fachportal_padagogik.py +3 -1
  968. scitex/scholar/url_finder/translators/individual/frontiers.py +1 -1
  969. scitex/scholar/url_finder/translators/individual/gale_databases.py +3 -1
  970. scitex/scholar/url_finder/translators/individual/gms_german_medical_science.py +6 -2
  971. scitex/scholar/url_finder/translators/individual/ieee_computer_society.py +6 -2
  972. scitex/scholar/url_finder/translators/individual/ieee_xplore.py +41 -35
  973. scitex/scholar/url_finder/translators/individual/inter_research_science_center.py +6 -2
  974. scitex/scholar/url_finder/translators/individual/jisc_historical_texts.py +3 -1
  975. scitex/scholar/url_finder/translators/individual/jstor.py +14 -12
  976. scitex/scholar/url_finder/translators/individual/korean_national_library.py +3 -1
  977. scitex/scholar/url_finder/translators/individual/la_times.py +3 -1
  978. scitex/scholar/url_finder/translators/individual/landesbibliographie_baden_wurttemberg.py +3 -1
  979. scitex/scholar/url_finder/translators/individual/legislative_insight.py +3 -1
  980. scitex/scholar/url_finder/translators/individual/libraries_tasmania.py +3 -1
  981. scitex/scholar/url_finder/translators/individual/library_catalog__koha_.py +3 -1
  982. scitex/scholar/url_finder/translators/individual/lingbuzz.py +2 -2
  983. scitex/scholar/url_finder/translators/individual/max_planck_institute_for_the_history_of_science_virtual_laboratory_library.py +3 -1
  984. scitex/scholar/url_finder/translators/individual/mdpi.py +12 -6
  985. scitex/scholar/url_finder/translators/individual/microbiology_society_journals.py +3 -1
  986. scitex/scholar/url_finder/translators/individual/midas_journals.py +3 -1
  987. scitex/scholar/url_finder/translators/individual/nagoya_university_opac.py +3 -1
  988. scitex/scholar/url_finder/translators/individual/nature_publishing_group.py +32 -19
  989. scitex/scholar/url_finder/translators/individual/ntsb_accident_reports.py +3 -1
  990. scitex/scholar/url_finder/translators/individual/openedition_journals.py +8 -4
  991. scitex/scholar/url_finder/translators/individual/orcid.py +16 -15
  992. scitex/scholar/url_finder/translators/individual/oxford.py +25 -19
  993. scitex/scholar/url_finder/translators/individual/oxford_dictionaries_premium.py +3 -1
  994. scitex/scholar/url_finder/translators/individual/ozon_ru.py +3 -1
  995. scitex/scholar/url_finder/translators/individual/plos.py +9 -12
  996. scitex/scholar/url_finder/translators/individual/polygon.py +3 -1
  997. scitex/scholar/url_finder/translators/individual/primo.py +3 -1
  998. scitex/scholar/url_finder/translators/individual/project_muse.py +3 -1
  999. scitex/scholar/url_finder/translators/individual/pubfactory_journals.py +3 -1
  1000. scitex/scholar/url_finder/translators/individual/pubmed.py +71 -65
  1001. scitex/scholar/url_finder/translators/individual/pubmed_central.py +8 -6
  1002. scitex/scholar/url_finder/translators/individual/rechtspraak_nl.py +3 -1
  1003. scitex/scholar/url_finder/translators/individual/sage_journals.py +25 -17
  1004. scitex/scholar/url_finder/translators/individual/sciencedirect.py +36 -17
  1005. scitex/scholar/url_finder/translators/individual/semantics_visual_library.py +3 -1
  1006. scitex/scholar/url_finder/translators/individual/silverchair.py +70 -52
  1007. scitex/scholar/url_finder/translators/individual/sora.py +3 -1
  1008. scitex/scholar/url_finder/translators/individual/springer.py +15 -11
  1009. scitex/scholar/url_finder/translators/individual/ssrn.py +3 -3
  1010. scitex/scholar/url_finder/translators/individual/stanford_encyclopedia_of_philosophy.py +3 -1
  1011. scitex/scholar/url_finder/translators/individual/superlib.py +3 -1
  1012. scitex/scholar/url_finder/translators/individual/treesearch.py +3 -1
  1013. scitex/scholar/url_finder/translators/individual/university_of_chicago_press_books.py +3 -1
  1014. scitex/scholar/url_finder/translators/individual/vlex.py +3 -1
  1015. scitex/scholar/url_finder/translators/individual/web_of_science.py +3 -1
  1016. scitex/scholar/url_finder/translators/individual/web_of_science_nextgen.py +3 -1
  1017. scitex/scholar/url_finder/translators/individual/wiley.py +31 -25
  1018. scitex/scholar/url_finder/translators/individual/wilson_center_digital_archive.py +3 -1
  1019. scitex/scholar/utils/bibtex/_parse_bibtex.py +3 -3
  1020. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +5 -9
  1021. scitex/scholar/utils/text/_TextNormalizer.py +249 -176
  1022. scitex/scholar/utils/validation/DOIValidator.py +31 -28
  1023. scitex/scholar/utils/validation/__init__.py +0 -0
  1024. scitex/scholar/utils/validation/validate_library_dois.py +61 -57
  1025. scitex/scholar/zotero/__init__.py +1 -1
  1026. scitex/security/cli.py +7 -20
  1027. scitex/security/github.py +45 -32
  1028. scitex/session/__init__.py +8 -9
  1029. scitex/session/_decorator.py +49 -42
  1030. scitex/session/_lifecycle.py +39 -39
  1031. scitex/session/_manager.py +24 -20
  1032. scitex/sh/__init__.py +4 -3
  1033. scitex/sh/_execute.py +10 -7
  1034. scitex/sh/_security.py +3 -3
  1035. scitex/sh/_types.py +2 -3
  1036. scitex/stats/__init__.py +174 -6
  1037. scitex/stats/_schema.py +42 -569
  1038. scitex/stats/auto/__init__.py +188 -0
  1039. scitex/stats/auto/_context.py +331 -0
  1040. scitex/stats/auto/_formatting.py +679 -0
  1041. scitex/stats/auto/_rules.py +901 -0
  1042. scitex/stats/auto/_selector.py +554 -0
  1043. scitex/stats/auto/_styles.py +721 -0
  1044. scitex/stats/correct/__init__.py +4 -4
  1045. scitex/stats/correct/_correct_bonferroni.py +43 -34
  1046. scitex/stats/correct/_correct_fdr.py +14 -40
  1047. scitex/stats/correct/_correct_fdr_.py +39 -46
  1048. scitex/stats/correct/_correct_holm.py +14 -32
  1049. scitex/stats/correct/_correct_sidak.py +36 -21
  1050. scitex/stats/descriptive/_circular.py +20 -21
  1051. scitex/stats/descriptive/_describe.py +19 -5
  1052. scitex/stats/descriptive/_nan.py +5 -7
  1053. scitex/stats/descriptive/_real.py +4 -3
  1054. scitex/stats/effect_sizes/__init__.py +10 -11
  1055. scitex/stats/effect_sizes/_cliffs_delta.py +35 -32
  1056. scitex/stats/effect_sizes/_cohens_d.py +30 -31
  1057. scitex/stats/effect_sizes/_epsilon_squared.py +19 -22
  1058. scitex/stats/effect_sizes/_eta_squared.py +23 -27
  1059. scitex/stats/effect_sizes/_prob_superiority.py +18 -21
  1060. scitex/stats/io/__init__.py +29 -0
  1061. scitex/stats/io/_bundle.py +156 -0
  1062. scitex/stats/posthoc/__init__.py +3 -3
  1063. scitex/stats/posthoc/_dunnett.py +75 -55
  1064. scitex/stats/posthoc/_games_howell.py +61 -43
  1065. scitex/stats/posthoc/_tukey_hsd.py +42 -34
  1066. scitex/stats/power/__init__.py +2 -2
  1067. scitex/stats/power/_power.py +56 -56
  1068. scitex/stats/tests/__init__.py +1 -1
  1069. scitex/stats/tests/correlation/__init__.py +1 -1
  1070. scitex/stats/tests/correlation/_test_pearson.py +28 -38
  1071. scitex/stats/utils/__init__.py +14 -17
  1072. scitex/stats/utils/_effect_size.py +85 -78
  1073. scitex/stats/utils/_formatters.py +49 -43
  1074. scitex/stats/utils/_normalizers.py +7 -14
  1075. scitex/stats/utils/_power.py +56 -56
  1076. scitex/str/__init__.py +1 -0
  1077. scitex/str/_clean_path.py +3 -3
  1078. scitex/str/_factor_out_digits.py +86 -58
  1079. scitex/str/_format_plot_text.py +180 -111
  1080. scitex/str/_latex.py +19 -19
  1081. scitex/str/_latex_fallback.py +9 -10
  1082. scitex/str/_parse.py +3 -6
  1083. scitex/str/_print_debug.py +13 -13
  1084. scitex/str/_printc.py +2 -0
  1085. scitex/str/_search.py +3 -3
  1086. scitex/template/.legacy/_clone_project.py +9 -13
  1087. scitex/template/__init__.py +10 -2
  1088. scitex/template/_clone_project.py +7 -2
  1089. scitex/template/_copy.py +1 -0
  1090. scitex/template/_customize.py +3 -6
  1091. scitex/template/_git_strategy.py +2 -3
  1092. scitex/template/_rename.py +1 -0
  1093. scitex/template/clone_pip_project.py +6 -7
  1094. scitex/template/clone_research.py +7 -10
  1095. scitex/template/clone_singularity.py +6 -7
  1096. scitex/template/clone_writer_directory.py +6 -7
  1097. scitex/tex/__init__.py +4 -0
  1098. scitex/tex/_export.py +890 -0
  1099. scitex/tex/_preview.py +26 -11
  1100. scitex/tex/_to_vec.py +10 -7
  1101. scitex/torch/__init__.py +11 -1
  1102. scitex/types/_ArrayLike.py +2 -0
  1103. scitex/types/_is_listed_X.py +3 -3
  1104. scitex/units.py +110 -77
  1105. scitex/utils/_compress_hdf5.py +3 -3
  1106. scitex/utils/_email.py +8 -4
  1107. scitex/utils/_notify.py +14 -8
  1108. scitex/utils/_search.py +6 -6
  1109. scitex/utils/_verify_scitex_format.py +17 -42
  1110. scitex/utils/_verify_scitex_format_v01.py +12 -34
  1111. scitex/utils/template.py +4 -3
  1112. scitex/web/__init__.py +7 -1
  1113. scitex/web/_scraping.py +54 -38
  1114. scitex/web/_search_pubmed.py +30 -14
  1115. scitex/writer/.legacy/Writer_v01-refactored.py +4 -4
  1116. scitex/writer/.legacy/_compile.py +18 -28
  1117. scitex/writer/Writer.py +8 -21
  1118. scitex/writer/__init__.py +11 -11
  1119. scitex/writer/_clone_writer_project.py +2 -6
  1120. scitex/writer/_compile/__init__.py +1 -0
  1121. scitex/writer/_compile/_parser.py +1 -0
  1122. scitex/writer/_compile/_runner.py +35 -38
  1123. scitex/writer/_compile/_validator.py +1 -0
  1124. scitex/writer/_compile/manuscript.py +1 -0
  1125. scitex/writer/_compile/revision.py +1 -0
  1126. scitex/writer/_compile/supplementary.py +1 -0
  1127. scitex/writer/_compile_async.py +5 -12
  1128. scitex/writer/_project/__init__.py +1 -0
  1129. scitex/writer/_project/_create.py +10 -25
  1130. scitex/writer/_project/_trees.py +4 -9
  1131. scitex/writer/_project/_validate.py +2 -3
  1132. scitex/writer/_validate_tree_structures.py +7 -18
  1133. scitex/writer/dataclasses/__init__.py +8 -10
  1134. scitex/writer/dataclasses/config/_CONSTANTS.py +2 -3
  1135. scitex/writer/dataclasses/config/_WriterConfig.py +4 -9
  1136. scitex/writer/dataclasses/contents/_ManuscriptContents.py +14 -25
  1137. scitex/writer/dataclasses/contents/_RevisionContents.py +21 -16
  1138. scitex/writer/dataclasses/contents/_SupplementaryContents.py +21 -24
  1139. scitex/writer/dataclasses/core/_Document.py +2 -3
  1140. scitex/writer/dataclasses/core/_DocumentSection.py +8 -23
  1141. scitex/writer/dataclasses/results/_CompilationResult.py +2 -3
  1142. scitex/writer/dataclasses/results/_LaTeXIssue.py +3 -6
  1143. scitex/writer/dataclasses/results/_SaveSectionsResponse.py +20 -9
  1144. scitex/writer/dataclasses/results/_SectionReadResponse.py +24 -10
  1145. scitex/writer/dataclasses/tree/_ConfigTree.py +7 -4
  1146. scitex/writer/dataclasses/tree/_ManuscriptTree.py +10 -13
  1147. scitex/writer/dataclasses/tree/_RevisionTree.py +16 -17
  1148. scitex/writer/dataclasses/tree/_ScriptsTree.py +10 -5
  1149. scitex/writer/dataclasses/tree/_SharedTree.py +10 -13
  1150. scitex/writer/dataclasses/tree/_SupplementaryTree.py +15 -14
  1151. scitex/writer/utils/.legacy_git_retry.py +3 -8
  1152. scitex/writer/utils/_parse_latex_logs.py +2 -3
  1153. scitex/writer/utils/_parse_script_args.py +20 -23
  1154. scitex/writer/utils/_watch.py +5 -5
  1155. {scitex-2.5.0.dist-info → scitex-2.7.3.dist-info}/METADATA +14 -10
  1156. {scitex-2.5.0.dist-info → scitex-2.7.3.dist-info}/RECORD +1149 -985
  1157. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +0 -583
  1158. scitex/io/memo.md +0 -2827
  1159. scitex/plt/_subplots/TODO.md +0 -53
  1160. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +0 -537
  1161. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +0 -1499
  1162. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +0 -431
  1163. scitex/plt/_subplots/_export_as_csv_formatters.py +0 -112
  1164. scitex/vis/__init__.py +0 -177
  1165. scitex/vis/editor/_defaults.py +0 -244
  1166. scitex/vis/editor/_edit.py +0 -378
  1167. scitex/vis/editor/flask_editor/__init__.py +0 -21
  1168. scitex/vis/editor/flask_editor/bbox.py +0 -216
  1169. scitex/vis/editor/flask_editor/core.py +0 -152
  1170. scitex/vis/editor/flask_editor/plotter.py +0 -130
  1171. scitex/vis/editor/flask_editor/renderer.py +0 -184
  1172. scitex/vis/editor/flask_editor/templates/html.py +0 -295
  1173. scitex/vis/editor/flask_editor/templates/scripts.py +0 -614
  1174. scitex/vis/editor/flask_editor/templates/styles.py +0 -549
  1175. /scitex/{vis → fig}/README.md +0 -0
  1176. /scitex/{vis → fig}/docs/CANVAS_ARCHITECTURE.md +0 -0
  1177. {scitex-2.5.0.dist-info → scitex-2.7.3.dist-info}/WHEEL +0 -0
  1178. {scitex-2.5.0.dist-info → scitex-2.7.3.dist-info}/entry_points.txt +0 -0
  1179. {scitex-2.5.0.dist-info → scitex-2.7.3.dist-info}/licenses/LICENSE +0 -0
@@ -13,23 +13,323 @@ figures self-documenting and reproducible.
13
13
 
14
14
  __FILE__ = __file__
15
15
 
16
- from typing import Dict, Optional
16
+ from typing import Dict, Optional, Union, List
17
17
 
18
+ from scitex import logging
18
19
 
19
- def collect_figure_metadata(fig, ax=None, plot_id=None) -> Dict:
20
+ logger = logging.getLogger(__name__)
21
+
22
# Number of decimal places kept per quantity type when metadata is
# rounded for JSON output (looked up by _round_dict based on key names).
PRECISION = dict(
    mm=2,         # Millimeters: 0.01mm precision (10 microns)
    inch=3,       # Inches: 0.001 inch precision
    position=3,   # Normalized position: 0.001 precision
    lim=2,        # Axis limits: 2 decimal places
    linewidth=2,  # Line widths: 0.01 precision
)
30
+
31
+
32
class FixedFloat:
    """
    A float wrapper whose ``repr`` always shows a fixed number of decimals.

    The wrapped value is rounded to ``precision`` decimal places on
    construction, and ``repr()`` zero-pads to exactly that many places.

    Example: repr(FixedFloat(0.25, 3)) -> "0.250"

    NOTE(review): the standard ``json`` encoder does not call ``__repr__``
    on arbitrary objects, so getting "0.250" into JSON output presumably
    relies on a custom encoder defined elsewhere - confirm.

    Parameters
    ----------
    value : float
        Number to wrap (ints are accepted and converted to float).
    precision : int
        Number of decimal places to round to and to display.
    """

    def __init__(self, value: float, precision: int):
        # Coerce to float first: round(int, n) returns an int, and
        # __float__ must return a real float or float(obj) raises TypeError.
        self.value = round(float(value), precision)
        self.precision = precision

    def __repr__(self):
        return f"{self.value:.{self.precision}f}"

    def __float__(self):
        return float(self.value)
47
+
48
+
49
+ def _round_value(value: Union[float, int], precision: int, fixed: bool = False) -> Union[float, int, "FixedFloat"]:
50
+ """
51
+ Round a single value to specified precision.
52
+
53
+ Parameters
54
+ ----------
55
+ value : float or int
56
+ Value to round
57
+ precision : int
58
+ Number of decimal places
59
+ fixed : bool
60
+ If True, return FixedFloat with fixed decimal places (e.g., 0.250)
61
+ If False, return float (e.g., 0.25)
62
+ """
63
+ if isinstance(value, int):
64
+ if fixed:
65
+ return FixedFloat(float(value), precision)
66
+ return value
67
+ if isinstance(value, float):
68
+ if fixed:
69
+ return FixedFloat(value, precision)
70
+ return round(value, precision)
71
+ return value
72
+
73
+
74
+ def _round_list(values: List, precision: int, fixed: bool = False) -> List:
75
+ """Round all values in a list."""
76
+ return [_round_value(v, precision, fixed) for v in values]
77
+
78
+
79
+ def _round_dict(d: dict, precision_map: dict = None) -> dict:
80
+ """
81
+ Round all float values in a dict based on key-specific precision.
82
+
83
+ Parameters
84
+ ----------
85
+ d : dict
86
+ Dictionary to process
87
+ precision_map : dict, optional
88
+ Mapping of key patterns to precision values.
89
+ Default uses PRECISION settings based on key names.
90
+ """
91
+ if precision_map is None:
92
+ precision_map = {}
93
+
94
+ result = {}
95
+ for key, value in d.items():
96
+ # Determine precision based on key name
97
+ if "mm" in key.lower():
98
+ prec = PRECISION["mm"]
99
+ elif "inch" in key.lower():
100
+ prec = PRECISION["inch"]
101
+ elif "position" in key.lower() or key in ("left", "bottom", "right", "top"):
102
+ prec = PRECISION["position"]
103
+ elif "lim" in key.lower():
104
+ prec = PRECISION["lim"]
105
+ elif "width" in key.lower() and "line" in key.lower():
106
+ prec = PRECISION["linewidth"]
107
+ else:
108
+ prec = precision_map.get(key, 3) # Default 3 decimals
109
+
110
+ if isinstance(value, dict):
111
+ result[key] = _round_dict(value, precision_map)
112
+ elif isinstance(value, list):
113
+ result[key] = _round_list(value, prec)
114
+ elif isinstance(value, float):
115
+ result[key] = _round_value(value, prec)
116
+ else:
117
+ result[key] = value
118
+
119
+ return result
120
+
121
+
122
def _collect_single_axes_metadata(fig, ax, ax_index: int) -> dict:
    """
    Collect metadata for a single axes object.

    Dimension data comes from ``get_dimension_info(fig, ax)``. If that
    import or call fails, a warning is logged and only the entries
    gathered up to that point plus the axis info are returned.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        The parent figure
    ax : matplotlib.axes.Axes
        The axes to collect metadata from
    ax_index : int
        Index of this axes in the figure (for position tracking)

    Returns
    -------
    dict
        Metadata dictionary for this axes. Dimension keys other than
        ``size_mm`` are present only when the dimension info provides them:
        - size_mm, size_inch, size_px
        - bounds_figure_fraction
        - position_in_grid
        - margins_mm, margins_inch
        - bbox_px, bbox_mm, bbox_inch
        - xaxis, yaxis (label, unit, scale, lim)
    """

    def _bbox_edges(bbox: dict) -> dict:
        # Convert from x0/y0/x1/y1 to x_left/y_top/x_right/y_bottom,
        # accepting already-converted keys as a fallback.
        return {
            "x_left": bbox.get("x0", bbox.get("x_left", 0)),
            "x_right": bbox.get("x1", bbox.get("x_right", 0)),
            "y_top": bbox.get("y0", bbox.get("y_top", 0)),
            "y_bottom": bbox.get("y1", bbox.get("y_bottom", 0)),
            "width": bbox.get("width", 0),
            "height": bbox.get("height", 0),
        }

    ax_metadata = {}

    try:
        from ._figure_from_axes_mm import get_dimension_info

        dim_info = get_dimension_info(fig, ax)

        # Size in multiple units
        ax_metadata["size_mm"] = dim_info.get("axes_size_mm", [])
        for unit in ("inch", "px"):
            size_key = f"axes_size_{unit}"
            if size_key in dim_info:
                ax_metadata[f"size_{unit}"] = dim_info[size_key]

        # Position in figure coordinates (normalized 0-1 values)
        # Uses matplotlib terminology: bounds_figure_fraction
        if "axes_position" in dim_info:
            ax_metadata["bounds_figure_fraction"] = list(dim_info["axes_position"])

        # Position in grid (row, col)
        if hasattr(ax, "_scitex_metadata") and "position_in_grid" in ax._scitex_metadata:
            ax_metadata["position_in_grid"] = ax._scitex_metadata["position_in_grid"]
        else:
            # No recorded grid position: fall back to a single-column layout
            ax_metadata["position_in_grid"] = [ax_index, 0]

        # Margins in mm and inch
        for margin_key in ("margins_mm", "margins_inch"):
            if margin_key in dim_info:
                ax_metadata[margin_key] = dim_info[margin_key]

        # Bounding box with intuitive keys, one entry per available unit
        for unit in ("px", "mm", "inch"):
            bbox_key = f"axes_bbox_{unit}"
            if bbox_key in dim_info:
                ax_metadata[f"bbox_{unit}"] = _bbox_edges(dim_info[bbox_key])

    except Exception as e:
        # Best-effort: dimension info is optional, axis info below is not.
        logger.warning(f"Could not extract dimension info for axes {ax_index}: {e}")

    # Extract axes labels and units
    # X-axis - using matplotlib terminology (xaxis)
    x_label, x_unit = _parse_label_unit(ax.get_xlabel())
    ax_metadata["xaxis"] = {
        "label": x_label,
        "unit": x_unit,
        "scale": ax.get_xscale(),
        "lim": list(ax.get_xlim()),
    }

    # Y-axis - using matplotlib terminology (yaxis)
    y_label, y_unit = _parse_label_unit(ax.get_ylabel())
    ax_metadata["yaxis"] = {
        "label": y_label,
        "unit": y_unit,
        "scale": ax.get_yscale(),
        "lim": list(ax.get_ylim()),
    }

    return ax_metadata
236
+
237
+
238
+ def _restructure_style(flat_style: dict) -> dict:
239
+ """
240
+ Restructure flat style_mm dict into hierarchical structure with explicit scopes.
241
+
242
+ Converts:
243
+ {"axis_thickness_mm": 0.2, "tick_length_mm": 0.8, ...}
244
+ To:
245
+ {
246
+ "global": {"fonts": {...}, "padding": {...}},
247
+ "axes_default": {"axes": {...}, "ticks": {...}},
248
+ "artist_default": {"lines": {...}, "markers": {...}}
249
+ }
250
+
251
+ Style scopes:
252
+ - global: rcParams-like settings (fonts, padding) applied to entire figure
253
+ - axes_default: default axes appearance (can be overridden per-axes)
254
+ - artist_default: default artist appearance (can be overridden per-artist)
255
+ """
256
+ result = {
257
+ "global": {
258
+ "fonts": {},
259
+ "padding": {},
260
+ },
261
+ "axes_default": {
262
+ "axes": {},
263
+ "ticks": {},
264
+ },
265
+ "artist_default": {
266
+ "lines": {},
267
+ "markers": {},
268
+ },
269
+ }
270
+
271
+ # Mapping from flat keys to hierarchical structure (scope, category, key)
272
+ key_mapping = {
273
+ # Axes-level defaults
274
+ "axis_thickness_mm": ("axes_default", "axes", "thickness_mm"),
275
+ "axes_thickness_mm": ("axes_default", "axes", "thickness_mm"),
276
+ "tick_length_mm": ("axes_default", "ticks", "length_mm"),
277
+ "tick_thickness_mm": ("axes_default", "ticks", "thickness_mm"),
278
+ "n_ticks": ("axes_default", "ticks", "n_ticks"),
279
+ # Artist-level defaults (Line2D, markers)
280
+ "trace_thickness_mm": ("artist_default", "lines", "thickness_mm"),
281
+ "line_thickness_mm": ("artist_default", "lines", "thickness_mm"),
282
+ "marker_size_mm": ("artist_default", "markers", "size_mm"),
283
+ "scatter_size_mm": ("artist_default", "markers", "scatter_size_mm"),
284
+ # Global defaults (rcParams-like)
285
+ "font_family": ("global", "fonts", "family"),
286
+ "font_family_requested": ("global", "fonts", "family_requested"),
287
+ "font_family_actual": ("global", "fonts", "family_actual"),
288
+ "axis_font_size_pt": ("global", "fonts", "axis_size_pt"),
289
+ "tick_font_size_pt": ("global", "fonts", "tick_size_pt"),
290
+ "title_font_size_pt": ("global", "fonts", "title_size_pt"),
291
+ "legend_font_size_pt": ("global", "fonts", "legend_size_pt"),
292
+ "suptitle_font_size_pt": ("global", "fonts", "suptitle_size_pt"),
293
+ "annotation_font_size_pt": ("global", "fonts", "annotation_size_pt"),
294
+ "label_pad_pt": ("global", "padding", "label_pt"),
295
+ "tick_pad_pt": ("global", "padding", "tick_pt"),
296
+ "title_pad_pt": ("global", "padding", "title_pt"),
297
+ }
298
+
299
+ for key, value in flat_style.items():
300
+ if key in key_mapping:
301
+ scope, category, new_key = key_mapping[key]
302
+ result[scope][category][new_key] = value
303
+ else:
304
+ # Unknown keys go to a misc section or are kept at top level
305
+ # For now, skip unknown keys to keep structure clean
306
+ pass
307
+
308
+ # Remove empty categories within each scope
309
+ for scope in list(result.keys()):
310
+ result[scope] = {k: v for k, v in result[scope].items() if v}
311
+ # Remove empty scopes
312
+ if not result[scope]:
313
+ del result[scope]
314
+
315
+ return result
316
+
317
+
318
+ def collect_figure_metadata(fig, ax=None) -> Dict:
20
319
  """
21
320
  Collect all metadata from figure and axes for embedding in saved images.
22
321
 
23
322
  This function automatically extracts:
24
323
  - Software versions (scitex, matplotlib)
25
324
  - Timestamp
325
+ - Figure UUID (unique identifier)
26
326
  - Figure/axes dimensions (mm, inch, px)
27
327
  - DPI settings
28
328
  - Margins
29
329
  - Styling parameters (if available)
30
330
  - Mode (display/publication)
31
331
  - Creation method
32
- - Plot type and axes information (Phase 1)
332
+ - Plot type and axes information
33
333
 
34
334
  Parameters
35
335
  ----------
@@ -38,9 +338,6 @@ def collect_figure_metadata(fig, ax=None, plot_id=None) -> Dict:
38
338
  ax : matplotlib.axes.Axes, optional
39
339
  Primary axes to collect dimension info from.
40
340
  If not provided, uses first axes in figure.
41
- plot_id : str, optional
42
- Identifier for this plot (e.g., "01_plot"). If not provided,
43
- will be extracted from filename if available.
44
341
 
45
342
  Returns
46
343
  -------
@@ -68,233 +365,588 @@ def collect_figure_metadata(fig, ax=None, plot_id=None) -> Dict:
68
365
  - Debugging dimension/DPI issues
69
366
  """
70
367
  import datetime
368
+ import uuid
71
369
 
72
370
  import matplotlib
73
371
  import scitex
74
372
 
75
- # Base metadata
373
+ # Base metadata with cleaner structure:
374
+ # - runtime: software/creation info
375
+ # - figure: figure-level properties
376
+ # - axes: axes-level properties
377
+ # - style: styling parameters
378
+ # - plot: plot content (title, type, traces, legend)
379
+ # - data: CSV linkage (path, hash, columns)
76
380
  metadata = {
77
- "metadata_version": "1.1.0", # Version of the metadata schema itself (updated for Phase 1)
78
- "scitex": {
79
- "version": scitex.__version__,
381
+ "scitex_schema": "scitex.plt.figure",
382
+ "scitex_schema_version": "0.1.0",
383
+ "figure_uuid": str(uuid.uuid4()),
384
+ "runtime": {
385
+ "scitex_version": scitex.__version__,
386
+ "matplotlib_version": matplotlib.__version__,
80
387
  "created_at": datetime.datetime.now().isoformat(),
81
388
  },
82
- "matplotlib": {
83
- "version": matplotlib.__version__,
84
- },
85
389
  }
86
390
 
87
- # Add plot ID if provided
88
- if plot_id:
89
- metadata["id"] = plot_id
90
-
91
- # If no axes provided, try to get first axes from figure
92
- if ax is None and hasattr(fig, "axes") and len(fig.axes) > 0:
93
- ax = fig.axes[0]
391
+ # Collect all axes from figure
392
+ # Keep AxisWrappers for metadata access, but also track grid shape
393
+ all_axes = [] # List of (ax_wrapper_or_mpl, row, col) tuples
394
+ grid_shape = (1, 1) # Default single axes
94
395
 
95
- # Add dimension info if axes available
96
396
  if ax is not None:
397
+ # Handle AxesWrapper (multi-axes) - extract individual AxisWrappers with positions
398
+ if hasattr(ax, "_axes_scitex"):
399
+ import numpy as np
400
+ axes_array = ax._axes_scitex
401
+ if isinstance(axes_array, np.ndarray):
402
+ grid_shape = axes_array.shape
403
+ for idx, ax_item in enumerate(axes_array.flat):
404
+ row = idx // grid_shape[1]
405
+ col = idx % grid_shape[1]
406
+ all_axes.append((ax_item, row, col))
407
+ else:
408
+ all_axes = [(axes_array, 0, 0)]
409
+ # Handle AxisWrapper (single axes)
410
+ elif hasattr(ax, "_axis_mpl"):
411
+ all_axes = [(ax, 0, 0)]
412
+ else:
413
+ # Assume it's a matplotlib axes
414
+ all_axes = [(ax, 0, 0)]
415
+ elif hasattr(fig, "axes") and len(fig.axes) > 0:
416
+ # Fallback to figure axes (linear indexing)
417
+ for idx, ax_item in enumerate(fig.axes):
418
+ all_axes.append((ax_item, 0, idx))
419
+
420
+ # Helper to unwrap AxisWrapper to matplotlib axes
421
+ def _unwrap_ax(ax_item):
422
+ if hasattr(ax_item, "_axis_mpl"):
423
+ return ax_item._axis_mpl
424
+ return ax_item
425
+
426
+ # Add figure-level properties (extracted from first axes for figure dimensions)
427
+ if all_axes:
97
428
  try:
98
429
  from ._figure_from_axes_mm import get_dimension_info
99
430
 
100
- dim_info = get_dimension_info(fig, ax)
431
+ first_ax_tuple = all_axes[0]
432
+ first_ax_mpl = _unwrap_ax(first_ax_tuple[0])
433
+ dim_info = get_dimension_info(fig, first_ax_mpl)
101
434
 
102
- metadata["dimensions"] = {
103
- "figure_size_mm": dim_info["figure_size_mm"],
104
- "figure_size_inch": dim_info["figure_size_inch"],
105
- "figure_size_px": dim_info["figure_size_px"],
106
- "axes_size_mm": dim_info["axes_size_mm"],
107
- "axes_size_inch": dim_info["axes_size_inch"],
108
- "axes_size_px": dim_info["axes_size_px"],
109
- "axes_position": dim_info["axes_position"],
435
+ metadata["figure"] = {
436
+ "size_mm": dim_info["figure_size_mm"],
437
+ "size_inch": dim_info["figure_size_inch"],
438
+ "size_px": dim_info["figure_size_px"],
110
439
  "dpi": dim_info["dpi"],
111
440
  }
112
441
 
113
- # Calculate margins from dimension info
114
- fig_w_mm, fig_h_mm = dim_info["figure_size_mm"]
115
- axes_w_mm, axes_h_mm = dim_info["axes_size_mm"]
116
- axes_pos = dim_info["axes_position"]
117
- fig_w_px, fig_h_px = dim_info["figure_size_px"]
118
- axes_w_px, axes_h_px = dim_info["axes_size_px"]
119
- dpi = dim_info["dpi"]
120
-
121
- metadata["margins_mm"] = {
122
- "left": axes_pos[0] * fig_w_mm,
123
- "bottom": axes_pos[1] * fig_h_mm,
124
- "right": fig_w_mm - (axes_pos[0] * fig_w_mm + axes_w_mm),
125
- "top": fig_h_mm - (axes_pos[1] * fig_h_mm + axes_h_mm),
126
- }
127
-
128
- # Calculate axes bounding box in pixels and millimeters
129
- # axes_position is (left, bottom, width, height) in figure coordinates (0-1)
130
- # Convert to absolute coordinates
131
- x0_px = int(axes_pos[0] * fig_w_px)
132
- y0_px = int((1 - axes_pos[1] - axes_pos[3]) * fig_h_px) # Flip Y (matplotlib origin is bottom-left)
133
- x1_px = x0_px + axes_w_px
134
- y1_px = y0_px + axes_h_px
135
-
136
- x0_mm = axes_pos[0] * fig_w_mm
137
- y0_mm = (1 - axes_pos[1] - axes_pos[3]) * fig_h_mm # Flip Y
138
- x1_mm = x0_mm + axes_w_mm
139
- y1_mm = y0_mm + axes_h_mm
140
-
141
- metadata["axes_bbox_px"] = {
142
- "x0": x0_px,
143
- "y0": y0_px,
144
- "x1": x1_px,
145
- "y1": y1_px,
146
- "width": axes_w_px,
147
- "height": axes_h_px,
148
- }
149
-
150
- metadata["axes_bbox_mm"] = {
151
- "x0": x0_mm,
152
- "y0": y0_mm,
153
- "x1": x1_mm,
154
- "y1": y1_mm,
155
- "width": axes_w_mm,
156
- "height": axes_h_mm,
157
- }
158
-
442
+ # Add top-level axes_bbox_px for easy access by canvas/web editors
443
+ # Uses x0/y0/x1/y1 format (origin at top-left for web compatibility)
444
+ # x0: left edge (Y-axis position), y1: bottom edge (X-axis position)
445
+ if "axes_bbox_px" in dim_info:
446
+ metadata["axes_bbox_px"] = dim_info["axes_bbox_px"]
447
+ if "axes_bbox_mm" in dim_info:
448
+ metadata["axes_bbox_mm"] = dim_info["axes_bbox_mm"]
159
449
  except Exception as e:
160
- # If dimension extraction fails, continue without it
161
- import warnings
162
-
163
- warnings.warn(
164
- f"Could not extract dimension info for metadata: {e}"
165
- )
450
+ logger.warning(f"Could not extract figure dimension info: {e}")
451
+
452
+ # Collect per-axes metadata
453
+ if all_axes:
454
+ metadata["axes"] = {}
455
+ for ax_item, row, col in all_axes:
456
+ # Use row-col format: ax_00, ax_01, ax_10, ax_11 for 2x2 grid
457
+ ax_key = f"ax_{row}{col}"
458
+ try:
459
+ ax_mpl = _unwrap_ax(ax_item)
460
+ ax_metadata = _collect_single_axes_metadata(fig, ax_mpl, row * grid_shape[1] + col)
461
+ if ax_metadata:
462
+ # Add grid position info
463
+ ax_metadata["grid_position"] = {"row": row, "col": col}
464
+ metadata["axes"][ax_key] = ax_metadata
465
+ except Exception as e:
466
+ logger.warning(f"Could not extract metadata for {ax_key}: {e}")
166
467
 
167
468
  # Add scitex-specific metadata if axes was tagged
469
+ scitex_meta = None
168
470
  if ax is not None and hasattr(ax, "_scitex_metadata"):
169
471
  scitex_meta = ax._scitex_metadata
170
-
171
- # Extract stats separately for top-level access
172
- if 'stats' in scitex_meta:
173
- metadata['stats'] = scitex_meta['stats']
174
-
175
- # Merge into scitex section
176
- for key, value in scitex_meta.items():
177
- if key not in metadata["scitex"] and key != 'stats': # Don't duplicate stats
178
- metadata["scitex"][key] = value
179
-
180
- # Alternative: check figure for metadata (for multi-axes cases)
181
472
  elif hasattr(fig, "_scitex_metadata"):
182
473
  scitex_meta = fig._scitex_metadata
183
474
 
475
+ if scitex_meta:
184
476
  # Extract stats separately for top-level access
185
- if 'stats' in scitex_meta:
186
- metadata['stats'] = scitex_meta['stats']
187
-
188
- for key, value in scitex_meta.items():
189
- if key not in metadata["scitex"] and key != 'stats': # Don't duplicate stats
190
- metadata["scitex"][key] = value
477
+ if "stats" in scitex_meta:
478
+ stats_list = scitex_meta["stats"]
479
+ # Determine first_ax_key from axes metadata
480
+ first_ax_key = None
481
+ if "axes" in metadata and metadata["axes"]:
482
+ first_ax_key = next(iter(metadata["axes"].keys()), None)
483
+ # Add plot_id and ax_id to each stats entry if not present
484
+ for stat in stats_list:
485
+ if isinstance(stat, dict):
486
+ # Try to get plot info from metadata
487
+ if stat.get("plot_id") is None:
488
+ if "plot" in metadata and "ax_id" in metadata["plot"]:
489
+ stat["plot_id"] = metadata["plot"]["ax_id"]
490
+ elif first_ax_key:
491
+ stat["plot_id"] = first_ax_key
492
+ if "ax_id" not in stat and first_ax_key:
493
+ stat["ax_id"] = first_ax_key
494
+ metadata["stats"] = stats_list
495
+
496
+ # Extract style_mm to dedicated "style" section with hierarchical structure
497
+ if "style_mm" in scitex_meta:
498
+ metadata["style"] = _restructure_style(scitex_meta["style_mm"])
499
+
500
+ # Extract mode to figure section
501
+ if "mode" in scitex_meta:
502
+ if "figure" not in metadata:
503
+ metadata["figure"] = {}
504
+ metadata["figure"]["mode"] = scitex_meta["mode"]
505
+
506
+ # Extract created_with to runtime section
507
+ if "created_with" in scitex_meta:
508
+ metadata["runtime"]["created_with"] = scitex_meta["created_with"]
509
+
510
+ # Note: axes_size_mm and position_in_grid are now handled per-axes
511
+ # in _collect_single_axes_metadata() and stored under axes.ax_00, axes.ax_01, etc.
191
512
 
192
513
  # Add actual font information
193
514
  try:
194
515
  from ._get_actual_font import get_actual_font_name
516
+
195
517
  actual_font = get_actual_font_name()
196
518
 
197
- # Store both requested and actual font
198
- if "style_mm" in metadata.get("scitex", {}):
199
- requested_font = metadata["scitex"]["style_mm"].get("font_family", "Arial")
200
- metadata["scitex"]["style_mm"]["font_family_requested"] = requested_font
201
- metadata["scitex"]["style_mm"]["font_family_actual"] = actual_font
519
+ # Store both requested and actual font in style.global.fonts section
520
+ if "style" in metadata:
521
+ # Ensure global.fonts section exists
522
+ if "global" not in metadata["style"]:
523
+ metadata["style"]["global"] = {}
524
+ if "fonts" not in metadata["style"]["global"]:
525
+ metadata["style"]["global"]["fonts"] = {}
526
+
527
+ # Get requested font from global.fonts.family or default to Arial
528
+ requested_font = metadata["style"]["global"]["fonts"].get("family", "Arial")
529
+ # Remove redundant family - keep only family_requested and family_actual
530
+ if "family" in metadata["style"]["global"]["fonts"]:
531
+ del metadata["style"]["global"]["fonts"]["family"]
532
+ metadata["style"]["global"]["fonts"]["family_requested"] = requested_font
533
+ metadata["style"]["global"]["fonts"]["family_actual"] = actual_font
202
534
 
203
535
  # Warn if requested and actual fonts differ
204
536
  if requested_font != actual_font:
205
537
  try:
206
538
  from scitex.logging import getLogger
539
+
207
540
  logger = getLogger(__name__)
208
541
  logger.warning(
209
542
  f"Font mismatch: Requested '{requested_font}' but using '{actual_font}'. "
210
543
  f"For {requested_font}: sudo apt-get install ttf-mscorefonts-installer && fc-cache -fv"
211
544
  )
212
545
  except ImportError:
213
- # Fallback to warnings if scitex.logging not available
214
- import warnings
215
- warnings.warn(
216
- f"Font mismatch: Requested '{requested_font}' but using '{actual_font}'",
217
- UserWarning
546
+ logger.warning(
547
+ f"Font mismatch: Requested '{requested_font}' but using '{actual_font}'"
218
548
  )
219
549
  else:
220
- # If no style_mm, add font info to scitex section
221
- if "scitex" in metadata:
222
- metadata["scitex"]["font_family_actual"] = actual_font
550
+ # If no style section, add font info to runtime section
551
+ metadata["runtime"]["font_family_actual"] = actual_font
223
552
  except Exception:
224
553
  # If font detection fails, continue without it
225
554
  pass
226
555
 
227
- # Phase 1: Add plot_type, axes, and style_preset
556
+ # Extract plot content and axes labels
557
+ # For multi-axes figures, we need to handle AxesWrapper specially
558
+ primary_ax = ax
228
559
  if ax is not None:
560
+ # Handle AxesWrapper (multi-axes) - use first axis for primary plot info
561
+ if hasattr(ax, "_axes_scitex"):
562
+ import numpy as np
563
+ axes_array = ax._axes_scitex
564
+ if isinstance(axes_array, np.ndarray) and axes_array.size > 0:
565
+ primary_ax = axes_array.flat[0]
566
+ else:
567
+ primary_ax = axes_array
568
+
569
+ if primary_ax is not None:
229
570
  try:
230
- # Extract axes labels and units
231
- axes_info = {}
232
-
233
- # X-axis
234
- xlabel = ax.get_xlabel()
235
- x_label, x_unit = _parse_label_unit(xlabel)
236
- axes_info["x"] = {
237
- "label": x_label,
238
- "unit": x_unit,
239
- "scale": ax.get_xscale(),
240
- "lim": list(ax.get_xlim()),
241
- }
242
-
243
- # Y-axis
244
- ylabel = ax.get_ylabel()
245
- y_label, y_unit = _parse_label_unit(ylabel)
246
- axes_info["y"] = {
247
- "label": y_label,
248
- "unit": y_unit,
249
- "scale": ax.get_yscale(),
250
- "lim": list(ax.get_ylim()),
251
- }
252
-
253
- # Add n_ticks if available from style
254
- if "scitex" in metadata and "style_mm" in metadata["scitex"]:
255
- if "n_ticks" in metadata["scitex"]["style_mm"]:
256
- n_ticks = metadata["scitex"]["style_mm"]["n_ticks"]
257
- axes_info["x"]["n_ticks"] = n_ticks
258
- axes_info["y"]["n_ticks"] = n_ticks
259
-
260
- metadata["axes"] = axes_info
261
-
262
- # Extract title
263
- title = ax.get_title()
571
+ # Try to get scitex AxisWrapper for history access
572
+ # This is needed because matplotlib axes don't have the tracking history
573
+ ax_for_history = primary_ax
574
+
575
+ # If ax is a raw matplotlib axes, try to find the scitex wrapper
576
+ if not hasattr(primary_ax, 'history'):
577
+ # Check if primary_ax has a scitex wrapper stored on it
578
+ if hasattr(primary_ax, '_scitex_wrapper'):
579
+ ax_for_history = primary_ax._scitex_wrapper
580
+ # Check if figure has scitex axes reference
581
+ elif hasattr(fig, 'axes') and hasattr(fig.axes, 'history'):
582
+ ax_for_history = fig.axes
583
+ # Check for FigWrapper's axes attribute
584
+ elif hasattr(fig, '_fig_scitex') and hasattr(fig._fig_scitex, 'axes'):
585
+ ax_for_history = fig._fig_scitex.axes
586
+ # Check if the figure object itself has scitex_axes
587
+ elif hasattr(fig, '_scitex_axes'):
588
+ ax_for_history = fig._scitex_axes
589
+
590
+ # Add n_ticks to axes metadata if available from style
591
+ if "style" in metadata and "ticks" in metadata["style"] and "n_ticks" in metadata["style"]["ticks"]:
592
+ n_ticks = metadata["style"]["ticks"]["n_ticks"]
593
+ # Add n_ticks to each axes' axis info
594
+ if "axes" in metadata:
595
+ for ax_key in metadata["axes"]:
596
+ ax_data = metadata["axes"][ax_key]
597
+ if "xaxis" in ax_data:
598
+ ax_data["xaxis"]["n_ticks"] = n_ticks
599
+ if "yaxis" in ax_data:
600
+ ax_data["yaxis"]["n_ticks"] = n_ticks
601
+
602
+ # Initialize plot section for plot content
603
+ plot_info = {}
604
+
605
+ # Add ax_id to match the axes key in metadata["axes"]
606
+ # This links plot info to the corresponding axes entry
607
+ ax_row, ax_col = 0, 0 # Default for single axes
608
+ if hasattr(primary_ax, "_scitex_metadata") and "position_in_grid" in primary_ax._scitex_metadata:
609
+ pos = primary_ax._scitex_metadata["position_in_grid"]
610
+ ax_row, ax_col = pos[0], pos[1]
611
+ # Use same format as axes keys: ax_00, ax_01, etc.
612
+ plot_info["ax_id"] = f"ax_{ax_row:02d}" if ax_row == ax_col == 0 else f"ax_{ax_row * 10 + ax_col:02d}"
613
+
614
+ # Extract title - use underlying matplotlib axes if needed
615
+ ax_mpl = primary_ax._axis_mpl if hasattr(primary_ax, '_axis_mpl') else primary_ax
616
+ title = ax_mpl.get_title()
264
617
  if title:
265
- metadata["title"] = title
618
+ plot_info["title"] = title
266
619
 
267
620
  # Detect plot type and method from axes history or lines
268
- plot_type, method = _detect_plot_type(ax)
621
+ # Use ax_for_history which has the scitex history if available
622
+ plot_type, method = _detect_plot_type(ax_for_history)
269
623
  if plot_type:
270
- metadata["plot_type"] = plot_type
624
+ plot_info["type"] = plot_type
271
625
  if method:
272
- metadata["method"] = method
626
+ plot_info["method"] = method
273
627
 
274
628
  # Extract style preset if available
275
- if hasattr(ax, "_scitex_metadata") and "style_preset" in ax._scitex_metadata:
276
- metadata["style_preset"] = ax._scitex_metadata["style_preset"]
277
- elif hasattr(fig, "_scitex_metadata") and "style_preset" in fig._scitex_metadata:
278
- metadata["style_preset"] = fig._scitex_metadata["style_preset"]
279
-
280
- # Phase 2: Extract traces (lines) with their properties and CSV column mapping
281
- traces = _extract_traces(ax)
282
- if traces:
283
- metadata["traces"] = traces
284
-
285
- # Phase 2: Extract legend info
286
- legend_info = _extract_legend_info(ax)
287
- if legend_info:
288
- metadata["legend"] = legend_info
629
+ if (
630
+ hasattr(primary_ax, "_scitex_metadata")
631
+ and "style_preset" in primary_ax._scitex_metadata
632
+ ):
633
+ if "style" not in metadata:
634
+ metadata["style"] = {}
635
+ metadata["style"]["preset"] = primary_ax._scitex_metadata["style_preset"]
636
+ elif (
637
+ hasattr(fig, "_scitex_metadata")
638
+ and "style_preset" in fig._scitex_metadata
639
+ ):
640
+ if "style" not in metadata:
641
+ metadata["style"] = {}
642
+ metadata["style"]["preset"] = fig._scitex_metadata["style_preset"]
643
+
644
+ # Extract artists and legend - add to axes section (matplotlib terminology)
645
+ # Artists and legend belong to axes, not a separate plot section
646
+ ax_row, ax_col = 0, 0
647
+ if hasattr(primary_ax, "_scitex_metadata") and "position_in_grid" in primary_ax._scitex_metadata:
648
+ pos = primary_ax._scitex_metadata["position_in_grid"]
649
+ ax_row, ax_col = pos[0], pos[1]
650
+ ax_key = f"ax_{ax_row:02d}" if ax_row == ax_col == 0 else f"ax_{ax_row * 10 + ax_col:02d}"
651
+
652
+ if "axes" in metadata and ax_key in metadata["axes"]:
653
+ # Add artists to axes
654
+ artists = _extract_artists(primary_ax)
655
+ if artists:
656
+ metadata["axes"][ax_key]["artists"] = artists
657
+
658
+ # Add legend to axes
659
+ legend_info = _extract_legend_info(primary_ax)
660
+ if legend_info:
661
+ metadata["axes"][ax_key]["legend"] = legend_info
662
+
663
+ # Add plot section if we have content
664
+ if plot_info:
665
+ metadata["plot"] = plot_info
666
+
667
+ # Data section for CSV linkage
668
+ # Note: Per-trace column mappings are in plot.traces[i].csv_columns
669
+ # This section provides:
670
+ # - csv_hash: for verifying data integrity
671
+ # - csv_path: path to CSV file (added by _save.py)
672
+ # - columns_actual: actual column names in CSV (added by _save.py after export)
673
+ data_info = {}
674
+
675
+ # Compute CSV data hash for reproducibility verification
676
+ csv_hash = _compute_csv_hash(ax_for_history)
677
+ if csv_hash:
678
+ data_info["csv_hash"] = csv_hash
679
+
680
+ # csv_path and columns_actual will be added by _save.py after actual CSV export
681
+ # This ensures single source of truth - actual columns, not predictions
682
+
683
+ # Add data section if we have content
684
+ if data_info:
685
+ metadata["data"] = data_info
289
686
 
290
687
  except Exception as e:
291
688
  # If Phase 1 extraction fails, continue without it
292
- import warnings
293
- warnings.warn(f"Could not extract Phase 1 metadata: {e}")
689
+ logger.warning(f"Could not extract Phase 1 metadata: {e}")
690
+
691
+ # Apply precision rounding to all numeric values
692
+ metadata = _round_metadata(metadata)
294
693
 
295
694
  return metadata
296
695
 
297
696
 
697
def _round_metadata(metadata: dict) -> dict:
    """
    Build a copy of the top-level metadata with numeric precision normalized.

    Only the "figure", "axes", "style" and "plot" sections are handed to their
    section-specific rounding helpers. Every other top-level entry (schema
    identifiers, "figure_uuid", "runtime", "data", "stats" and any key not
    listed here) is carried over untouched, so statistical values keep their
    full precision.

    Parameters
    ----------
    metadata : dict
        Mapping of top-level section name to section content.

    Returns
    -------
    dict
        New dict holding the same keys in the same order. Sections that are
        passed through are the same objects as in the input (no deep copy).
    """
    rounded = {}

    for section, content in metadata.items():
        # Helpers are looked up only when their section is actually present.
        if section == "figure":
            content = _round_figure_section(content)
        elif section == "axes":
            content = _round_axes_section(content)
        elif section == "style":
            content = _round_style_section(content)
        elif section == "plot":
            content = _round_plot_section(content)
        rounded[section] = content

    return rounded
736
+
737
+
738
def _round_figure_section(fig_data: dict) -> dict:
    """
    Normalize the numeric entries of the figure section.

    Parameters
    ----------
    fig_data : dict
        Figure-level metadata. Recognized keys are "size_mm", "size_inch",
        "size_px" and "dpi"; anything else is copied through unchanged.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    # Physical sizes go through _round_list with the PRECISION entry named here
    # (the original comments describe these as 2 decimals for mm, 3 for inch).
    physical_units = {"size_mm": "mm", "size_inch": "inch"}

    rounded = {}
    for field, raw in fig_data.items():
        unit = physical_units.get(field)
        if unit is not None:
            rounded[field] = _round_list(raw, PRECISION[unit], fixed=True)
        elif field == "size_px":
            # Pixels are integers; int() truncates toward zero.
            rounded[field] = [int(px) for px in raw]
        elif field == "dpi":
            rounded[field] = int(raw)
        else:
            rounded[field] = raw
    return rounded
755
+
756
+
757
def _round_axes_section(axes_data: dict) -> dict:
    """
    Normalize numeric entries of the axes section.

    Two layouts are accepted:

    - nested: keys starting with "ax_" whose value is a dict describing one
      axes; the whole dict is rounded by ``_round_single_axes_data``.
    - flat (legacy): the per-axes fields sit directly in ``axes_data``; each
      field is rounded on its own by wrapping it in a one-item dict.

    Parameters
    ----------
    axes_data : dict
        The "axes" section of the metadata.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    rounded = {}
    for name, entry in axes_data.items():
        nested = name.startswith("ax_") and isinstance(entry, dict)
        if nested:
            rounded[name] = _round_single_axes_data(entry)
            continue
        # Legacy/flat field: reuse the per-field rules for this single key.
        single = _round_single_axes_data({name: entry})
        rounded[name] = single.get(name, entry)
    return rounded
772
+
773
+
774
def _round_single_axes_data(ax_data: dict) -> dict:
    """
    Normalize the numeric entries describing one axes.

    Handling per key:

    - "size_mm", "size_inch", "position", "position_ratio",
      "bounds_figure_fraction": list rounded via ``_round_list`` (fixed).
    - "margins_mm", "margins_inch", "bbox_mm", "bbox_inch": dict whose values
      are rounded one by one via ``_round_value`` (fixed).
    - "size_px", "position_in_grid": list of values converted with ``int``.
    - "bbox_px": dict of values converted with ``int``.
    - "xaxis", "yaxis", "xaxis_top", "yaxis_right": "lim" is rounded as a
      list, "n_ticks" is converted with ``int``, other entries are kept.
    - "artists": each item goes through ``_round_artist``.
    - anything else (including "legend"): copied through unchanged.

    Parameters
    ----------
    ax_data : dict
        Metadata of a single axes.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    # key -> PRECISION entry used for list-valued fields
    list_fields = {
        "size_mm": "mm",
        "size_inch": "inch",
        "position": "position",
        "position_ratio": "position",
        "bounds_figure_fraction": "position",
    }
    # key -> PRECISION entry used for dict-valued fields
    mapping_fields = {
        "margins_mm": "mm",
        "margins_inch": "inch",
        "bbox_mm": "mm",
        "bbox_inch": "inch",
    }
    integer_list_fields = ("size_px", "position_in_grid")
    axis_fields = ("xaxis", "yaxis", "xaxis_top", "yaxis_right")

    rounded = {}
    for field, raw in ax_data.items():
        if field in list_fields:
            rounded[field] = _round_list(raw, PRECISION[list_fields[field]], fixed=True)
        elif field in mapping_fields:
            unit = mapping_fields[field]
            rounded[field] = {
                side: _round_value(amount, PRECISION[unit], fixed=True)
                for side, amount in raw.items()
            }
        elif field in integer_list_fields:
            rounded[field] = [int(item) for item in raw]
        elif field == "bbox_px":
            rounded[field] = {edge: int(px) for edge, px in raw.items()}
        elif field in axis_fields:
            # Axis info (label, unit, scale, lim, n_ticks)
            axis_info = {}
            for attr, attr_value in raw.items():
                if attr == "lim":
                    axis_info[attr] = _round_list(attr_value, PRECISION["lim"], fixed=True)
                elif attr == "n_ticks":
                    axis_info[attr] = int(attr_value)
                else:
                    axis_info[attr] = attr_value
            rounded[field] = axis_info
        elif field == "artists":
            rounded[field] = [_round_artist(item) for item in raw]
        else:
            # "legend" and unrecognized keys carry no values rounded here.
            rounded[field] = raw
    return rounded
826
+
827
+
828
def _round_style_section(style_data: dict) -> dict:
    """
    Normalize numeric entries of the style section.

    The hierarchical layout groups categories under scopes, e.g.::

        {
            "global": {"fonts": {...}, "padding": {...}},
            "axes_default": {"axes": {...}, "ticks": {...}},
            "artist_default": {"lines": {...}, "markers": {...}}
        }

    Within those three scopes every dict-valued category is rounded by
    ``_round_style_subsection``. For backward compatibility a flat layout is
    also accepted: a dict under any other key is treated as a category named
    after that key, and a bare float is rounded according to its key name
    ("_mm" -> mm precision, "_pt" -> 1 decimal, otherwise 2 decimals).

    Parameters
    ----------
    style_data : dict
        The "style" section of the metadata.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    scope_names = ("global", "axes_default", "artist_default")

    rounded = {}
    for scope, scope_data in style_data.items():
        if scope in scope_names:
            rounded[scope] = {
                category: (
                    _round_style_subsection(category, category_data)
                    if isinstance(category_data, dict)
                    else category_data
                )
                for category, category_data in scope_data.items()
            }
            continue

        if isinstance(scope_data, dict):
            # Flat layout: the key itself names the category.
            rounded[scope] = _round_style_subsection(scope, scope_data)
            continue

        if not isinstance(scope_data, float):
            # Integers, strings, None, ... are stored as they are.
            rounded[scope] = scope_data
            continue

        if "_mm" in scope:
            rounded[scope] = _round_value(scope_data, PRECISION["mm"], fixed=True)
        elif "_pt" in scope:
            rounded[scope] = _round_value(scope_data, 1, fixed=True)
        else:
            rounded[scope] = _round_value(scope_data, 2)
    return rounded
863
+
864
+
865
def _round_style_subsection(category: str, data: dict) -> dict:
    """
    Round the float values of one style category.

    Only ``float`` values are touched; the rule is chosen in this order:

    1. key contains "_mm", or the category is one of "axes", "ticks",
       "lines", "markers": mm precision (``PRECISION["mm"]``), fixed.
    2. key contains "_pt", or the category is "fonts" or "padding":
       1 decimal, fixed.
    3. otherwise: 2 decimals.

    Parameters
    ----------
    category : str
        Name of the style category the values belong to.
    data : dict
        Key/value pairs of that category.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    mm_categories = ("axes", "ticks", "lines", "markers")
    pt_categories = ("fonts", "padding")

    rounded = {}
    for name, setting in data.items():
        if not isinstance(setting, float):
            # Integers and non-numeric settings need no rounding.
            rounded[name] = setting
            continue

        if "_mm" in name or category in mm_categories:
            rounded[name] = _round_value(setting, PRECISION["mm"], fixed=True)
        elif "_pt" in name or category in pt_categories:
            rounded[name] = _round_value(setting, 1, fixed=True)
        else:
            rounded[name] = _round_value(setting, 2)
    return rounded
883
+
884
+
885
def _round_plot_section(plot_data: dict) -> dict:
    """
    Normalize numeric entries of the plot section.

    Only the "artists" entry is transformed (each item is passed through
    ``_round_artist``); "legend" and every other key are copied unchanged.

    Parameters
    ----------
    plot_data : dict
        The "plot" section of the metadata.

    Returns
    -------
    dict
        New dict with the same keys in the same order.
    """
    return {
        field: (
            [_round_artist(item) for item in content]
            if field == "artists"
            else content
        )
        for field, content in plot_data.items()
    }
896
+
897
+
898
def _round_artist(artist: dict) -> dict:
    """
    Round the numeric values of a single artist record.

    Handling per key:

    - "style" (dict, legacy layout): "linewidth_pt" and "markersize_pt" are
      rounded with ``PRECISION["linewidth"]`` (fixed); other entries are kept.
    - "backend" (dict, two-layer layout): inside "props", "linewidth_pt" and
      "markersize_pt" are rounded with ``PRECISION["linewidth"]`` (fixed) and
      "size" with 1 decimal (fixed). "name" defaults to "matplotlib" when
      absent. Any other backend key, and a "props" value that is not a dict,
      is preserved as-is rather than dropped, since this function is only
      meant to adjust precision.
    - "geometry" (dict): float values are rounded to 4 decimals (not fixed).
    - "zorder": converted with ``int`` when it is an int or float.
    - anything else: copied through unchanged.

    Parameters
    ----------
    artist : dict
        One artist entry of the metadata.

    Returns
    -------
    dict
        New dict with the same top-level keys in the same order.
    """
    line_props = ("linewidth_pt", "markersize_pt")

    result = {}
    for key, value in artist.items():
        if key == "style" and isinstance(value, dict):
            # Legacy: round values in style dict (for backward compatibility)
            result[key] = {
                sk: (
                    _round_value(sv, PRECISION["linewidth"], fixed=True)
                    if sk in line_props
                    else sv
                )
                for sk, sv in value.items()
            }
        elif key == "backend" and isinstance(value, dict):
            # Keep the established key order: name, artist_class, props.
            backend_result = {"name": value.get("name", "matplotlib")}
            if "artist_class" in value:
                backend_result["artist_class"] = value["artist_class"]
            if isinstance(value.get("props"), dict):
                props_result = {}
                for pk, pv in value["props"].items():
                    if pk in line_props:
                        props_result[pk] = _round_value(pv, PRECISION["linewidth"], fixed=True)
                    elif pk == "size":
                        # Scatter size: 1 decimal
                        props_result[pk] = _round_value(pv, 1, fixed=True)
                    else:
                        props_result[pk] = pv
                backend_result["props"] = props_result
            # Carry over everything not handled above so no backend
            # information is lost by the rounding pass.
            for bk, bv in value.items():
                backend_result.setdefault(bk, bv)
            result[key] = backend_result
        elif key == "geometry" and isinstance(value, dict):
            # Round geometry values (for bar charts)
            result[key] = {
                gk: _round_value(gv, 4, fixed=False) if isinstance(gv, float) else gv
                for gk, gv in value.items()
            }
        elif key == "zorder":
            result[key] = int(value) if isinstance(value, (int, float)) else value
        else:
            result[key] = value
    return result


# Backward compatibility alias
_round_trace = _round_artist
948
+
949
+
298
950
  def _parse_label_unit(label_text: str) -> tuple:
299
951
  """
300
952
  Parse label text to extract label and unit.
@@ -320,12 +972,12 @@ def _parse_label_unit(label_text: str) -> tuple:
320
972
  return "", ""
321
973
 
322
974
  # Try to match [...] pattern first (preferred format)
323
- match = re.match(r'^(.+?)\s*\[([^\]]+)\]$', label_text)
975
+ match = re.match(r"^(.+?)\s*\[([^\]]+)\]$", label_text)
324
976
  if match:
325
977
  return match.group(1).strip(), match.group(2).strip()
326
978
 
327
979
  # Try to match (...) pattern
328
- match = re.match(r'^(.+?)\s*\(([^\)]+)\)$', label_text)
980
+ match = re.match(r"^(.+?)\s*\(([^\)]+)\)$", label_text)
329
981
  if match:
330
982
  return match.group(1).strip(), match.group(2).strip()
331
983
 
@@ -333,96 +985,1017 @@ def _parse_label_unit(label_text: str) -> tuple:
333
985
  return label_text.strip(), ""
334
986
 
335
987
 
336
- def _extract_traces(ax) -> list:
988
def _get_csv_column_names(trace_id: str, ax_row: int = 0, ax_col: int = 0, variables: list = None) -> dict:
    """
    Map variable names to their CSV column names for one trace.

    Every name is resolved through ``get_csv_column_name`` from
    ``._csv_column_naming`` so that metadata and CSV export share one naming
    rule. Documented format:
    ax-row-{row}-col-{col}_trace-id-{id}_variable-{var}

    Parameters
    ----------
    trace_id : str
        The trace identifier (e.g., "sine", "step")
    ax_row : int
        Row position of axes in grid (default: 0)
    ax_col : int
        Column position of axes in grid (default: 0)
    variables : list, optional
        Variable names to resolve (default: ["x", "y"])

    Returns
    -------
    dict
        Dictionary mapping each variable name to its CSV column name
    """
    from ._csv_column_naming import get_csv_column_name

    names = ["x", "y"] if variables is None else variables
    return {
        name: get_csv_column_name(name, ax_row, ax_col, trace_id=trace_id)
        for name in names
    }
1020
+
1021
+
1022
+ def _extract_artists(ax) -> list:
337
1023
  """
338
- Extract trace (line) information including properties and CSV column mapping.
1024
+ Extract artist information including properties and CSV column mapping.
1025
+
1026
+ Uses matplotlib terminology: each drawable element is an Artist.
1027
+ Only includes artists that were explicitly created via scitex tracking (top-level calls),
1028
+ not internal artists created by matplotlib functions like boxplot() which internally
1029
+ call plot() multiple times.
339
1030
 
340
1031
  Parameters
341
1032
  ----------
342
1033
  ax : matplotlib.axes.Axes
343
- The axes to extract traces from
1034
+ The axes to extract artists from
344
1035
 
345
1036
  Returns
346
1037
  -------
347
1038
  list
348
- List of trace dictionaries with id, label, color, linestyle, linewidth,
349
- and csv_columns mapping
1039
+ List of artist dictionaries with:
1040
+ - id: unique identifier
1041
+ - artist_class: matplotlib class name (Line2D, PathCollection, etc.)
1042
+ - label: legend label
1043
+ - style: color, linestyle, linewidth, etc.
1044
+ - data_ref: CSV column mapping (matches columns_actual exactly)
350
1045
  """
351
1046
  import matplotlib.colors as mcolors
352
- from ._csv_column_naming import get_csv_column_name, sanitize_trace_id
353
1047
 
354
- traces = []
1048
+ artists = []
355
1049
 
356
1050
  # Get axes position for CSV column naming
357
1051
  ax_row, ax_col = 0, 0 # Default for single axes
358
- if hasattr(ax, '_scitex_metadata') and 'position_in_grid' in ax._scitex_metadata:
359
- pos = ax._scitex_metadata['position_in_grid']
1052
+ if hasattr(ax, "_scitex_metadata") and "position_in_grid" in ax._scitex_metadata:
1053
+ pos = ax._scitex_metadata["position_in_grid"]
360
1054
  ax_row, ax_col = pos[0], pos[1]
361
1055
 
362
- for i, line in enumerate(ax.lines):
363
- trace = {}
1056
+ # Get the raw matplotlib axes for accessing lines
1057
+ mpl_ax = ax._axis_mpl if hasattr(ax, "_axis_mpl") else ax
1058
+
1059
+ # Try to find scitex wrapper for plot type detection and history access
1060
+ ax_for_detection = ax
1061
+ if not hasattr(ax, 'history') and hasattr(mpl_ax, '_scitex_wrapper'):
1062
+ ax_for_detection = mpl_ax._scitex_wrapper
1063
+
1064
+ # Check if we should filter to only tracked artists
1065
+ # For plot types that internally call plot (boxplot, errorbar, etc.),
1066
+ # we don't export the internal artists EXCEPT explicitly tracked ones
1067
+ plot_type, method = _detect_plot_type(ax_for_detection)
1068
+
1069
+ # Plot types where internal line artists should be hidden
1070
+ # But we still export artists that have explicit _scitex_id set
1071
+ # These plot types create Line2D objects internally that don't have
1072
+ # corresponding data in the CSV export
1073
+ # NOTE: scatter is NOT included here because scatter plots often have
1074
+ # regression lines that should be exported
1075
+ internal_plot_types = {
1076
+ "boxplot", "violin", "hist", "bar", "image", "heatmap", "kde", "ecdf",
1077
+ "errorbar", "fill", "stem", "contour", "pie", "quiver", "stream"
1078
+ }
364
1079
 
1080
+ skip_unlabeled = plot_type in internal_plot_types
1081
+
1082
+ # Build a map from scitex_id to full record from history
1083
+ # Record format: (tracking_id, method, tracked_dict, kwargs)
1084
+ id_to_history = {}
1085
+ if hasattr(ax_for_detection, "history"):
1086
+ for record_id, record in ax_for_detection.history.items():
1087
+ if isinstance(record, tuple) and len(record) >= 2:
1088
+ tracking_id = record[0] # The id used in tracking
1089
+ id_to_history[tracking_id] = record # Store full record
1090
+
1091
+ # Special handling for boxplot and violin - extract semantic components
1092
+ # Boxplot creates lines in a specific pattern: for n boxes, there are
1093
+ # typically: whiskers (2*n), caps (2*n), median (n), fliers (n)
1094
+ is_boxplot = plot_type == "boxplot"
1095
+ is_violin = plot_type == "violin"
1096
+ is_stem = plot_type == "stem"
1097
+
1098
+ # For boxplot, try to determine the number of boxes and compute stats from history
1099
+ num_boxes = 0
1100
+ boxplot_stats = [] # Will hold stats for each box
1101
+ boxplot_data = None
1102
+ if is_boxplot and hasattr(ax_for_detection, "history"):
1103
+ for record in ax_for_detection.history.values():
1104
+ if isinstance(record, tuple) and len(record) >= 3:
1105
+ method_name = record[1]
1106
+ if method_name == "boxplot":
1107
+ tracked_dict = record[2]
1108
+ args = tracked_dict.get("args", [])
1109
+ if args and len(args) > 0:
1110
+ data = args[0]
1111
+ if hasattr(data, '__len__') and not isinstance(data, str):
1112
+ # Check if it's list of arrays or single array
1113
+ if hasattr(data[0], '__len__') and not isinstance(data[0], str):
1114
+ num_boxes = len(data)
1115
+ boxplot_data = data
1116
+ else:
1117
+ num_boxes = 1
1118
+ boxplot_data = [data]
1119
+ break
1120
+
1121
+ # Compute boxplot statistics
1122
+ if boxplot_data is not None:
1123
+ import numpy as np
1124
+ for box_idx, box_data in enumerate(boxplot_data):
1125
+ try:
1126
+ arr = np.asarray(box_data)
1127
+ arr = arr[~np.isnan(arr)] # Remove NaN values
1128
+ if len(arr) > 0:
1129
+ q1 = float(np.percentile(arr, 25))
1130
+ median = float(np.median(arr))
1131
+ q3 = float(np.percentile(arr, 75))
1132
+ iqr = q3 - q1
1133
+ whisker_low = float(max(arr.min(), q1 - 1.5 * iqr))
1134
+ whisker_high = float(min(arr.max(), q3 + 1.5 * iqr))
1135
+ # Fliers are points outside whiskers
1136
+ fliers = arr[(arr < whisker_low) | (arr > whisker_high)]
1137
+ boxplot_stats.append({
1138
+ "box_index": box_idx,
1139
+ "median": median,
1140
+ "q1": q1,
1141
+ "q3": q3,
1142
+ "whisker_low": whisker_low,
1143
+ "whisker_high": whisker_high,
1144
+ "n_fliers": int(len(fliers)),
1145
+ "n_samples": int(len(arr)),
1146
+ })
1147
+ except (ValueError, TypeError):
1148
+ pass
1149
+
1150
+ for i, line in enumerate(mpl_ax.lines):
365
1151
  # Get ID from _scitex_id attribute (set by scitex plotting functions)
366
1152
  # This matches the id= kwarg passed to ax.plot()
367
- scitex_id = getattr(line, '_scitex_id', None)
1153
+ scitex_id = getattr(line, "_scitex_id", None)
368
1154
 
369
1155
  # Get label for legend
370
1156
  label = line.get_label()
371
1157
 
372
- # Determine trace_id for CSV column matching
373
- # Use index-based ID to match CSV export (single source of truth)
374
- trace_id_for_csv = None # Will use trace_index in get_csv_column_name
375
-
376
- # Store display id/label separately
377
- if scitex_id:
378
- trace["id"] = scitex_id
379
- elif not label.startswith('_'):
380
- trace["id"] = label
1158
+ # For internal plot types (boxplot, violin, etc.), skip Line2D artists
1159
+ # that were created internally by matplotlib (not explicitly tracked).
1160
+ # These internal artists don't have corresponding data in the CSV.
1161
+ # BUT: for boxplot/violin/stem, we want to export with semantic labels
1162
+ semantic_type = None
1163
+ semantic_id = None
1164
+ has_boxplot_stats = False
1165
+ box_idx = None
1166
+
1167
+ # For stem, always detect semantic type (even with scitex_id)
1168
+ if is_stem:
1169
+ marker = line.get_marker()
1170
+ linestyle = line.get_linestyle()
1171
+ if marker and marker != "None" and linestyle == "None":
1172
+ # This is the marker line (markers only, no connecting line)
1173
+ semantic_type = "stem_marker"
1174
+ semantic_id = "stem_markers"
1175
+ elif linestyle and linestyle != "None":
1176
+ # This is either stemlines or baseline
1177
+ # Check if it looks like a baseline (horizontal line at y=0)
1178
+ ydata = line.get_ydata()
1179
+ if len(ydata) >= 2 and len(set(ydata)) == 1:
1180
+ semantic_type = "stem_baseline"
1181
+ semantic_id = "stem_baseline"
1182
+ else:
1183
+ semantic_type = "stem_stem"
1184
+ semantic_id = "stem_lines"
1185
+ else:
1186
+ semantic_type = "stem_component"
1187
+ semantic_id = f"stem_{i}"
1188
+
1189
+ if skip_unlabeled and not scitex_id and label.startswith("_"):
1190
+ # For boxplot, assign semantic roles based on position in lines list
1191
+ if is_boxplot and num_boxes > 0:
1192
+ # Boxplot line order: whiskers (2*n), caps (2*n), medians (n), fliers (n)
1193
+ total_whiskers = 2 * num_boxes
1194
+ total_caps = 2 * num_boxes
1195
+ total_medians = num_boxes
1196
+
1197
+ if i < total_whiskers:
1198
+ box_idx = i // 2
1199
+ whisker_idx = i % 2
1200
+ semantic_type = "boxplot_whisker"
1201
+ semantic_id = f"box_{box_idx}_whisker_{whisker_idx}"
1202
+ elif i < total_whiskers + total_caps:
1203
+ cap_i = i - total_whiskers
1204
+ box_idx = cap_i // 2
1205
+ cap_idx = cap_i % 2
1206
+ semantic_type = "boxplot_cap"
1207
+ semantic_id = f"box_{box_idx}_cap_{cap_idx}"
1208
+ elif i < total_whiskers + total_caps + total_medians:
1209
+ box_idx = i - total_whiskers - total_caps
1210
+ semantic_type = "boxplot_median"
1211
+ semantic_id = f"box_{box_idx}_median"
1212
+ # Mark this as the primary element to hold stats
1213
+ has_boxplot_stats = True
1214
+ else:
1215
+ flier_idx = i - total_whiskers - total_caps - total_medians
1216
+ # Distribute fliers across boxes if we have fewer flier lines than boxes
1217
+ box_idx = flier_idx if flier_idx < num_boxes else num_boxes - 1
1218
+ semantic_type = "boxplot_flier"
1219
+ semantic_id = f"box_{box_idx}_flier"
1220
+ elif is_violin:
1221
+ # Violin typically has: bodies (patches), then optional lines
1222
+ semantic_type = "violin_component"
1223
+ semantic_id = f"violin_line_{i}"
1224
+ elif is_stem:
1225
+ # Already handled above
1226
+ pass
1227
+ else:
1228
+ continue # Skip for other internal plot types
1229
+
1230
+ artist = {}
1231
+
1232
+ # For scatter plots, check if this Line2D is a regression line
1233
+ is_regression_line = False
1234
+ if plot_type == "scatter" and label.startswith("_"):
1235
+ # Check if this looks like a regression line (straight line with few points)
1236
+ xdata = line.get_xdata()
1237
+ ydata = line.get_ydata()
1238
+ if len(xdata) == 2: # Regression line typically has 2 points
1239
+ is_regression_line = True
1240
+
1241
+ # Store display id/label
1242
+ # For stem, use semantic_id as the primary ID to ensure uniqueness
1243
+ if semantic_id and is_stem:
1244
+ artist["id"] = semantic_id
1245
+ if scitex_id:
1246
+ artist["group_id"] = scitex_id # Store original trace id as group
1247
+ elif scitex_id:
1248
+ artist["id"] = scitex_id
1249
+ elif semantic_id:
1250
+ artist["id"] = semantic_id
1251
+ elif is_regression_line:
1252
+ artist["id"] = f"regression_{i}"
1253
+ elif not label.startswith("_"):
1254
+ artist["id"] = label
381
1255
  else:
382
- trace["id"] = f"line_{i}"
1256
+ artist["id"] = f"line_{i}"
1257
+
1258
+ # Semantic layer: mark (plot type) and role (component role)
1259
+ # mark: line, scatter, bar, boxplot, violin, heatmap, etc.
1260
+ # role: specific component like boxplot_median, violin_body, etc.
1261
+ artist["mark"] = "line" # Line2D is always a line mark
1262
+ if semantic_type:
1263
+ artist["role"] = semantic_type
1264
+ elif is_regression_line:
1265
+ artist["role"] = "regression_line"
383
1266
 
384
1267
  # Label (for legend) - use label if not internal
385
- if not label.startswith('_'):
386
- trace["label"] = label
1268
+ # legend_included indicates if this artist appears in legend
1269
+ if not label.startswith("_"):
1270
+ artist["label"] = label
1271
+ artist["legend_included"] = True
1272
+ else:
1273
+ artist["legend_included"] = False
1274
+
1275
+ # zorder for layering
1276
+ artist["zorder"] = line.get_zorder()
1277
+
1278
+ # Backend layer: matplotlib-specific properties
1279
+ backend = {
1280
+ "name": "matplotlib",
1281
+ "artist_class": type(line).__name__, # e.g., "Line2D"
1282
+ "props": {}
1283
+ }
387
1284
 
388
1285
  # Color - always convert to hex for consistent JSON storage
389
1286
  color = line.get_color()
390
1287
  try:
391
1288
  # mcolors.to_hex handles strings, RGB tuples, RGBA tuples
392
1289
  color_hex = mcolors.to_hex(color, keep_alpha=False)
393
- trace["color"] = color_hex
1290
+ backend["props"]["color"] = color_hex
394
1291
  except (ValueError, TypeError):
395
1292
  # Fallback: store as-is
396
- trace["color"] = color
1293
+ backend["props"]["color"] = color
397
1294
 
398
1295
  # Line style
399
- trace["linestyle"] = line.get_linestyle()
1296
+ backend["props"]["linestyle"] = line.get_linestyle()
400
1297
 
401
1298
  # Line width
402
- trace["linewidth"] = line.get_linewidth()
1299
+ backend["props"]["linewidth_pt"] = line.get_linewidth()
403
1300
 
404
- # Marker
1301
+ # Marker - always include (null if no marker)
405
1302
  marker = line.get_marker()
406
- if marker and marker != 'None':
407
- trace["marker"] = marker
408
- trace["markersize"] = line.get_markersize()
409
-
410
- # CSV column mapping - use single source of truth
411
- # Uses trace_index to match what _export_as_csv generates
412
- trace["csv_columns"] = {
413
- "x": get_csv_column_name("plot_x", ax_row, ax_col, trace_index=i),
414
- "y": get_csv_column_name("plot_y", ax_row, ax_col, trace_index=i),
1303
+ if marker and marker != "None" and marker != "none":
1304
+ backend["props"]["marker"] = marker
1305
+ backend["props"]["markersize_pt"] = line.get_markersize()
1306
+ else:
1307
+ backend["props"]["marker"] = None
1308
+
1309
+ artist["backend"] = backend
1310
+
1311
+ # data_ref - CSV column mapping using single source of truth naming
1312
+ # Format: ax-row-{row}-col-{col}_trace-id-{id}_variable-{var}
1313
+ # Only add data_ref if this is NOT a boxplot/violin internal element
1314
+ # (those have semantic_type set but no corresponding CSV data)
1315
+ if not semantic_type:
1316
+ # Try to find the correct trace_id for data_ref
1317
+ # Priority: 1) _scitex_id, 2) History record trace_id, 3) Artist ID
1318
+ trace_id_for_ref = None
1319
+
1320
+ if scitex_id:
1321
+ # Artist has explicit _scitex_id set
1322
+ trace_id_for_ref = scitex_id
1323
+ else:
1324
+ # Try to find matching history record for this Line2D
1325
+ # Look for "plot" method records and match by index
1326
+ if hasattr(ax_for_detection, "history"):
1327
+ plot_records = []
1328
+ for record_id, record in ax_for_detection.history.items():
1329
+ if isinstance(record, tuple) and len(record) >= 2:
1330
+ if record[1] == "plot":
1331
+ # Extract trace_id from tracking_id (e.g., "ax_00_plot_0" -> "0")
1332
+ tracking_id = record[0]
1333
+ if tracking_id.startswith("ax_"):
1334
+ parts = tracking_id.split("_")
1335
+ if len(parts) >= 4:
1336
+ trace_id_for_ref = "_".join(parts[3:])
1337
+ elif len(parts) == 4:
1338
+ trace_id_for_ref = parts[3]
1339
+ elif tracking_id.startswith("plot_"):
1340
+ trace_id_for_ref = tracking_id[5:] if len(tracking_id) > 5 else str(i)
1341
+ else:
1342
+ # User-provided ID like "sine"
1343
+ trace_id_for_ref = tracking_id
1344
+ plot_records.append(trace_id_for_ref)
1345
+
1346
+ # Match by line index if we have plot records
1347
+ if plot_records:
1348
+ # Find the index of this line among all non-semantic lines
1349
+ non_semantic_line_idx = 0
1350
+ for j, l in enumerate(mpl_ax.lines[:i]):
1351
+ l_label = l.get_label()
1352
+ l_scitex_id = getattr(l, "_scitex_id", None)
1353
+ l_semantic_id = getattr(l, "_scitex_semantic_id", None)
1354
+ # Count only lines that would get data_ref (non-semantic)
1355
+ if not l_semantic_id and not l_label.startswith("_"):
1356
+ non_semantic_line_idx += 1
1357
+ elif l_scitex_id:
1358
+ non_semantic_line_idx += 1
1359
+
1360
+ if non_semantic_line_idx < len(plot_records):
1361
+ trace_id_for_ref = plot_records[non_semantic_line_idx]
1362
+
1363
+ # Fallback to artist ID
1364
+ if not trace_id_for_ref:
1365
+ trace_id_for_ref = artist.get("id", str(i))
1366
+
1367
+ artist["data_ref"] = _get_csv_column_names(trace_id_for_ref, ax_row, ax_col)
1368
+ elif is_stem and scitex_id:
1369
+ # For stem artists, add data_ref pointing to the original trace's columns
1370
+ artist["data_ref"] = _get_csv_column_names(scitex_id, ax_row, ax_col)
1371
+ # For baseline, mark it as derived (not directly from CSV)
1372
+ if semantic_type == "stem_baseline":
1373
+ artist["derived"] = True
1374
+ artist["data_ref"]["derived_from"] = "y=0"
1375
+
1376
+ # Add boxplot statistics to the median artist
1377
+ if has_boxplot_stats and box_idx is not None and box_idx < len(boxplot_stats):
1378
+ artist["stats"] = boxplot_stats[box_idx]
1379
+
1380
+ artists.append(artist)
1381
+
1382
+ # Also extract PathCollection artists (scatter points)
1383
+ for i, coll in enumerate(mpl_ax.collections):
1384
+ if "PathCollection" not in type(coll).__name__:
1385
+ continue
1386
+
1387
+ artist = {}
1388
+
1389
+ # Get ID from _scitex_id attribute
1390
+ scitex_id = getattr(coll, "_scitex_id", None)
1391
+ label = coll.get_label()
1392
+
1393
+ if scitex_id:
1394
+ artist["id"] = scitex_id
1395
+ elif label and not label.startswith("_"):
1396
+ artist["id"] = label
1397
+ else:
1398
+ artist["id"] = f"scatter_{i}"
1399
+
1400
+ # Semantic layer
1401
+ artist["mark"] = "scatter"
1402
+
1403
+ # Legend inclusion
1404
+ if label and not label.startswith("_"):
1405
+ artist["label"] = label
1406
+ artist["legend_included"] = True
1407
+ else:
1408
+ artist["legend_included"] = False
1409
+
1410
+ artist["zorder"] = coll.get_zorder()
1411
+
1412
+ # Backend layer: matplotlib-specific properties
1413
+ backend = {
1414
+ "name": "matplotlib",
1415
+ "artist_class": type(coll).__name__, # "PathCollection"
1416
+ "props": {}
1417
+ }
1418
+
1419
+ try:
1420
+ facecolors = coll.get_facecolor()
1421
+ if len(facecolors) > 0:
1422
+ backend["props"]["facecolor"] = mcolors.to_hex(facecolors[0], keep_alpha=False)
1423
+ except (ValueError, TypeError, IndexError):
1424
+ pass
1425
+
1426
+ try:
1427
+ edgecolors = coll.get_edgecolor()
1428
+ if len(edgecolors) > 0:
1429
+ backend["props"]["edgecolor"] = mcolors.to_hex(edgecolors[0], keep_alpha=False)
1430
+ except (ValueError, TypeError, IndexError):
1431
+ pass
1432
+
1433
+ try:
1434
+ sizes = coll.get_sizes()
1435
+ if len(sizes) > 0:
1436
+ backend["props"]["size"] = float(sizes[0])
1437
+ except (ValueError, TypeError, IndexError):
1438
+ pass
1439
+
1440
+ artist["backend"] = backend
1441
+
1442
+ # data_ref - CSV column mapping using single source of truth naming
1443
+ # Format: ax-row-{row}-col-{col}_trace-id-{id}_variable-{var}
1444
+ artist_id = artist.get("id", str(i))
1445
+ artist["data_ref"] = _get_csv_column_names(artist_id, ax_row, ax_col)
1446
+
1447
+ artists.append(artist)
1448
+
1449
+ # Extract Rectangle patches (bar/barh/hist charts)
1450
+ # First, collect all rectangles to determine group info
1451
+ rectangles = []
1452
+ for i, patch in enumerate(mpl_ax.patches):
1453
+ patch_type = type(patch).__name__
1454
+ if patch_type == "Rectangle":
1455
+ rectangles.append((i, patch))
1456
+
1457
+ # Determine if this is bar, barh, or hist based on plot_type
1458
+ is_bar = plot_type in ("bar", "barh")
1459
+ is_hist = plot_type == "hist"
1460
+
1461
+ # Get trace_id from history for data_ref
1462
+ trace_id_for_bars = None
1463
+ if hasattr(ax_for_detection, "history"):
1464
+ for record in ax_for_detection.history.values():
1465
+ if isinstance(record, tuple) and len(record) >= 2:
1466
+ method_name = record[1]
1467
+ if method_name in ("bar", "barh", "hist"):
1468
+ trace_id_for_bars = record[0]
1469
+ break
1470
+
1471
+ bar_count = 0
1472
+ for rect_idx, (i, patch) in enumerate(rectangles):
1473
+ patch_type = type(patch).__name__
1474
+
1475
+ # Skip internal unlabeled patches for non-bar/hist types
1476
+ scitex_id = getattr(patch, "_scitex_id", None)
1477
+ label = patch.get_label() if hasattr(patch, "get_label") else ""
1478
+
1479
+ # For bar/hist, we want ALL rectangles even if unlabeled
1480
+ if not (is_bar or is_hist):
1481
+ if skip_unlabeled and not scitex_id and (not label or label.startswith("_")):
1482
+ continue
1483
+
1484
+ artist = {}
1485
+
1486
+ # Generate unique ID with index
1487
+ base_id = scitex_id or (label if label and not label.startswith("_") else trace_id_for_bars or "bar")
1488
+ artist["id"] = f"{base_id}_{bar_count}"
1489
+
1490
+ # Add group_id for referencing the whole group
1491
+ artist["group_id"] = base_id
1492
+
1493
+ # Semantic layer
1494
+ artist["mark"] = "bar"
1495
+ if is_hist:
1496
+ artist["role"] = "hist_bin"
1497
+ else:
1498
+ artist["role"] = "bar_body"
1499
+
1500
+ # Legend inclusion - only first bar of a group should be in legend
1501
+ if label and not label.startswith("_") and bar_count == 0:
1502
+ artist["label"] = label
1503
+ artist["legend_included"] = True
1504
+ else:
1505
+ artist["legend_included"] = False
1506
+
1507
+ artist["zorder"] = patch.get_zorder()
1508
+
1509
+ # Backend layer: matplotlib-specific properties
1510
+ backend = {
1511
+ "name": "matplotlib",
1512
+ "artist_class": patch_type,
1513
+ "props": {}
1514
+ }
1515
+
1516
+ try:
1517
+ backend["props"]["facecolor"] = mcolors.to_hex(patch.get_facecolor(), keep_alpha=False)
1518
+ except (ValueError, TypeError):
1519
+ pass
1520
+ try:
1521
+ backend["props"]["edgecolor"] = mcolors.to_hex(patch.get_edgecolor(), keep_alpha=False)
1522
+ except (ValueError, TypeError):
1523
+ pass
1524
+ try:
1525
+ backend["props"]["linewidth_pt"] = patch.get_linewidth()
1526
+ except (ValueError, TypeError):
1527
+ pass
1528
+
1529
+ artist["backend"] = backend
1530
+
1531
+ # Bar geometry
1532
+ try:
1533
+ artist["geometry"] = {
1534
+ "x": patch.get_x(),
1535
+ "y": patch.get_y(),
1536
+ "width": patch.get_width(),
1537
+ "height": patch.get_height(),
1538
+ }
1539
+ except (ValueError, TypeError):
1540
+ pass
1541
+
1542
+ # data_ref with row_index for individual bars
1543
+ if trace_id_for_bars:
1544
+ if is_hist:
1545
+ # Histogram uses specific column names: bin-centers (x), bin-counts (y)
1546
+ prefix = f"ax-row-{ax_row}-col-{ax_col}_trace-id-{trace_id_for_bars}_variable-"
1547
+ artist["data_ref"] = {
1548
+ "x": f"{prefix}bin-centers",
1549
+ "y": f"{prefix}bin-counts",
1550
+ "row_index": bar_count,
1551
+ "bin_index": bar_count,
1552
+ }
1553
+ else:
1554
+ artist["data_ref"] = _get_csv_column_names(trace_id_for_bars, ax_row, ax_col)
1555
+ artist["data_ref"]["row_index"] = bar_count
1556
+
1557
+ bar_count += 1
1558
+ artists.append(artist)
1559
+
1560
+ # Extract Wedge patches (pie charts)
1561
+ wedge_count = 0
1562
+ for i, patch in enumerate(mpl_ax.patches):
1563
+ patch_type = type(patch).__name__
1564
+
1565
+ if patch_type != "Wedge":
1566
+ continue
1567
+
1568
+ artist = {}
1569
+
1570
+ scitex_id = getattr(patch, "_scitex_id", None)
1571
+ label = patch.get_label() if hasattr(patch, "get_label") else ""
1572
+
1573
+ if scitex_id:
1574
+ artist["id"] = scitex_id
1575
+ elif label and not label.startswith("_"):
1576
+ artist["id"] = label
1577
+ else:
1578
+ artist["id"] = f"wedge_{wedge_count}"
1579
+ wedge_count += 1
1580
+
1581
+ # Semantic layer
1582
+ artist["mark"] = "pie"
1583
+ artist["role"] = "pie_wedge"
1584
+
1585
+ if label and not label.startswith("_"):
1586
+ artist["label"] = label
1587
+ artist["legend_included"] = True
1588
+ else:
1589
+ artist["legend_included"] = False
1590
+
1591
+ artist["zorder"] = patch.get_zorder()
1592
+
1593
+ # Backend layer
1594
+ backend = {
1595
+ "name": "matplotlib",
1596
+ "artist_class": patch_type,
1597
+ "props": {}
1598
+ }
1599
+ try:
1600
+ backend["props"]["facecolor"] = mcolors.to_hex(patch.get_facecolor(), keep_alpha=False)
1601
+ except (ValueError, TypeError):
1602
+ pass
1603
+
1604
+ artist["backend"] = backend
1605
+ artists.append(artist)
1606
+
1607
+ # Extract QuadMesh (hist2d) and PolyCollection (hexbin/violin) with colormap info
1608
+ # Try to get hist2d result data from history
1609
+ hist2d_result = None
1610
+ hexbin_result = None
1611
+ if hasattr(ax_for_detection, "history"):
1612
+ for record in ax_for_detection.history.values():
1613
+ if isinstance(record, tuple) and len(record) >= 3:
1614
+ method_name = record[1]
1615
+ tracked_dict = record[2]
1616
+ if method_name == "hist2d" and "result" in tracked_dict:
1617
+ hist2d_result = tracked_dict["result"]
1618
+ elif method_name == "hexbin" and "result" in tracked_dict:
1619
+ hexbin_result = tracked_dict["result"]
1620
+
1621
+ for i, coll in enumerate(mpl_ax.collections):
1622
+ coll_type = type(coll).__name__
1623
+
1624
+ if coll_type == "QuadMesh":
1625
+ artist = {}
1626
+ artist["id"] = f"hist2d_{i}"
1627
+
1628
+ # Semantic layer
1629
+ artist["mark"] = "heatmap"
1630
+ artist["role"] = "hist2d"
1631
+
1632
+ artist["legend_included"] = False
1633
+ artist["zorder"] = coll.get_zorder()
1634
+
1635
+ # Backend layer
1636
+ backend = {
1637
+ "name": "matplotlib",
1638
+ "artist_class": coll_type,
1639
+ "props": {}
1640
+ }
1641
+ try:
1642
+ cmap = coll.get_cmap()
1643
+ if cmap:
1644
+ backend["props"]["cmap"] = cmap.name
1645
+ except (ValueError, TypeError, AttributeError):
1646
+ pass
1647
+ try:
1648
+ backend["props"]["vmin"] = float(coll.norm.vmin) if coll.norm else None
1649
+ backend["props"]["vmax"] = float(coll.norm.vmax) if coll.norm else None
1650
+ except (ValueError, TypeError, AttributeError):
1651
+ pass
1652
+
1653
+ artist["backend"] = backend
1654
+
1655
+ # Extract hist2d result data directly from QuadMesh
1656
+ try:
1657
+ # Get the count array from the QuadMesh
1658
+ arr = coll.get_array()
1659
+ if arr is not None and len(arr) > 0:
1660
+ import numpy as np
1661
+ # QuadMesh from hist2d has counts as flattened array
1662
+ # Try to get coordinates from the mesh
1663
+ coords = coll.get_coordinates()
1664
+ if coords is not None and len(coords) > 0:
1665
+ # coords shape is (n_rows+1, n_cols+1, 2) for 2D hist
1666
+ n_ybins = coords.shape[0] - 1
1667
+ n_xbins = coords.shape[1] - 1
1668
+
1669
+ # Get edges from coordinates
1670
+ xedges = coords[0, :, 0] # First row, all cols, x-coord
1671
+ yedges = coords[:, 0, 1] # All rows, first col, y-coord
1672
+
1673
+ artist["result"] = {
1674
+ "H_shape": [n_ybins, n_xbins],
1675
+ "n_xbins": int(n_xbins),
1676
+ "n_ybins": int(n_ybins),
1677
+ "xedges_range": [float(xedges[0]), float(xedges[-1])],
1678
+ "yedges_range": [float(yedges[0]), float(yedges[-1])],
1679
+ "count_range": [float(arr.min()), float(arr.max())],
1680
+ "total_count": int(arr.sum()),
1681
+ }
1682
+ except (IndexError, TypeError, AttributeError, ValueError):
1683
+ pass
1684
+
1685
+ artists.append(artist)
1686
+
1687
+ elif coll_type == "PolyCollection" or (coll_type == "FillBetweenPolyCollection" and plot_type == "violin"):
1688
+ arr = coll.get_array() if hasattr(coll, "get_array") else None
1689
+
1690
+ # Check if this is hexbin (has array data for counts) or violin body
1691
+ if arr is not None and len(arr) > 0 and plot_type == "hexbin":
1692
+ artist = {}
1693
+ artist["id"] = f"hexbin_{i}"
1694
+
1695
+ # Semantic layer
1696
+ artist["mark"] = "heatmap"
1697
+ artist["role"] = "hexbin"
1698
+
1699
+ artist["legend_included"] = False
1700
+ artist["zorder"] = coll.get_zorder()
1701
+
1702
+ # Backend layer
1703
+ backend = {
1704
+ "name": "matplotlib",
1705
+ "artist_class": coll_type,
1706
+ "props": {}
1707
+ }
1708
+ try:
1709
+ cmap = coll.get_cmap()
1710
+ if cmap:
1711
+ backend["props"]["cmap"] = cmap.name
1712
+ except (ValueError, TypeError, AttributeError):
1713
+ pass
1714
+ try:
1715
+ backend["props"]["vmin"] = float(coll.norm.vmin) if coll.norm else None
1716
+ backend["props"]["vmax"] = float(coll.norm.vmax) if coll.norm else None
1717
+ except (ValueError, TypeError, AttributeError):
1718
+ pass
1719
+
1720
+ artist["backend"] = backend
1721
+
1722
+ # Add hexbin result info directly from the PolyCollection
1723
+ try:
1724
+ artist["result"] = {
1725
+ "n_hexagons": int(len(arr)),
1726
+ "count_range": [float(arr.min()), float(arr.max())] if len(arr) > 0 else None,
1727
+ "total_count": int(arr.sum()),
1728
+ }
1729
+ except (TypeError, AttributeError, ValueError):
1730
+ pass
1731
+
1732
+ artists.append(artist)
1733
+
1734
+ elif plot_type == "violin":
1735
+ # This is a violin body (PolyCollection for violin shape)
1736
+ artist = {}
1737
+ scitex_id = getattr(coll, "_scitex_id", None)
1738
+ label = coll.get_label() if hasattr(coll, "get_label") else ""
1739
+
1740
+ if scitex_id:
1741
+ artist["id"] = f"{scitex_id}_body_{i}"
1742
+ artist["group_id"] = scitex_id
1743
+ else:
1744
+ artist["id"] = f"violin_body_{i}"
1745
+
1746
+ # Semantic layer
1747
+ artist["mark"] = "polygon"
1748
+ artist["role"] = "violin_body"
1749
+
1750
+ artist["legend_included"] = False
1751
+ artist["zorder"] = coll.get_zorder()
1752
+
1753
+ # Backend layer
1754
+ backend = {
1755
+ "name": "matplotlib",
1756
+ "artist_class": coll_type,
1757
+ "props": {}
1758
+ }
1759
+ try:
1760
+ facecolors = coll.get_facecolor()
1761
+ if len(facecolors) > 0:
1762
+ backend["props"]["facecolor"] = mcolors.to_hex(facecolors[0], keep_alpha=False)
1763
+ except (ValueError, TypeError, IndexError):
1764
+ pass
1765
+ try:
1766
+ edgecolors = coll.get_edgecolor()
1767
+ if len(edgecolors) > 0:
1768
+ backend["props"]["edgecolor"] = mcolors.to_hex(edgecolors[0], keep_alpha=False)
1769
+ except (ValueError, TypeError, IndexError):
1770
+ pass
1771
+
1772
+ artist["backend"] = backend
1773
+ artists.append(artist)
1774
+
1775
+ # Extract AxesImage (imshow)
1776
+ for i, img in enumerate(mpl_ax.images):
1777
+ img_type = type(img).__name__
1778
+
1779
+ artist = {}
1780
+
1781
+ scitex_id = getattr(img, "_scitex_id", None)
1782
+ label = img.get_label() if hasattr(img, "get_label") else ""
1783
+
1784
+ if scitex_id:
1785
+ artist["id"] = scitex_id
1786
+ elif label and not label.startswith("_"):
1787
+ artist["id"] = label
1788
+ else:
1789
+ artist["id"] = f"image_{i}"
1790
+
1791
+ # Semantic layer
1792
+ artist["mark"] = "image"
1793
+ artist["role"] = "image"
1794
+
1795
+ artist["legend_included"] = False
1796
+ artist["zorder"] = img.get_zorder()
1797
+
1798
+ # Backend layer
1799
+ backend = {
1800
+ "name": "matplotlib",
1801
+ "artist_class": img_type,
1802
+ "props": {}
1803
+ }
1804
+ try:
1805
+ cmap = img.get_cmap()
1806
+ if cmap:
1807
+ backend["props"]["cmap"] = cmap.name
1808
+ except (ValueError, TypeError, AttributeError):
1809
+ pass
1810
+ try:
1811
+ backend["props"]["vmin"] = float(img.norm.vmin) if img.norm else None
1812
+ backend["props"]["vmax"] = float(img.norm.vmax) if img.norm else None
1813
+ except (ValueError, TypeError, AttributeError):
1814
+ pass
1815
+ try:
1816
+ backend["props"]["interpolation"] = img.get_interpolation()
1817
+ except (ValueError, TypeError, AttributeError):
1818
+ pass
1819
+
1820
+ artist["backend"] = backend
1821
+ artists.append(artist)
1822
+
1823
+ # Extract Text artists (annotations, stats text, etc.)
1824
+ text_count = 0
1825
+ for i, text_obj in enumerate(mpl_ax.texts):
1826
+ text_content = text_obj.get_text()
1827
+ if not text_content or text_content.strip() == "":
1828
+ continue
1829
+
1830
+ artist = {}
1831
+
1832
+ scitex_id = getattr(text_obj, "_scitex_id", None)
1833
+
1834
+ if scitex_id:
1835
+ artist["id"] = scitex_id
1836
+ else:
1837
+ artist["id"] = f"text_{text_count}"
1838
+
1839
+ # Semantic layer
1840
+ artist["mark"] = "text"
1841
+
1842
+ # Try to determine role from content or position
1843
+ pos = text_obj.get_position()
1844
+ # Check if this looks like stats annotation (contains r=, p=, etc.)
1845
+ if any(kw in text_content.lower() for kw in ['r=', 'p=', 'r²=', 'n=']):
1846
+ artist["role"] = "stats_annotation"
1847
+ else:
1848
+ artist["role"] = "annotation"
1849
+
1850
+ artist["legend_included"] = False
1851
+ artist["zorder"] = text_obj.get_zorder()
1852
+
1853
+ # Geometry - text position
1854
+ artist["geometry"] = {
1855
+ "x": pos[0],
1856
+ "y": pos[1],
415
1857
  }
416
1858
 
417
- traces.append(trace)
1859
+ # Text content
1860
+ artist["text"] = text_content
418
1861
 
419
- return traces
1862
+ # Backend layer
1863
+ backend = {
1864
+ "name": "matplotlib",
1865
+ "artist_class": type(text_obj).__name__,
1866
+ "props": {}
1867
+ }
1868
+
1869
+ try:
1870
+ color = text_obj.get_color()
1871
+ backend["props"]["color"] = mcolors.to_hex(color, keep_alpha=False)
1872
+ except (ValueError, TypeError):
1873
+ pass
1874
+
1875
+ try:
1876
+ backend["props"]["fontsize_pt"] = text_obj.get_fontsize()
1877
+ except (ValueError, TypeError):
1878
+ pass
1879
+
1880
+ try:
1881
+ backend["props"]["ha"] = text_obj.get_ha()
1882
+ backend["props"]["va"] = text_obj.get_va()
1883
+ except (ValueError, TypeError):
1884
+ pass
1885
+
1886
+ artist["backend"] = backend
1887
+
1888
+ # data_ref for text position - only if text was explicitly tracked (has _scitex_id)
1889
+ # Auto-generated text (like contour clabels, pie labels) doesn't have CSV data
1890
+ if scitex_id:
1891
+ artist["data_ref"] = {
1892
+ "x": f"text_{text_count}_x",
1893
+ "y": f"text_{text_count}_y",
1894
+ "content": f"text_{text_count}_content"
1895
+ }
1896
+
1897
+ text_count += 1
1898
+ artists.append(artist)
1899
+
1900
+ # Extract LineCollection artists (errorbar lines, etc.)
1901
+ for i, coll in enumerate(mpl_ax.collections):
1902
+ coll_type = type(coll).__name__
1903
+
1904
+ if coll_type == "LineCollection":
1905
+ # LineCollection is used for errorbar caps/lines
1906
+ artist = {}
1907
+
1908
+ scitex_id = getattr(coll, "_scitex_id", None)
1909
+ label = coll.get_label() if hasattr(coll, "get_label") else ""
1910
+
1911
+ if scitex_id:
1912
+ artist["id"] = scitex_id
1913
+ elif label and not label.startswith("_"):
1914
+ artist["id"] = label
1915
+ else:
1916
+ artist["id"] = f"linecollection_{i}"
1917
+
1918
+ # Semantic layer - determine role
1919
+ artist["mark"] = "line"
1920
+ # Check if this is an errorbar based on context
1921
+ if plot_type == "bar" or method == "barh":
1922
+ artist["role"] = "errorbar"
1923
+ elif plot_type == "stem":
1924
+ artist["role"] = "stem_stem"
1925
+ artist["id"] = "stem_lines" # Override ID for stem
1926
+ else:
1927
+ artist["role"] = "line_collection"
1928
+
1929
+ artist["legend_included"] = False
1930
+ artist["zorder"] = coll.get_zorder()
1931
+
1932
+ # Backend layer
1933
+ backend = {
1934
+ "name": "matplotlib",
1935
+ "artist_class": coll_type,
1936
+ "props": {}
1937
+ }
1938
+
1939
+ try:
1940
+ colors = coll.get_colors()
1941
+ if len(colors) > 0:
1942
+ backend["props"]["color"] = mcolors.to_hex(colors[0], keep_alpha=False)
1943
+ except (ValueError, TypeError, IndexError):
1944
+ pass
1945
+
1946
+ try:
1947
+ linewidths = coll.get_linewidths()
1948
+ if len(linewidths) > 0:
1949
+ backend["props"]["linewidth_pt"] = float(linewidths[0])
1950
+ except (ValueError, TypeError, IndexError):
1951
+ pass
1952
+
1953
+ artist["backend"] = backend
1954
+
1955
+ # Add data_ref for errorbar LineCollections
1956
+ if artist["role"] == "errorbar":
1957
+ # Try to find the trace_id from history
1958
+ errorbar_trace_id = None
1959
+ error_var = "yerr" if method == "bar" else "xerr"
1960
+ if hasattr(ax_for_detection, "history"):
1961
+ for record in ax_for_detection.history.values():
1962
+ if isinstance(record, tuple) and len(record) >= 2:
1963
+ method_name = record[1]
1964
+ if method_name in ("bar", "barh"):
1965
+ errorbar_trace_id = record[0]
1966
+ break
1967
+ if errorbar_trace_id:
1968
+ base_ref = _get_csv_column_names(errorbar_trace_id, ax_row, ax_col)
1969
+ artist["data_ref"] = {
1970
+ "x": base_ref.get("x"),
1971
+ "y": base_ref.get("y"),
1972
+ error_var: f"ax-row-{ax_row}-col-{ax_col}_trace-id-{errorbar_trace_id}_variable-{error_var}"
1973
+ }
1974
+ elif artist["role"] == "stem_stem" and hasattr(ax_for_detection, "history"):
1975
+ # Add data_ref for stem LineCollection
1976
+ for record in ax_for_detection.history.values():
1977
+ if isinstance(record, tuple) and len(record) >= 2:
1978
+ method_name = record[1]
1979
+ if method_name == "stem":
1980
+ stem_trace_id = record[0]
1981
+ artist["data_ref"] = _get_csv_column_names(stem_trace_id, ax_row, ax_col)
1982
+ break
1983
+
1984
+ artists.append(artist)
1985
+
1986
+ return artists
1987
+
1988
+
1989
+ # Backward compatibility alias
1990
+ _extract_traces = _extract_artists
420
1991
 
421
1992
 
422
1993
  def _extract_legend_info(ax) -> Optional[dict]:
423
1994
  """
424
1995
  Extract legend information from axes.
425
1996
 
1997
+ Uses matplotlib terminology for legend properties.
1998
+
426
1999
  Parameters
427
2000
  ----------
428
2001
  ax : matplotlib.axes.Axes
@@ -431,7 +2004,7 @@ def _extract_legend_info(ax) -> Optional[dict]:
431
2004
  Returns
432
2005
  -------
433
2006
  dict or None
434
- Legend info dictionary or None if no legend
2007
+ Legend info dictionary with matplotlib properties, or None if no legend
435
2008
  """
436
2009
  legend = ax.get_legend()
437
2010
  if legend is None:
@@ -439,14 +2012,89 @@ def _extract_legend_info(ax) -> Optional[dict]:
439
2012
 
440
2013
  legend_info = {
441
2014
  "visible": legend.get_visible(),
442
- "loc": legend._loc if hasattr(legend, '_loc') else "best",
443
- "frameon": legend.get_frame_on() if hasattr(legend, 'get_frame_on') else True,
2015
+ "loc": legend._loc if hasattr(legend, "_loc") else "best",
2016
+ "frameon": legend.get_frame_on() if hasattr(legend, "get_frame_on") else True,
444
2017
  }
445
2018
 
446
- # Extract legend entries (labels)
2019
+ # ncol - number of columns
2020
+ if hasattr(legend, "_ncols"):
2021
+ legend_info["ncol"] = legend._ncols
2022
+ elif hasattr(legend, "_ncol"):
2023
+ legend_info["ncol"] = legend._ncol
2024
+
2025
+ # Extract legend handles with artist references
2026
+ # This allows reconstructing the legend by referencing artists
2027
+ handles = []
447
2028
  texts = legend.get_texts()
448
- if texts:
449
- legend_info["labels"] = [t.get_text() for t in texts]
2029
+ legend_handles = legend.legend_handles if hasattr(legend, 'legend_handles') else []
2030
+
2031
+ # Get the raw matplotlib axes for accessing lines to match IDs
2032
+ mpl_ax = ax._axis_mpl if hasattr(ax, "_axis_mpl") else ax
2033
+
2034
+ for i, text in enumerate(texts):
2035
+ label_text = text.get_text()
2036
+ handle_entry = {"label": label_text}
2037
+
2038
+ # Try to get artist_id from corresponding handle
2039
+ artist_id = None
2040
+ if i < len(legend_handles):
2041
+ handle = legend_handles[i]
2042
+ # Check if handle has scitex_id
2043
+ if hasattr(handle, "_scitex_id"):
2044
+ artist_id = handle._scitex_id
2045
+
2046
+ # Fallback: find matching artist by label in axes artists
2047
+ if artist_id is None:
2048
+ # Check lines
2049
+ for line in mpl_ax.lines:
2050
+ line_label = line.get_label()
2051
+ if line_label == label_text:
2052
+ if hasattr(line, "_scitex_id"):
2053
+ artist_id = line._scitex_id
2054
+ elif not line_label.startswith("_"):
2055
+ artist_id = line_label
2056
+ break
2057
+
2058
+ # Check collections (scatter)
2059
+ if artist_id is None:
2060
+ for coll in mpl_ax.collections:
2061
+ coll_label = coll.get_label() if hasattr(coll, "get_label") else ""
2062
+ if coll_label == label_text:
2063
+ if hasattr(coll, "_scitex_id"):
2064
+ artist_id = coll._scitex_id
2065
+ elif coll_label and not coll_label.startswith("_"):
2066
+ artist_id = coll_label
2067
+ break
2068
+
2069
+ # Check patches (bar/hist/pie)
2070
+ if artist_id is None:
2071
+ for patch in mpl_ax.patches:
2072
+ patch_label = patch.get_label() if hasattr(patch, "get_label") else ""
2073
+ if patch_label == label_text:
2074
+ if hasattr(patch, "_scitex_id"):
2075
+ artist_id = patch._scitex_id
2076
+ elif patch_label and not patch_label.startswith("_"):
2077
+ artist_id = patch_label
2078
+ break
2079
+
2080
+ # Check images (imshow)
2081
+ if artist_id is None:
2082
+ for img in mpl_ax.images:
2083
+ img_label = img.get_label() if hasattr(img, "get_label") else ""
2084
+ if img_label == label_text:
2085
+ if hasattr(img, "_scitex_id"):
2086
+ artist_id = img._scitex_id
2087
+ elif img_label and not img_label.startswith("_"):
2088
+ artist_id = img_label
2089
+ break
2090
+
2091
+ if artist_id:
2092
+ handle_entry["artist_id"] = artist_id
2093
+
2094
+ handles.append(handle_entry)
2095
+
2096
+ if handles:
2097
+ legend_info["handles"] = handles
450
2098
 
451
2099
  return legend_info
452
2100
 
@@ -478,93 +2126,1209 @@ def _detect_plot_type(ax) -> tuple:
478
2126
  or (None, None) if unclear
479
2127
  """
480
2128
  # Check scitex history FIRST (most reliable for scitex plots)
481
- if hasattr(ax, 'history') and len(ax.history) > 0:
482
- # Get the first plotting command
483
- first_cmd = ax.history[0].get('command', '')
484
- if 'stx_heatmap' in first_cmd:
485
- return "heatmap", "stx_heatmap"
486
- elif 'stx_kde' in first_cmd:
487
- return "kde", "stx_kde"
488
- elif 'stx_ecdf' in first_cmd:
489
- return "ecdf", "stx_ecdf"
490
- elif 'stx_violin' in first_cmd:
491
- return "violin", "stx_violin"
492
- elif 'stx_box' in first_cmd or 'boxplot' in first_cmd:
493
- return "boxplot", "stx_box"
494
- elif 'stx_line' in first_cmd:
495
- return "line", "stx_line"
496
- elif 'plot_scatter' in first_cmd:
497
- return "scatter", "plot_scatter"
498
- elif 'stx_mean_std' in first_cmd:
499
- return "line", "stx_mean_std"
500
- elif 'stx_shaded_line' in first_cmd:
501
- return "line", "stx_shaded_line"
502
- elif 'sns_boxplot' in first_cmd:
503
- return "boxplot", "sns_boxplot"
504
- elif 'sns_violinplot' in first_cmd:
505
- return "violin", "sns_violinplot"
506
- elif 'sns_scatterplot' in first_cmd:
507
- return "scatter", "sns_scatterplot"
508
- elif 'sns_lineplot' in first_cmd:
509
- return "line", "sns_lineplot"
510
- elif 'sns_histplot' in first_cmd:
511
- return "hist", "sns_histplot"
512
- elif 'sns_barplot' in first_cmd:
513
- return "bar", "sns_barplot"
514
- elif 'sns_stripplot' in first_cmd:
515
- return "scatter", "sns_stripplot"
516
- elif 'sns_kdeplot' in first_cmd:
517
- return "kde", "sns_kdeplot"
518
- elif 'scatter' in first_cmd:
519
- return "scatter", "scatter"
520
- elif 'bar' in first_cmd:
521
- return "bar", "bar"
522
- elif 'hist' in first_cmd:
523
- return "hist", "hist"
2129
+ # History format: dict with keys as IDs and values as tuples (id, method, tracked_dict, kwargs)
2130
+ if hasattr(ax, "history") and len(ax.history) > 0:
2131
+ # Get all methods from history
2132
+ methods = []
2133
+ for record in ax.history.values():
2134
+ if isinstance(record, tuple) and len(record) >= 2:
2135
+ methods.append(record[1]) # record[1] is the method name
2136
+
2137
+ # Check methods in priority order (more specific first)
2138
+ for method in methods:
2139
+ if method == "stx_heatmap":
2140
+ return "heatmap", "stx_heatmap"
2141
+ elif method == "stx_kde":
2142
+ return "kde", "stx_kde"
2143
+ elif method == "stx_ecdf":
2144
+ return "ecdf", "stx_ecdf"
2145
+ elif method == "stx_violin":
2146
+ return "violin", "stx_violin"
2147
+ elif method in ("stx_box", "boxplot"):
2148
+ return "boxplot", method
2149
+ elif method == "stx_line":
2150
+ return "line", "stx_line"
2151
+ elif method == "plot_scatter":
2152
+ return "scatter", "plot_scatter"
2153
+ elif method == "stx_mean_std":
2154
+ return "line", "stx_mean_std"
2155
+ elif method == "stx_mean_ci":
2156
+ return "line", "stx_mean_ci"
2157
+ elif method == "stx_median_iqr":
2158
+ return "line", "stx_median_iqr"
2159
+ elif method == "stx_shaded_line":
2160
+ return "line", "stx_shaded_line"
2161
+ elif method == "sns_boxplot":
2162
+ return "boxplot", "sns_boxplot"
2163
+ elif method == "sns_violinplot":
2164
+ return "violin", "sns_violinplot"
2165
+ elif method == "sns_scatterplot":
2166
+ return "scatter", "sns_scatterplot"
2167
+ elif method == "sns_lineplot":
2168
+ return "line", "sns_lineplot"
2169
+ elif method == "sns_histplot":
2170
+ return "hist", "sns_histplot"
2171
+ elif method == "sns_barplot":
2172
+ return "bar", "sns_barplot"
2173
+ elif method == "sns_stripplot":
2174
+ return "scatter", "sns_stripplot"
2175
+ elif method == "sns_kdeplot":
2176
+ return "kde", "sns_kdeplot"
2177
+ elif method == "scatter":
2178
+ return "scatter", "scatter"
2179
+ elif method == "bar":
2180
+ return "bar", "bar"
2181
+ elif method == "barh":
2182
+ return "bar", "barh"
2183
+ elif method == "hist":
2184
+ return "hist", "hist"
2185
+ elif method == "hist2d":
2186
+ return "hist2d", "hist2d"
2187
+ elif method == "hexbin":
2188
+ return "hexbin", "hexbin"
2189
+ elif method == "violinplot":
2190
+ return "violin", "violinplot"
2191
+ elif method == "errorbar":
2192
+ return "errorbar", "errorbar"
2193
+ elif method == "fill_between":
2194
+ return "fill", "fill_between"
2195
+ elif method == "fill_betweenx":
2196
+ return "fill", "fill_betweenx"
2197
+ elif method == "imshow":
2198
+ return "image", "imshow"
2199
+ elif method == "matshow":
2200
+ return "image", "matshow"
2201
+ elif method == "contour":
2202
+ return "contour", "contour"
2203
+ elif method == "contourf":
2204
+ return "contour", "contourf"
2205
+ elif method == "stem":
2206
+ return "stem", "stem"
2207
+ elif method == "step":
2208
+ return "step", "step"
2209
+ elif method == "pie":
2210
+ return "pie", "pie"
2211
+ elif method == "quiver":
2212
+ return "quiver", "quiver"
2213
+ elif method == "streamplot":
2214
+ return "stream", "streamplot"
2215
+ elif method == "plot":
2216
+ return "line", "plot"
2217
+ # Note: "plot" method is handled last as a fallback since boxplot uses it internally
524
2218
 
525
2219
  # Check for images (takes priority)
526
2220
  if len(ax.images) > 0:
527
2221
  return "image", "imshow"
528
2222
 
2223
+ # Check for 2D density plots (hist2d, hexbin) - QuadMesh or PolyCollection
2224
+ if hasattr(ax, "collections"):
2225
+ for coll in ax.collections:
2226
+ coll_type = type(coll).__name__
2227
+ if "QuadMesh" in coll_type:
2228
+ return "hist2d", "hist2d"
2229
+ if "PolyCollection" in coll_type and hasattr(coll, "get_array"):
2230
+ # hexbin creates PolyCollection with array data
2231
+ arr = coll.get_array()
2232
+ if arr is not None and len(arr) > 0:
2233
+ return "hexbin", "hexbin"
2234
+
529
2235
  # Check for contours
530
- if hasattr(ax, 'collections'):
2236
+ if hasattr(ax, "collections"):
531
2237
  for coll in ax.collections:
532
- if 'Contour' in type(coll).__name__:
2238
+ if "Contour" in type(coll).__name__:
533
2239
  return "contour", "contour"
534
2240
 
535
2241
  # Check for bar plots
536
2242
  if len(ax.containers) > 0:
537
2243
  # Check if it's a boxplot (has multiple containers with specific structure)
538
- if any('boxplot' in str(type(c)).lower() for c in ax.containers):
2244
+ if any("boxplot" in str(type(c)).lower() for c in ax.containers):
539
2245
  return "boxplot", "boxplot"
540
2246
  # Otherwise assume bar plot
541
2247
  return "bar", "bar"
542
2248
 
543
- # Check for patches (could be histogram, violin, etc.)
2249
+ # Check for patches (could be histogram, violin, pie, etc.)
544
2250
  if len(ax.patches) > 0:
2251
+ # Check for pie chart (Wedge patches)
2252
+ if any("Wedge" in type(p).__name__ for p in ax.patches):
2253
+ return "pie", "pie"
545
2254
  # If there are many rectangular patches, likely histogram
546
2255
  if len(ax.patches) > 5:
547
2256
  return "hist", "hist"
548
2257
  # Check for violin plot
549
- if any('Poly' in type(p).__name__ for p in ax.patches):
2258
+ if any("Poly" in type(p).__name__ for p in ax.patches):
550
2259
  return "violin", "violinplot"
551
2260
 
552
2261
  # Check for scatter plots (PathCollection)
553
- if hasattr(ax, 'collections') and len(ax.collections) > 0:
2262
+ if hasattr(ax, "collections") and len(ax.collections) > 0:
554
2263
  for coll in ax.collections:
555
- if 'PathCollection' in type(coll).__name__:
2264
+ if "PathCollection" in type(coll).__name__:
556
2265
  return "scatter", "scatter"
557
2266
 
558
2267
  # Check for line plots
559
2268
  if len(ax.lines) > 0:
560
2269
  # If there are error bars, it might be errorbar plot
561
- if any(hasattr(line, '_mpl_error') for line in ax.lines):
2270
+ if any(hasattr(line, "_mpl_error") for line in ax.lines):
562
2271
  return "errorbar", "errorbar"
563
2272
  return "line", "plot"
564
2273
 
565
2274
  return None, None
566
2275
 
567
2276
 
2277
def _extract_csv_columns_from_history(ax) -> list:
    """
    Extract CSV column names from scitex history for all plot types.

    Walks ``ax.history`` and, for every well-formed record, asks
    ``_get_csv_columns_for_method_with_index`` for the column names of that
    trace, so that JSON metadata can be mapped onto the exported CSV data.

    Parameters
    ----------
    ax : AxisWrapper or matplotlib.axes.Axes
        The axes to extract CSV column info from. Objects without a
        ``history`` attribute, or with an empty/``None`` history, yield an
        empty list.

    Returns
    -------
    list
        One dictionary per tracked plot that produced columns, each with the
        keys ``"id"``, ``"method"`` and ``"columns"`` (a list of column
        names).
    """
    # Grid position of the axes; (0, 0) is the default for single-axes figures.
    ax_row, ax_col = 0, 0
    if hasattr(ax, "_scitex_metadata") and "position_in_grid" in ax._scitex_metadata:
        pos = ax._scitex_metadata["position_in_grid"]
        ax_row, ax_col = pos[0], pos[1]

    # No scitex history means there is nothing to map.
    history = getattr(ax, "history", None)
    if not history:
        return []

    csv_columns_list = []

    # The trace index is the position of the record in the history, counting
    # records that are skipped below as well.
    for trace_index, record in enumerate(history.values()):
        if not isinstance(record, tuple) or len(record) < 4:
            continue

        # Only the first four fields are used; slicing keeps records that
        # carry extra trailing fields from raising on unpacking.
        id_val, method, tracked_dict, kwargs = record[:4]

        columns = _get_csv_columns_for_method_with_index(
            id_val, method, tracked_dict, kwargs, ax_row, ax_col, trace_index
        )

        if columns:
            csv_columns_list.append(
                {
                    "id": id_val,
                    "method": method,
                    "columns": columns,
                }
            )

    return csv_columns_list
2331
+
2332
+
2333
def _get_csv_columns_for_method_with_index(
    id_val, method, tracked_dict, kwargs, ax_row: int, ax_col: int, trace_index: int
) -> list:
    """
    Get CSV column names for a specific plotting method using trace index.

    Each supported method maps to a fixed set of simplified variable names
    (x, y, bins, counts, ...). Every variable is turned into a full column
    name by ``get_csv_column_name`` from the axes position and trace index.

    Parameters
    ----------
    id_val : str
        The plot ID (e.g., "sine", "cosine"). Not used for naming here.
    method : str
        The plotting method name (e.g., "plot", "scatter")
    tracked_dict : dict
        The tracked data dictionary. Not used for naming here.
    kwargs : dict
        The keyword arguments passed to the plot. Not used for naming here.
    ax_row : int
        Row index of axes in grid
    ax_col : int
        Column index of axes in grid
    trace_index : int
        Index of this trace (for deduplication)

    Returns
    -------
    list
        List of column names that will be in the CSV; empty when the method
        is not recognised.
    """
    from ._csv_column_naming import get_csv_column_name

    xy = ("x", "y")
    only_data = ("data",)

    # (method names, variables) pairs for matplotlib / stx_* methods.
    variables_by_method = (
        (("plot", "stx_line"), xy),
        (("scatter", "plot_scatter"), xy),
        (("bar", "barh"), ("x", "height")),
        (("hist",), ("bins", "counts")),
        (("boxplot", "stx_box"), only_data),
        (("violinplot", "stx_violin"), only_data),
        (("errorbar",), ("x", "y", "yerr")),
        (("fill_between",), ("x", "y1", "y2")),
        (("imshow", "stx_heatmap", "stx_image"), only_data),
        (("stx_kde", "stx_ecdf"), xy),
        (
            ("stx_mean_std", "stx_mean_ci", "stx_median_iqr", "stx_shaded_line"),
            ("x", "y", "lower", "upper"),
        ),
    )

    # Same idea for seaborn wrappers, keyed by the name without "sns_".
    variables_by_sns_type = (
        (("boxplot", "violinplot"), only_data),
        (("scatterplot", "lineplot"), xy),
        (("barplot",), xy),
        (("histplot",), ("bins", "counts")),
        (("kdeplot",), xy),
    )

    variables = ()
    for names, candidate in variables_by_method:
        if method in names:
            variables = candidate
            break
    else:
        # Only consulted when no explicit method matched.
        if method.startswith("sns_"):
            sns_type = method.replace("sns_", "")
            for names, candidate in variables_by_sns_type:
                if sns_type in names:
                    variables = candidate
                    break

    return [
        get_csv_column_name(variable, ax_row, ax_col, trace_index=trace_index)
        for variable in variables
    ]
2455
+
2456
+
2457
def _compute_csv_hash_from_df(df) -> Optional[str]:
    """
    Compute a short hash of a DataFrame's CSV serialization.

    The frame is serialized with ``to_csv(index=False)``, encoded as UTF-8
    and hashed with SHA256.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame to compute hash from

    Returns
    -------
    str or None
        First 16 hex characters of the SHA256 digest, or None when ``df`` is
        None, empty, or anything goes wrong while hashing.
    """
    import hashlib

    try:
        if df is None or df.empty:
            return None

        payload = df.to_csv(index=False).encode("utf-8")
        digest = hashlib.sha256(payload).hexdigest()
    except Exception:
        # Best effort: a hash that cannot be computed is reported as missing.
        return None

    # Truncated for readability.
    return digest[:16]
2492
+
2493
+
2494
def _compute_csv_hash(ax_or_df) -> Optional[str]:
    """
    Compute a hash of the CSV data for reproducibility verification.

    The hash is computed from the data that would be exported to CSV,
    allowing verification that JSON and CSV files are in sync.

    For axes input, the frame returned by ``export_as_csv()`` is hashed after
    every column name that does not already start with ``ax_00_`` is given
    that prefix. The exported frame itself is left untouched: the prefixed
    names are applied to a relabelled copy, so callers that keep a reference
    to the exported frame do not see its columns change.

    Parameters
    ----------
    ax_or_df : AxisWrapper, matplotlib.axes.Axes, or pandas.DataFrame
        The axes to compute CSV hash from, or a pre-exported DataFrame.
        A DataFrame is hashed as-is via ``_compute_csv_hash_from_df``.

    Returns
    -------
    str or None
        SHA256 hash of the CSV data (first 16 chars), or None if the object
        has no ``export_as_csv``, the export is None/empty, or hashing fails.
    """
    import hashlib

    import pandas as pd

    # If it's already a DataFrame, use the direct hash function.
    if isinstance(ax_or_df, pd.DataFrame):
        return _compute_csv_hash_from_df(ax_or_df)

    ax = ax_or_df

    # Without an export method there is no data to hash.
    if not hasattr(ax, "export_as_csv"):
        return None

    # Only the single-axes case is handled here, so the index is fixed at 0.
    ax_index = 0
    prefix = f"ax_{ax_index:02d}_"

    try:
        df = ax.export_as_csv()

        if df is None or df.empty:
            return None

        prefixed = [
            name if name.startswith(prefix) else f"{prefix}{name}"
            for name in map(str, df.columns)
        ]

        # set_axis returns a new frame, so the exported one is not mutated.
        csv_string = df.set_axis(prefixed, axis=1).to_csv(index=False)

        # Truncated to 16 hex characters for readability.
        return hashlib.sha256(csv_string.encode("utf-8")).hexdigest()[:16]

    except Exception:
        # Best effort: any failure is reported as "no hash available".
        return None
2562
+
2563
+
2564
def _get_csv_columns_for_method(id_val, method, tracked_dict, kwargs, ax_index: int) -> list:
    """
    Get CSV column names for a specific plotting method.

    The preferred path runs the real CSV formatter (``format_record``) on the
    record and prefixes its column names with ``ax_{index:02d}_`` unless they
    already carry that prefix. If that path fails for any reason, or yields
    no data, the names are generated from per-method patterns instead.

    Parameters
    ----------
    id_val : str
        The plot ID (e.g., "boxplot_0", "plot_0")
    method : str
        The plotting method name (e.g., "boxplot", "plot", "scatter")
    tracked_dict : dict
        The tracked data dictionary; its "args" entry is read in the fallback
    kwargs : dict
        The keyword arguments passed to the plot; "labels" is read for boxplots
    ax_index : int
        Flattened index of axes (0 for single axes, 0-N for multi-axes)

    Returns
    -------
    list
        List of column names; empty when the method is not recognised or the
        required positional data is missing.
    """
    # Preferred path: reuse the formatter that produces the CSV itself.
    try:
        from scitex.plt._subplots._export_as_csv import format_record
        import pandas as pd  # noqa: F401

        frame = format_record((id_val, method, tracked_dict, kwargs))

        if frame is not None and not frame.empty:
            prefix = f"ax_{ax_index:02d}_"
            return [
                name if name.startswith(prefix) else f"{prefix}{name}"
                for name in map(str, frame.columns)
            ]

    except Exception:
        # Formatter unavailable or failed: use the pattern-based names below.
        pass

    import numpy as np

    prefix = f"ax_{ax_index:02d}_"
    columns = []
    positional = tracked_dict.get("args", []) if tracked_dict else []

    def _named(stem, variables):
        # "<prefix><id>_<stem>_<variable>" for each variable, in order.
        return [f"{prefix}{id_val}_{stem}_{variable}" for variable in variables]

    # --- methods whose columns depend on the positional data ---------------
    if method in ("boxplot", "stx_box"):
        if len(positional) >= 1:
            data = positional[0]
            labels = kwargs.get("labels", None) if kwargs else None

            from scitex.types import is_listed_X as scitex_types_is_listed_X

            if isinstance(data, np.ndarray) or scitex_types_is_listed_X(data, [float, int]):
                # Single box: named after its label when exactly one is given.
                tail = labels[0] if labels and len(labels) == 1 else "boxplot_0"
                columns.append(f"{prefix}{id_val}_{tail}")
            else:
                # Several boxes: one column per box, labelled when possible.
                try:
                    box_count = len(data)
                    if labels and len(labels) == box_count:
                        for label in labels:
                            columns.append(f"{prefix}{id_val}_{label}")
                    else:
                        for box in range(box_count):
                            columns.append(f"{prefix}{id_val}_boxplot_{box}")
                except TypeError:
                    columns.append(f"{prefix}{id_val}_boxplot_0")
        return columns

    if method in ("violinplot", "stx_violin"):
        if len(positional) >= 1:
            data = positional[0]
            try:
                violin_count = len(data)
                for violin in range(violin_count):
                    columns.append(f"{prefix}{id_val}_violin_{violin}")
            except TypeError:
                columns.append(f"{prefix}{id_val}_violin_0")
        return columns

    if method in ("imshow", "stx_heatmap", "stx_image"):
        if len(positional) >= 1:
            data = positional[0]
            try:
                if hasattr(data, "shape") and len(data.shape) >= 2:
                    columns.append(f"{prefix}{id_val}_image_data")
            except (TypeError, AttributeError):
                pass
        return columns

    # --- methods with a fixed column layout --------------------------------
    fixed_layouts = (
        (("plot", "stx_line"), "plot", ("x", "y")),
        (("scatter", "plot_scatter"), "scatter", ("x", "y")),
        (("bar", "barh"), "bar", ("x", "height")),
        (("hist",), "hist", ("bins", "counts")),
        (("errorbar",), "errorbar", ("x", "y", "yerr")),
        (("fill_between",), "fill", ("x", "y1", "y2")),
    )
    for names, stem, variables in fixed_layouts:
        if method in names:
            return _named(stem, variables)

    # stx_* helpers use the method name without its "stx_" part as the stem.
    if method in ("stx_kde", "stx_ecdf"):
        return _named(method.replace("stx_", ""), ("x", "y"))

    if method in ("stx_mean_std", "stx_mean_ci", "stx_median_iqr", "stx_shaded_line"):
        return _named(method.replace("stx_", ""), ("x", "y", "lower", "upper"))

    # seaborn wrappers use the name without "sns_" as the stem.
    if method.startswith("sns_"):
        sns_type = method.replace("sns_", "")
        sns_layouts = (
            (("boxplot", "violinplot"), ("data",)),
            (("scatterplot", "lineplot"), ("x", "y")),
            (("barplot",), ("x", "y")),
            (("histplot",), ("bins", "counts")),
            (("kdeplot",), ("x", "y")),
        )
        for names, variables in sns_layouts:
            if sns_type in names:
                return _named(sns_type, variables)

    return columns
2736
+
2737
+
2738
def assert_csv_json_consistency(csv_path: str, json_path: str = None) -> None:
    """
    Assert that CSV data file and its JSON metadata are consistent.

    Thin raising wrapper around ``verify_csv_json_consistency``: returns
    silently when the check is valid, otherwise raises with a message built
    from the verification result.

    Parameters
    ----------
    csv_path : str
        Path to the CSV data file
    json_path : str, optional
        Path to the JSON metadata file. If not provided, assumes
        the JSON is at the same location with .json extension.

    Raises
    ------
    AssertionError
        If CSV and JSON column names don't match
    FileNotFoundError
        If the verification result carries any entry in ``errors``
        (missing files, and also failures while reading either file)

    Examples
    --------
    >>> assert_csv_json_consistency('/tmp/plot.csv')  # Passes silently if valid
    >>> # Or use in tests:
    >>> try:
    ...     assert_csv_json_consistency('/tmp/plot.csv')
    ... except AssertionError as e:
    ...     print(f"Validation failed: {e}")
    """
    report = verify_csv_json_consistency(csv_path, json_path)

    problems = report['errors']
    if problems:
        raise FileNotFoundError('\n'.join(problems))

    if report['valid']:
        return

    # One line per non-empty discrepancy list, in a fixed order.
    findings = (
        (" columns_actual missing in CSV: {}", report['missing_in_csv']),
        (" Extra columns in CSV: {}", report['extra_in_csv']),
        (" data_ref columns missing in CSV: {}", report.get('data_ref_missing')),
    )
    lines = ["CSV/JSON consistency check failed:"]
    lines.extend(template.format(found) for template, found in findings if found)
    raise AssertionError('\n'.join(lines))
2782
+
2783
+
2784
def verify_csv_json_consistency(csv_path: str, json_path: str = None) -> dict:
    """
    Verify consistency between CSV data file and its JSON metadata.

    This function checks that:
    1. Column names in the CSV file match those declared in JSON's columns_actual
    2. Artist data_ref values in JSON match actual CSV column names

    Parameters
    ----------
    csv_path : str
        Path to the CSV data file
    json_path : str, optional
        Path to the JSON metadata file. If not provided, assumes
        the JSON is at the same location with .json extension.

    Returns
    -------
    dict
        Verification result with keys:
        - 'valid': bool - True if CSV and JSON are consistent
        - 'csv_columns': list - Column names found in CSV
        - 'json_columns': list - Column names declared in JSON
        - 'data_ref_columns': list - Column names from artist data_ref
        - 'missing_in_csv': list - Columns in JSON but not in CSV
        - 'extra_in_csv': list - Columns in CSV but not in JSON
        - 'data_ref_missing': list - data_ref columns not found in CSV
        - 'errors': list - Any error messages

        The three difference lists contain each column once, in the order
        it first appears in its source (JSON, CSV header, data_ref walk),
        so the result is reproducible between runs. When 'errors' is
        non-empty the comparison was not performed and 'valid' is False.

    Examples
    --------
    >>> result = verify_csv_json_consistency('/tmp/plot.csv')
    >>> print(result['valid'])
    True
    >>> print(result['missing_in_csv'])
    []
    """
    import json
    import os
    import pandas as pd

    result = {
        'valid': False,
        'csv_columns': [],
        'json_columns': [],
        'data_ref_columns': [],
        'missing_in_csv': [],
        'extra_in_csv': [],
        'data_ref_missing': [],
        'errors': [],
    }

    # Determine JSON path: same base name as the CSV, with .json extension.
    if json_path is None:
        base, _ = os.path.splitext(csv_path)
        json_path = base + '.json'

    # Check files exist
    if not os.path.exists(csv_path):
        result['errors'].append(f"CSV file not found: {csv_path}")
        return result
    if not os.path.exists(json_path):
        result['errors'].append(f"JSON file not found: {json_path}")
        return result

    try:
        # nrows=0 parses only the header row.
        csv_columns = list(pd.read_csv(csv_path, nrows=0).columns)
        result['csv_columns'] = csv_columns
    except Exception as e:
        result['errors'].append(f"Error reading CSV: {e}")
        return result

    try:
        # Decode explicitly as UTF-8 rather than the platform default, so
        # non-ASCII column names read the same on every system.
        with open(json_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        # Get columns_actual from data section; a JSON null counts as "none".
        json_columns = []
        if 'data' in metadata and 'columns_actual' in metadata['data']:
            json_columns = metadata['data']['columns_actual'] or []
        result['json_columns'] = json_columns

        # Extract data_ref columns from artists.
        # 'derived_from' holds descriptive text and 'row_index' a numeric
        # index; neither is a CSV column name, so both are skipped.
        data_ref_columns = []
        skip_keys = {'derived_from', 'row_index'}
        if 'axes' in metadata:
            for ax_key, ax_data in metadata['axes'].items():
                if 'artists' in ax_data:
                    for artist in ax_data['artists']:
                        if 'data_ref' in artist:
                            for key, val in artist['data_ref'].items():
                                if key not in skip_keys and isinstance(val, str):
                                    data_ref_columns.append(val)
        result['data_ref_columns'] = data_ref_columns

    except Exception as e:
        result['errors'].append(f"Error reading JSON: {e}")
        return result

    csv_set = set(csv_columns)
    json_set = set(json_columns)

    def _absent(candidates, known):
        # Unique candidates not in `known`, keeping first-seen order.
        return [name for name in dict.fromkeys(candidates) if name not in known]

    result['missing_in_csv'] = _absent(json_columns, csv_set)
    result['extra_in_csv'] = _absent(csv_columns, json_set)
    result['data_ref_missing'] = _absent(data_ref_columns, csv_set)

    # Valid only if columns_actual matches AND data_ref columns are found in CSV
    result['valid'] = not (
        result['missing_in_csv']
        or result['extra_in_csv']
        or result['data_ref_missing']
    )

    return result
2908
+
2909
+
2910
def collect_recipe_metadata(
    fig,
    ax=None,
    auto_crop: bool = True,
    crop_margin_mm: float = 1.0,
) -> Dict:
    """
    Collect minimal "recipe" metadata from figure - method calls + data refs.

    Unlike `collect_figure_metadata()` which captures every rendered artist,
    this function captures only what's needed to reproduce the figure:
    - Figure/axes dimensions and limits
    - Method calls with arguments (from ax.history)
    - Data column references for CSV linkage
    - Cropping settings

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to collect metadata from
    ax : matplotlib.axes.Axes, AxisWrapper or AxesWrapper, optional
        Primary axes to collect from. An object exposing ``_axes_scitex`` is
        treated as a multi-axes wrapper and expanded into its individual
        axes. If not provided, the axes of ``fig`` are used.
    auto_crop : bool, optional
        Whether auto-cropping is enabled. Default is True.
    crop_margin_mm : float, optional
        Margin in mm for auto-cropping. Default is 1.0.

    Returns
    -------
    dict
        Minimal metadata dictionary with structure:
        - scitex_schema: "scitex.plt.figure.recipe"
        - scitex_schema_version: "0.2.0"
        - figure_uuid, runtime: identification and version info
        - figure: {size_mm, dpi, mode, auto_crop, crop_margin_mm}
        - axes_bbox_px / axes_bbox_mm: bounding box of the first axes
        - style: copied from scitex metadata "style_mm" when present
        - axes: {ax_00: {grid_position, xaxis, yaxis, calls: [...]}}

        Sections whose source information is unavailable are omitted.

    Examples
    --------
    >>> fig, ax = scitex.plt.subplots()
    >>> ax.hist(data, bins=40, id="histogram")
    >>> metadata = collect_recipe_metadata(fig, ax)
    """
    import datetime
    import uuid

    import matplotlib
    import scitex

    metadata = {
        "scitex_schema": "scitex.plt.figure.recipe",
        "scitex_schema_version": "0.2.0",
        "figure_uuid": str(uuid.uuid4()),
        "runtime": {
            "scitex_version": scitex.__version__,
            "matplotlib_version": matplotlib.__version__,
            "created_at": datetime.datetime.now().isoformat(),
        },
    }

    # Collect axes - handle AxesWrapper (multi-axes) properly
    all_axes = []  # List of (ax_wrapper, row, col) tuples
    grid_shape = (1, 1)

    if ax is not None:
        # Handle AxesWrapper (multi-axes) - extract individual AxisWrappers with positions
        if hasattr(ax, "_axes_scitex"):
            import numpy as np

            axes_array = ax._axes_scitex
            if isinstance(axes_array, np.ndarray):
                # A 1-D (or 0-d) array has no column dimension, so indexing
                # shape[1] would raise IndexError. Such arrays are laid out
                # as a single row, matching the linear fallback used for
                # fig.axes below.
                # NOTE(review): a squeezed (n, 1) grid also arrives as 1-D and
                # will be reported as one row - confirm this is acceptable.
                if axes_array.ndim >= 2:
                    grid_shape = axes_array.shape
                else:
                    grid_shape = (1, max(axes_array.size, 1))
                n_cols = grid_shape[1]
                for idx, ax_item in enumerate(axes_array.flat):
                    row, col = divmod(idx, n_cols)
                    all_axes.append((ax_item, row, col))
            else:
                all_axes = [(axes_array, 0, 0)]
        # Handle AxisWrapper (single axes)
        elif hasattr(ax, "_axis_mpl"):
            all_axes = [(ax, 0, 0)]
        else:
            # Assume it's a matplotlib axes
            all_axes = [(ax, 0, 0)]
    elif hasattr(fig, "axes") and len(fig.axes) > 0:
        # Fallback to figure axes (linear indexing: all on row 0)
        for idx, ax_item in enumerate(fig.axes):
            all_axes.append((ax_item, 0, idx))

    # Figure-level properties (best effort: skipped entirely on any failure)
    if all_axes:
        try:
            from ._figure_from_axes_mm import get_dimension_info

            first_ax = all_axes[0][0]
            # Get underlying matplotlib axis if wrapped
            mpl_ax = getattr(first_ax, '_axis_mpl', first_ax)
            dim_info = get_dimension_info(fig, mpl_ax)

            # Convert to plain lists/floats for JSON serialization
            size_mm = dim_info["figure_size_mm"]
            if hasattr(size_mm, 'tolist'):
                size_mm = size_mm.tolist()
            elif isinstance(size_mm, (list, tuple)):
                size_mm = [float(v) if hasattr(v, 'value') else v for v in size_mm]

            metadata["figure"] = {
                "size_mm": size_mm,
                "dpi": int(dim_info["dpi"]),
                "auto_crop": auto_crop,
                "crop_margin_mm": crop_margin_mm,
            }

            # Add top-level axes_bbox_px for canvas/web alignment (x0/y0/x1/y1 format)
            # x0: left edge (Y-axis position), y1: bottom edge (X-axis position)
            if "axes_bbox_px" in dim_info:
                bbox = dim_info["axes_bbox_px"]
                metadata["axes_bbox_px"] = {
                    "x0": int(bbox["x0"]),
                    "y0": int(bbox["y0"]),
                    "x1": int(bbox["x1"]),
                    "y1": int(bbox["y1"]),
                    "width": int(bbox["width"]),
                    "height": int(bbox["height"]),
                }
            if "axes_bbox_mm" in dim_info:
                bbox = dim_info["axes_bbox_mm"]
                metadata["axes_bbox_mm"] = {
                    "x0": round(float(bbox["x0"]), 2),
                    "y0": round(float(bbox["y0"]), 2),
                    "x1": round(float(bbox["x1"]), 2),
                    "y1": round(float(bbox["y1"]), 2),
                    "width": round(float(bbox["width"]), 2),
                    "height": round(float(bbox["height"]), 2),
                }
        except Exception:
            pass

    # Add mode from scitex metadata (the axes' metadata wins over the figure's)
    scitex_meta = None
    if ax is not None and hasattr(ax, "_scitex_metadata"):
        scitex_meta = ax._scitex_metadata
    elif hasattr(fig, "_scitex_metadata"):
        scitex_meta = fig._scitex_metadata

    if scitex_meta:
        if "figure" not in metadata:
            metadata["figure"] = {}
        if "mode" in scitex_meta:
            metadata["figure"]["mode"] = scitex_meta["mode"]
        # Include style_mm for reproducibility (thickness, fonts, etc.)
        if "style_mm" in scitex_meta:
            metadata["style"] = scitex_meta["style_mm"]

    # Collect per-axes metadata with calls
    if all_axes:
        metadata["axes"] = {}
        for current_ax, row, col in all_axes:
            # Use row-col format: ax_00, ax_01, ax_10, ax_11 for 2x2 grid
            ax_key = f"ax_{row}{col}"

            # Get underlying matplotlib axis if wrapped
            mpl_ax = getattr(current_ax, '_axis_mpl', current_ax)

            ax_meta = {
                "grid_position": {"row": row, "col": col}
            }

            # Position recorded in scitex metadata overrides the computed one
            if hasattr(current_ax, "_scitex_metadata"):
                pos = current_ax._scitex_metadata.get("position_in_grid")
                if pos:
                    ax_meta["grid_position"] = {"row": pos[0], "col": pos[1]}

            # Axis labels and limits (minimal - for axis alignment)
            try:
                xlim = mpl_ax.get_xlim()
                ylim = mpl_ax.get_ylim()
                ax_meta["xaxis"] = {
                    "label": mpl_ax.get_xlabel() or "",
                    "lim": [round(xlim[0], 4), round(xlim[1], 4)],
                }
                ax_meta["yaxis"] = {
                    "label": mpl_ax.get_ylabel() or "",
                    "lim": [round(ylim[0], 4), round(ylim[1], 4)],
                }
            except Exception:
                pass

            # Method calls from history - the core "recipe".
            # The flattened (row-major) index is passed for column naming.
            ax_index = row * grid_shape[1] + col
            ax_meta["calls"] = _extract_calls_from_history(current_ax, ax_index)

            metadata["axes"][ax_key] = ax_meta

    return metadata
3109
+
3110
+
3111
def _extract_calls_from_history(ax, ax_index: int) -> List[dict]:
    """
    Extract method call records from axis history.

    Parameters
    ----------
    ax : AxisWrapper or matplotlib.axes.Axes
        Axis to extract history from. Only objects exposing a ``history``
        (preferred) or ``_ax_history`` attribute yield any calls.
    ax_index : int
        Index of axis in figure. Currently unused by this function: the
        CSV column names are derived from
        ``ax._scitex_metadata["position_in_grid"]`` instead. The parameter
        is kept so existing callers keep working.

    Returns
    -------
    list
        List of call records: [{id, method, data_ref, kwargs}, ...].
        ``data_ref`` and ``kwargs`` are only present when non-empty.
    """
    # Prefer the public ``history`` attribute, fall back to ``_ax_history``.
    history = getattr(ax, 'history', None)
    if history is None:
        history = getattr(ax, '_ax_history', None)
    if not history:
        # No history attribute at all, or nothing recorded yet.
        return []

    # Grid position used in CSV column names; defaults to (0, 0).
    ax_row, ax_col = 0, 0
    scitex_meta = getattr(ax, "_scitex_metadata", None)
    if scitex_meta:
        pos = scitex_meta.get("position_in_grid")
        if pos is not None:
            # The key may hold None or a malformed value; keep the (0, 0)
            # default in that case instead of failing on pos[0].
            try:
                ax_row, ax_col = pos[0], pos[1]
            except (TypeError, IndexError, KeyError):
                pass

    calls = []
    for record in history.values():
        # record format: (id, method_name, tracked_dict, kwargs)
        if not isinstance(record, (list, tuple)) or len(record) < 3:
            continue

        call_id, method_name, tracked_dict = record[0], record[1], record[2]
        kwargs = record[3] if len(record) > 3 else {}

        call = {
            "id": str(call_id),
            "method": method_name,
        }

        # Map the tracked data onto CSV column names.
        data_ref = _build_data_ref(
            call_id, method_name, tracked_dict, ax_row, ax_col
        )
        if data_ref:
            call["data_ref"] = data_ref

        # Keep only style-relevant kwargs (not data).
        style_kwargs = _filter_style_kwargs(kwargs, method_name)
        if style_kwargs:
            call["kwargs"] = style_kwargs

        calls.append(call)

    return calls
3171
+
3172
+
3173
def _build_data_ref(trace_id, method_name: str, tracked_dict: dict,
                    ax_row: int, ax_col: int) -> dict:
    """
    Map the variables of one tracked call onto their CSV column names.

    Parameters
    ----------
    trace_id : str
        Trace identifier, embedded in every column name
    method_name : str
        Name of the plotting method that was called
    tracked_dict : dict
        Data tracked by the method; only inspected for the presence of
        the 'yerr', 'xerr', 'x' and 'args' keys
    ax_row, ax_col : int
        Axis position in grid, embedded in every column name

    Returns
    -------
    dict
        Mapping of variable names to CSV column names; empty when the
        method is unknown and no usable tracked data is present
    """
    xy = ('x', 'y')
    matrix = ('data',)
    with_error_bars = ('plot', 'scatter', 'step', 'errorbar')

    # (method names, variable names) pairs; the method names are disjoint.
    known_methods = (
        (('hist',), ('raw_data', 'bin_centers', 'bin_counts')),
        (with_error_bars, xy),
        (('bar', 'barh'), xy),
        (('stem',), xy),
        (('fill_between', 'fill_betweenx'), ('x', 'y1', 'y2')),
        (('imshow', 'matshow', 'pcolormesh'), matrix),
        (('contour', 'contourf'), ('x', 'y', 'z')),
        (('boxplot', 'violinplot'), matrix),
        (('pie',), ('x',)),
        (('quiver', 'streamplot'), ('x', 'y', 'u', 'v')),
        (('hexbin',), xy),
        (('hist2d',), xy),
        (('kde',), xy),
        # SciTeX custom methods (stx_*)
        (('stx_line',), xy),
        (('stx_mean_std', 'stx_mean_ci', 'stx_median_iqr', 'stx_shaded_line'),
         ('x', 'y_lower', 'y_middle', 'y_upper')),
        (('stx_box', 'stx_violin'), matrix),
        (('stx_scatter_hist',), xy),
        (('stx_heatmap', 'stx_conf_mat', 'stx_image', 'stx_raster'), matrix),
        (('stx_kde', 'stx_ecdf'), xy),
    )

    variables = None
    for names, group_variables in known_methods:
        if method_name in names:
            variables = list(group_variables)
            break

    if variables is None:
        if method_name.startswith('stx_'):
            # Any other stx_ method is assumed to be an x/y plot.
            variables = list(xy)
        elif tracked_dict and ('x' in tracked_dict or 'args' in tracked_dict):
            # Unknown method that still tracked positional data.
            variables = list(xy)
        else:
            variables = []
    elif method_name in with_error_bars:
        # Error-bar columns are only referenced when they were tracked.
        for error_key in ('yerr', 'xerr'):
            if tracked_dict and error_key in tracked_dict:
                variables.append(error_key)

    column_prefix = (
        f"ax-row-{ax_row}-col-{ax_col}_trace-id-{trace_id}_variable-"
    )
    # Column suffixes are the variable names with '_' turned into '-'.
    return {
        name: f"{column_prefix}{name.replace('_', '-')}"
        for name in variables
    }
3278
+
3279
+
3280
def _filter_style_kwargs(kwargs: dict, method_name: str) -> dict:
    """
    Filter kwargs to only include style-relevant parameters.

    Removes data arrays and internal parameters, keeps style settings
    that affect appearance (color, linewidth, etc.).

    Parameters
    ----------
    kwargs : dict
        Original keyword arguments. ``None`` or an empty dict yields ``{}``.
    method_name : str
        Name of the method. Currently unused; the same whitelist is
        applied to every method.

    Returns
    -------
    dict
        Filtered kwargs with only style parameters. Whitelisted values
        that are sized containers with more than 10 elements are dropped
        as data; ``float`` values are rounded to 4 decimal places.
    """
    if not kwargs:
        return {}

    # Style-relevant kwargs to keep
    style_keys = frozenset({
        'color', 'c', 'facecolor', 'edgecolor', 'linecolor',
        'linewidth', 'lw', 'linestyle', 'ls',
        'marker', 'markersize', 'ms', 'markerfacecolor', 'markeredgecolor',
        'alpha', 'zorder',
        'label',
        'bins', 'density', 'histtype', 'orientation',
        'width', 'height', 'align',
        'cmap', 'vmin', 'vmax', 'norm',
        'levels', 'extend',
        'scale', 'units',
        'autopct', 'explode', 'shadow', 'startangle',
    })

    filtered = {}
    for key, value in kwargs.items():
        if key not in style_keys:
            continue

        # Skip if value is a large array (data, not style)
        if hasattr(value, '__len__') and not isinstance(value, str):
            try:
                is_large = len(value) > 10
            except TypeError:
                # Some objects expose __len__ yet are unsized (e.g. 0-d
                # numpy arrays); treat them as scalar style values.
                is_large = False
            if is_large:
                continue

        # Round float values to 4 decimal places for cleaner JSON
        if isinstance(value, float):
            value = round(value, 4)
        filtered[key] = value

    return filtered
3330
+
3331
+
568
3332
  if __name__ == "__main__":
569
3333
  import numpy as np
570
3334
 
@@ -610,13 +3374,11 @@ if __name__ == "__main__":
610
3374
  print(f" • Software version: {metadata['scitex']['version']}")
611
3375
  print(f" • Timestamp: {metadata['scitex']['created_at']}")
612
3376
  if "dimensions" in metadata:
613
- print(
614
- f" • Axes size: {metadata['dimensions']['axes_size_mm']} mm"
615
- )
3377
+ print(f" • Axes size: {metadata['dimensions']['axes_size_mm']} mm")
616
3378
  print(f" • DPI: {metadata['dimensions']['dpi']}")
617
- if "scitex" in metadata and "mode" in metadata["scitex"]:
3379
+ if "runtime" in metadata and "mode" in metadata["runtime"]:
618
3380
  print(f" • Mode: {metadata['scitex']['mode']}")
619
- if "scitex" in metadata and "style_mm" in metadata["scitex"]:
3381
+ if "runtime" in metadata and "style_mm" in metadata["runtime"]:
620
3382
  print(" • Style: Embedded ✓")
621
3383
 
622
3384
  # EOF