scitex 2.16.2__py3-none-any.whl → 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. scitex/_mcp_resources/_cheatsheet.py +1 -1
  2. scitex/_mcp_resources/_modules.py +1 -1
  3. scitex/_mcp_tools/__init__.py +2 -0
  4. scitex/_mcp_tools/verify.py +256 -0
  5. scitex/cli/main.py +2 -0
  6. scitex/cli/verify.py +476 -0
  7. scitex/dev/plt/__init__.py +1 -1
  8. scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +90 -0
  9. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +1571 -0
  10. scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +6262 -0
  11. scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +1274 -0
  12. scitex/dev/plt/data/mpl/dir_ax.txt +459 -0
  13. scitex/dev/plt/mpl/get_dir_ax.py +1 -1
  14. scitex/dev/plt/mpl/get_signatures.py +1 -1
  15. scitex/dev/plt/mpl/get_signatures_details.py +1 -1
  16. scitex/io/_load.py +8 -1
  17. scitex/io/_save.py +12 -0
  18. scitex/scholar/data/.gitkeep +0 -0
  19. scitex/scholar/data/README.md +44 -0
  20. scitex/scholar/data/bib_files/bibliography.bib +1952 -0
  21. scitex/scholar/data/bib_files/neurovista.bib +277 -0
  22. scitex/scholar/data/bib_files/neurovista_enriched.bib +441 -0
  23. scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +441 -0
  24. scitex/scholar/data/bib_files/neurovista_processed.bib +338 -0
  25. scitex/scholar/data/bib_files/openaccess.bib +89 -0
  26. scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +2178 -0
  27. scitex/scholar/data/bib_files/pac.bib +698 -0
  28. scitex/scholar/data/bib_files/pac_enriched.bib +1061 -0
  29. scitex/scholar/data/bib_files/pac_processed.bib +0 -0
  30. scitex/scholar/data/bib_files/pac_titles.txt +75 -0
  31. scitex/scholar/data/bib_files/paywalled.bib +98 -0
  32. scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +58 -0
  33. scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +87 -0
  34. scitex/scholar/data/bib_files/seizure_prediction.bib +694 -0
  35. scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
  36. scitex/scholar/data/bib_files/test_complete_enriched.bib +437 -0
  37. scitex/scholar/data/bib_files/test_final_enriched.bib +437 -0
  38. scitex/scholar/data/bib_files/test_seizure.bib +46 -0
  39. scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
  40. scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
  41. scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
  42. scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
  43. scitex/scholar/data/impact_factor.db +0 -0
  44. scitex/session/README.md +2 -2
  45. scitex/session/__init__.py +1 -0
  46. scitex/session/_decorator.py +57 -33
  47. scitex/session/_lifecycle/__init__.py +23 -0
  48. scitex/session/_lifecycle/_close.py +225 -0
  49. scitex/session/_lifecycle/_config.py +112 -0
  50. scitex/session/_lifecycle/_matplotlib.py +83 -0
  51. scitex/session/_lifecycle/_start.py +246 -0
  52. scitex/session/_lifecycle/_utils.py +186 -0
  53. scitex/session/_manager.py +40 -3
  54. scitex/session/template.py +1 -1
  55. scitex/template/_templates/plt.py +1 -1
  56. scitex/template/_templates/session.py +1 -1
  57. scitex/verify/README.md +312 -0
  58. scitex/verify/__init__.py +212 -0
  59. scitex/verify/_chain.py +369 -0
  60. scitex/verify/_db.py +600 -0
  61. scitex/verify/_hash.py +187 -0
  62. scitex/verify/_integration.py +127 -0
  63. scitex/verify/_rerun.py +253 -0
  64. scitex/verify/_tracker.py +330 -0
  65. scitex/verify/_visualize.py +48 -0
  66. scitex/verify/_viz/__init__.py +56 -0
  67. scitex/verify/_viz/_colors.py +84 -0
  68. scitex/verify/_viz/_format.py +302 -0
  69. scitex/verify/_viz/_json.py +192 -0
  70. scitex/verify/_viz/_mermaid.py +440 -0
  71. scitex/verify/_viz/_plotly.py +193 -0
  72. scitex/verify/_viz/_templates.py +246 -0
  73. scitex/verify/_viz/_utils.py +56 -0
  74. {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/METADATA +1 -1
  75. {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/RECORD +78 -29
  76. scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +0 -462
  77. scitex/scholar/url_finder/.tmp/open_url/README.md +0 -223
  78. scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +0 -694
  79. scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +0 -1160
  80. scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +0 -344
  81. scitex/scholar/url_finder/.tmp/open_url/__init__.py +0 -24
  82. scitex/session/_lifecycle.py +0 -827
  83. {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/WHEEL +0 -0
  84. {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/entry_points.txt +0 -0
  85. {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,312 @@
1
+ <!-- ---
2
+ !-- Timestamp: 2026-02-01 08:47:14
3
+ !-- Author: ywatanabe
4
+ !-- File: /home/ywatanabe/proj/scitex-python/src/scitex/verify/README.md
5
+ !-- --- -->
6
+
7
+ # scitex.verify Module
8
+
9
+ Hash-based verification system for reproducible scientific computations.
10
+
11
+ ## Overview
12
+
13
+ The verify module provides cryptographic tracking of scientific pipelines, enabling researchers to:
14
+ - **Detect changes** in input/output files
15
+ - **Trace dependencies** through processing chains
16
+ - **Verify reproducibility** by re-executing scripts
17
+ - **Visualize workflows** as directed acyclic graphs (DAGs)
18
+
19
+ ![Verification DAG](dag.png)
20
+
21
+ *Example DAG showing verification states: ✓ verified (green), ✗ failed (red)*
22
+
23
+ ## Architecture
24
+
25
+ ```
26
+ scitex/verify/
27
+ ├── __init__.py # Public API and convenience functions
28
+ ├── _hash.py # SHA256 hashing utilities
29
+ ├── _db.py # SQLite database for storing hashes
30
+ ├── _tracker.py # Session tracking integration
31
+ ├── _chain.py # Chain verification logic
32
+ ├── _rerun.py # Re-execution verification
33
+ ├── _integration.py # Hooks for stx.io and @stx.session
34
+ ├── _visualize.py # Re-exports from _viz/
35
+ └── _viz/
36
+ ├── _mermaid.py # Mermaid DAG generation
37
+ ├── _plotly.py # Interactive Plotly DAGs
38
+ ├── _json.py # JSON DAG export
39
+ ├── _format.py # Terminal output formatting
40
+ ├── _colors.py # Color constants
41
+ ├── _templates.py # HTML templates
42
+ └── _utils.py # Shared utilities
43
+ ```
44
+
45
+ ## Core Components
46
+
47
+ ### Hash Utilities (`_hash.py`)
48
+
49
+ ```python
50
+ from scitex.verify import hash_file, hash_files, hash_directory
51
+
52
+ # Single file
53
+ h = hash_file("data.csv") # SHA256 hex string
54
+
55
+ # Multiple files
56
+ hashes = hash_files(["a.csv", "b.csv"]) # {path: hash}
57
+
58
+ # Directory (recursive)
59
+ h = hash_directory("output/") # Combined hash of all files
60
+ ```
61
+
62
+ ### Database (`_db.py`)
63
+
64
+ SQLite-based storage for verification records:
65
+
66
+ ```python
67
+ from scitex.verify import get_db
68
+
69
+ db = get_db() # ~/.scitex/verify.db by default
70
+
71
+ # Record a run
72
+ db.record_run(
73
+ session_id="abc123",
74
+ script_path="/path/to/script.py",
75
+ script_hash="sha256...",
76
+ config_hash="sha256...",
77
+ status="success"
78
+ )
79
+
80
+ # Record file hashes
81
+ db.record_file_hash(
82
+ session_id="abc123",
83
+ file_path="/path/to/output.csv",
84
+ file_hash="sha256...",
85
+ role="output" # or "input"
86
+ )
87
+
88
+ # Query runs
89
+ runs = db.list_runs(limit=10, status="success")
90
+ chain = db.get_chain("abc123") # Parent session IDs
91
+ ```
92
+
93
+ ### Session Tracker (`_tracker.py`)
94
+
95
+ Integrates with `@stx.session`:
96
+
97
+ ```python
98
+ from scitex.verify import SessionTracker, start_tracking, stop_tracking
99
+
100
+ # Manual tracking (usually automatic via @stx.session)
101
+ tracker = start_tracking(session_id="abc123")
102
+ tracker.add_input("/path/to/input.csv")
103
+ tracker.add_output("/path/to/output.csv")
104
+ stop_tracking()
105
+ ```
106
+
107
+ ### Chain Verification (`_chain.py`)
108
+
109
+ ```python
110
+ from scitex.verify import verify_file, verify_run, verify_chain
111
+
112
+ # Verify single file
113
+ file_result = verify_file("/path/to/output.csv")
114
+ print(file_result.is_verified) # True if hash matches
115
+
116
+ # Verify session run
117
+ run_result = verify_run("abc123")
118
+ print(run_result.is_verified) # True if all files match
119
+ print(run_result.mismatched_files) # List of changed files
120
+
121
+ # Verify entire chain
122
+ chain_result = verify_chain("/path/to/final_output.csv")
123
+ print(chain_result.is_verified) # True if all runs verified
124
+ print(chain_result.chain_length) # Number of runs in chain
125
+ for run in chain_result.runs:
126
+ print(f"{run.session_id}: {run.status}")
127
+ ```
128
+
129
+ ### Verification Levels
130
+
131
+ ```python
132
+ from scitex.verify import VerificationLevel, VerificationStatus
133
+
134
+ # Levels
135
+ VerificationLevel.CACHE # Fast hash comparison (✓)
136
+ VerificationLevel.RERUN # Re-execution verification (✓✓)
137
+
138
+ # Statuses
139
+ VerificationStatus.VERIFIED
140
+ VerificationStatus.UNVERIFIED
141
+ VerificationStatus.FAILED
142
+ VerificationStatus.UNKNOWN
143
+ ```
144
+
145
+ ### Re-execution Verification (`_rerun.py`)
146
+
147
+ ```python
148
+ from scitex.verify import verify_by_rerun
149
+
150
+ # Re-run script and verify outputs match
151
+ result = verify_by_rerun("/path/to/output.csv")
152
+ print(result.is_verified) # True if re-execution produces same hashes
153
+ print(result.level) # VerificationLevel.RERUN
154
+ ```
155
+
156
+ ## Visualization (`_visualize.py`)
157
+
158
+ ### Terminal Output
159
+
160
+ ```python
161
+ from scitex.verify import format_status, format_chain_verification
162
+
163
+ # Git status-like output
164
+ print(format_status())
165
+
166
+ # Chain visualization
167
+ chain = verify_chain("output.csv")
168
+ print(format_chain_verification(chain))
169
+ ```
170
+
171
+ ### Mermaid DAG
172
+
173
+ ```python
174
+ from scitex.verify import generate_mermaid_dag
175
+
176
+ mermaid = generate_mermaid_dag(target_file="output.csv")
177
+ # Returns:
178
+ # graph TD
179
+ # script_0["✓ 🐍 analyze.py"]:::verified
180
+ # file_0[("✓ 📊 output.csv")]:::file_ok
181
+ # script_0 --> file_0
182
+ # classDef verified fill:#90EE90...
183
+ ```
184
+
185
+ ### HTML/PNG/SVG Export
186
+
187
+ ```python
188
+ from scitex.verify import render_dag
189
+
190
+ # Interactive HTML
191
+ render_dag("dag.html", target_file="output.csv", show_hashes=True)
192
+
193
+ # Static images (requires mmdc)
194
+ render_dag("dag.png", target_file="output.csv")
195
+ render_dag("dag.svg", target_file="output.csv")
196
+
197
+ # Raw formats
198
+ render_dag("dag.mmd", target_file="output.csv") # Mermaid code
199
+ render_dag("dag.json", target_file="output.csv") # Graph structure
200
+ ```
201
+
202
+ ### Interactive Plotly
203
+
204
+ ```python
205
+ from scitex.verify import generate_plotly_dag, render_plotly_dag
206
+
207
+ fig = generate_plotly_dag(target_file="output.csv")
208
+ fig.show() # Opens browser
209
+
210
+ render_plotly_dag("dag_plotly.html", target_file="output.csv")
211
+ ```
212
+
213
+ ## Integration Hooks (`_integration.py`)
214
+
215
+ Automatically called by `@stx.session` and `stx.io`:
216
+
217
+ ```python
218
+ from scitex.verify import on_session_start, on_session_close, on_io_load, on_io_save
219
+
220
+ # Session lifecycle
221
+ on_session_start(session_id, script_path, config_hash)
222
+ on_session_close(session_id, status="success")
223
+
224
+ # I/O tracking
225
+ on_io_load(file_path) # Records as input
226
+ on_io_save(file_path) # Records as output
227
+ ```
228
+
229
+ ## CLI Commands
230
+
231
+ ```bash
232
+ # List runs
233
+ scitex verify list [--limit N] [--status success|failed]
234
+
235
+ # Check status
236
+ scitex verify status
237
+
238
+ # Verify specific run
239
+ scitex verify run SESSION_ID [--from-scratch]
240
+
241
+ # Trace dependencies
242
+ scitex verify chain FILE_PATH
243
+
244
+ # Database stats
245
+ scitex verify stats
246
+ ```
247
+
248
+ ## MCP Tools
249
+
250
+ Available via MCP protocol:
251
+
252
+ | Tool | Description |
253
+ |------|-------------|
254
+ | `verify_list` | List tracked runs |
255
+ | `verify_run` | Verify specific run |
256
+ | `verify_chain` | Trace dependencies |
257
+ | `verify_status` | Show changed items |
258
+ | `verify_stats` | Database statistics |
259
+ | `verify_mermaid` | Generate Mermaid DAG |
260
+
261
+ ## Database Schema
262
+
263
+ ```sql
264
+ -- Runs table
265
+ CREATE TABLE runs (
266
+ session_id TEXT PRIMARY KEY,
267
+ script_path TEXT,
268
+ script_hash TEXT,
269
+ config_hash TEXT,
270
+ status TEXT,
271
+ started_at TIMESTAMP,
272
+ ended_at TIMESTAMP
273
+ );
274
+
275
+ -- File hashes table
276
+ CREATE TABLE file_hashes (
277
+ id INTEGER PRIMARY KEY,
278
+ session_id TEXT,
279
+ file_path TEXT,
280
+ file_hash TEXT,
281
+ role TEXT, -- 'input' or 'output'
282
+ recorded_at TIMESTAMP,
283
+ FOREIGN KEY (session_id) REFERENCES runs(session_id)
284
+ );
285
+
286
+ -- Verification records table
287
+ CREATE TABLE verifications (
288
+ id INTEGER PRIMARY KEY,
289
+ session_id TEXT,
290
+ level TEXT, -- 'cache' or 'rerun'
291
+ status TEXT, -- 'verified', 'failed'
292
+ verified_at TIMESTAMP,
293
+ FOREIGN KEY (session_id) REFERENCES runs(session_id)
294
+ );
295
+ ```
296
+
297
+ ## Examples
298
+
299
+ See `examples/scitex/verify/` for complete working examples:
300
+
301
+ - `00_run_all.sh` - Run complete pipeline
302
+ - `01-08` - Multi-branch processing pipeline
303
+ - `09_demo_verification.py` - Verification states demo
304
+ - `10_programmatic_verification.py` - API usage examples
305
+
306
+ ## See Also
307
+
308
+ - `examples/scitex/verify/README.md` - Usage examples with DAG visualization
309
+ - `@stx.session` decorator - Automatic session tracking
310
+ - `stx.io` module - File I/O with hash tracking
311
+
312
+ <!-- EOF -->
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: "2026-02-01 (ywatanabe)"
3
+ # File: /home/ywatanabe/proj/scitex-python/src/scitex/verify/__init__.py
4
+ """
5
+ SciTeX Verify Module - Hash-based verification for reproducible science.
6
+
7
+ This module provides tools to track, verify, and visualize the reproducibility
8
+ of scientific computations through cryptographic hashing.
9
+
10
+ Core Concepts
11
+ -------------
12
+ - **Run**: A single execution of a script with tracked inputs/outputs
13
+ - **Hash**: SHA256 fingerprint of files to detect changes
14
+ - **Chain**: Dependency links between runs (parent → child)
15
+ - **Verification**: Comparing stored hashes with current file states
16
+
17
+ Verification Levels
18
+ -------------------
19
+ - **verified-by-cache** (✓): Fast comparison of stored vs current hashes
20
+ - **verified-by-rerun** (✓✓): Full re-execution and comparison (slower, more thorough)
21
+
22
+ Examples
23
+ --------
24
+ >>> import scitex as stx
25
+
26
+ >>> # Automatic tracking via @stx.session + stx.io
27
+ >>> @stx.session
28
+ ... def main():
29
+ ... data = stx.io.load("input.csv") # Auto-tracked as input
30
+ ... result = process(data)
31
+ ... stx.io.save(result, "output.png") # Auto-tracked as output
32
+
33
+ >>> # Manual verification
34
+ >>> stx.verify.status() # Show changed files
35
+ >>> stx.verify.run("session_id") # Verify specific run
36
+ >>> stx.verify.chain("output.png") # Trace back to source
37
+
38
+ CLI Commands
39
+ ------------
40
+ - ``scitex verify list`` - List all runs with verification status
41
+ - ``scitex verify run <id>`` - Verify specific run
42
+ - ``scitex verify chain <file>`` - Trace dependencies back to source
43
+ - ``scitex verify status`` - Show changed items (git status-like)
+ - ``scitex verify stats`` - Show database statistics
44
+
45
+ MCP Tools
46
+ ---------
47
+ - ``verify_list`` - List runs
48
+ - ``verify_run`` - Verify specific run
49
+ - ``verify_chain`` - Trace chain
50
+ - ``verify_status`` - Show changes
+ - ``verify_stats`` - Database statistics
+ - ``verify_mermaid`` - Generate Mermaid DAG
51
+ """
52
+
53
+ from __future__ import annotations
54
+
55
+ # Chain verification
56
+ from ._chain import (
57
+ ChainVerification,
58
+ FileVerification,
59
+ RunVerification,
60
+ VerificationLevel,
61
+ VerificationStatus,
62
+ get_status,
63
+ verify_chain,
64
+ verify_file,
65
+ verify_run,
66
+ )
67
+
68
+ # Database
69
+ from ._db import (
70
+ VerificationDB,
71
+ get_db,
72
+ )
73
+
74
+ # Hash utilities
75
+ from ._hash import (
76
+ combine_hashes,
77
+ hash_directory,
78
+ hash_file,
79
+ hash_files,
80
+ verify_hash,
81
+ )
82
+
83
+ # Integration hooks
84
+ from ._integration import (
85
+ on_io_load,
86
+ on_io_save,
87
+ on_session_close,
88
+ on_session_start,
89
+ )
90
+
91
+ # Rerun verification (separate module to avoid circular imports)
92
+ from ._rerun import verify_by_rerun, verify_run_from_scratch
93
+
94
+ # Tracker
95
+ from ._tracker import (
96
+ SessionTracker,
97
+ get_tracker,
98
+ set_tracker,
99
+ start_tracking,
100
+ stop_tracking,
101
+ )
102
+
103
+ # Visualization
104
+ from ._visualize import (
105
+ format_chain_verification,
106
+ format_list,
107
+ format_run_detailed,
108
+ format_run_verification,
109
+ format_status,
110
+ generate_html_dag,
111
+ generate_mermaid_dag,
112
+ generate_plotly_dag,
113
+ print_verification_summary,
114
+ render_dag,
115
+ render_plotly_dag,
116
+ )
117
+
118
+
119
+ # Convenience functions at module level
120
def list_runs(limit: int = 100, status: str | None = None):
    """List tracked runs recorded in the verification database.

    Parameters
    ----------
    limit : int, optional
        Forwarded to ``db.list_runs`` as ``limit`` (default 100).
    status : str or None, optional
        Forwarded to ``db.list_runs`` as ``status``. ``None`` (the default)
        is passed through unchanged.

    Returns
    -------
    object
        Whatever ``get_db().list_runs(...)`` returns.
    """
    # NOTE: inside this function the ``status`` parameter shadows the
    # module-level ``status()`` convenience function; that is harmless here
    # because only the parameter value is used.
    db = get_db()
    return db.list_runs(status=status, limit=limit)
124
+
125
+
126
def status():
    """Return the verification status summary (comparable to ``git status``).

    Thin convenience wrapper that delegates to ``get_status()``.
    """
    summary = get_status()
    return summary
129
+
130
+
131
def run(session_id: str, from_scratch: bool = False):
    """Verify one tracked run, either by hash comparison or by re-execution.

    Parameters
    ----------
    session_id : str
        Identifier of the session to verify.
    from_scratch : bool, optional
        When truthy, delegate to ``verify_run_from_scratch``, which
        re-executes the script and verifies its outputs (slow but thorough).
        Otherwise delegate to ``verify_run``, which only compares hashes
        (fast).
    """
    # Pick the verification strategy once, then make a single call.
    verifier = verify_run_from_scratch if from_scratch else verify_run
    return verifier(session_id)
145
+
146
+
147
def chain(target: str):
    """Run chain verification for ``target`` by delegating to ``verify_chain``."""
    result = verify_chain(target)
    return result
150
+
151
+
152
def stats():
    """Return the statistics reported by the verification database."""
    return get_db().stats()
156
+
157
+
158
# Public API of scitex.verify, grouped by the submodule each name comes from.
__all__ = [
    # _hash: hashing helpers
    "hash_file",
    "hash_files",
    "hash_directory",
    "combine_hashes",
    "verify_hash",
    # _db: storage backend
    "VerificationDB",
    "get_db",
    # _tracker: session tracking
    "SessionTracker",
    "get_tracker",
    "set_tracker",
    "start_tracking",
    "stop_tracking",
    # _chain / _rerun: verification types and entry points
    "VerificationStatus",
    "VerificationLevel",
    "FileVerification",
    "RunVerification",
    "ChainVerification",
    "verify_file",
    "verify_run",
    "verify_by_rerun",
    "verify_run_from_scratch",  # backward compat alias
    "verify_chain",
    "get_status",
    # _visualize: formatting and DAG rendering
    "format_run_verification",
    "format_run_detailed",
    "format_chain_verification",
    "format_status",
    "format_list",
    "generate_mermaid_dag",
    "generate_html_dag",
    "generate_plotly_dag",
    "render_dag",
    "render_plotly_dag",
    "print_verification_summary",
    # Module-level convenience wrappers defined in this file
    "list_runs",
    "status",
    "run",
    "chain",
    "stats",
    # _integration: hooks for the session decorator and I/O layer
    "on_session_start",
    "on_session_close",
    "on_io_load",
    "on_io_save",
]
210
+
211
+
212
+ # EOF