seq2cause 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: seq2cause
3
+ Version: 0.1.0
4
+ Summary: Causal Discovery from Event Sequences using Autoregressive Models
5
+ Project-URL: Homepage, https://github.com/Mathugo/seq2cause
6
+ Author-email: Hugo Math <hugo.math@gmail.com>
7
+ License: MIT
8
+ Requires-Python: <4,>=3.8
9
+ Requires-Dist: accelerate
10
+ Requires-Dist: captum
11
+ Requires-Dist: datasets>2.20.0
12
+ Requires-Dist: numpy<2.0
13
+ Requires-Dist: pyarrow<17
14
+ Requires-Dist: seaborn
15
+ Requires-Dist: torch
16
+ Requires-Dist: transformers>=4.24.0
@@ -0,0 +1,81 @@
1
+ # seq2cause
2
+ seq2cause: Turns any discrete sequence of events into a causal graph using autoregressive models (LLaMA, GPT, RNN, Mamba).
3
+
4
+ [![PyPI version](https://img.shields.io/pypi/v/seq2cause.svg)](https://pypi.org/project/seq2cause/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
7
+
8
+ **seq2cause** is a Python library for **Causal Discovery on Discrete Event Sequences**. It bridges the gap between Autoregressive Models (Language Models, RNNs, Mamba) and Causal Discovery by treating autoregressive models as density estimators to perform parallelized CI-tests on GPUs.
9
+
10
+ ## 🚀 Key Features
11
+
12
+ - **Bring Your Own Model:** Plug in any HuggingFace/PyTorch model (`GPT-2`, `LLaMA`, `RNN`) trained on your discrete sequences (logs, codes, symbols).
13
+ - **Scaling:** Scales to thousands of events: memory complexity grows linearly with the vocabulary size and sequence length. Optimized for sparse, high-dimensional streams (e.g., Vehicle Diagnostics, Server Logs, User Journeys).
14
+ - **Multi-GPU Acceleration:** Batch processing for analyzing thousands of events in seconds using multiple GPUs.
15
+ - **Delayed Effects:** Delayed causal effects are identifiable up to the sequence length.
16
+ - **Causal Relationship Types:** Explains event-to-event and event-to-outcome causal graphs from single sequences, and also aggregates global event-to-outcome scenarios, with instance-time causal graphs and summary causal graphs.
17
+
18
+ ## 📦 Installation
19
+
20
+ ```bash
21
+ pip install seq2cause
22
+ ```
23
+
24
+ ## ⚡ Quick Start
25
+ Recover the causal graph from your logs in 3 lines of code.
26
+
27
+ ## 📚 How It Works
28
+
29
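+ seq2cause implements the **TRACE** framework (Temporal Reconstruction via Autoregressive Causal Estimation) for event-to-event causal discovery and **OSCAR** for event-to-outcome causal discovery. TRACE treats the autoregressive model as a density estimator and uses it to approximate the Conditional Mutual Information (CMI) between events, which serves as its conditional-independence test.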
30
+
31
+ ## Graph Types
32
+ You can specify which type of graph to compute. The supported graph types are: <!-- TODO: redo graph naming and parameters in the package; add instance-time and summary graphs -->
33
+
34
+ - **Event-to-Event (per sequence):** Implements the **TRACE** algorithm using Conditional Mutual Information (CMI) approximation.
35
+ - **Event-to-Outcome (per sequence):** Implements the **OSCAR** algorithm, which targets event-to-outcome relationships using a second autoregressive model to predict outcomes.
36
+ - **Event-to-Outcome (global):** Implements the **CARGO** algorithm, which aggregates the per-sequence causal graphs to provide a global view of the causal relationships in observational data.
37
+
38
+ ## Future Work
39
+
40
+ - **Time series**: Extend causal discovery to time series using autoregressive models (normalizing flows, AR models).
41
+
42
+ ## 🔗 Citation
43
+ If you use seq2cause in your research, please cite our work:
44
+
45
+ ```bibtex
46
+ @misc{math2026tracescalableamortizedcausal,
47
+ title={TRACE: Scalable Amortized Causal Discovery from Single Sequences via Autoregressive Density Estimation},
48
+ author={Hugo Math and Rainer Lienhart},
49
+ year={2026},
50
+ eprint={2602.01135},
51
+ archivePrefix={arXiv},
52
+ primaryClass={cs.LG},
53
+ url={https://arxiv.org/abs/2602.01135},
54
+ }
55
+ ```
56
+
57
+ ```bibtex
58
+ @inproceedings{
59
+ math2025oneshot,
60
+ title={One-Shot Multi-Label Causal Discovery in High-Dimensional Event Sequences},
61
+ author={Hugo Math and Robin Sch{\"o}n and Rainer Lienhart},
62
+ booktitle={NeurIPS 2025 Workshop on CauScien: Uncovering Causality in Science},
63
+ year={2025},
64
+ url={https://openreview.net/forum?id=z7NT8vGWC2}
65
+ }
66
+ ```
67
+
68
+ ```bibtex
69
+ @inproceedings{
70
+ math2025towards,
71
+ title={Towards Practical Multi-label Causal Discovery in High-Dimensional Event Sequences via One-Shot Graph Aggregation},
72
+ author={Hugo Math and Rainer Lienhart},
73
+ booktitle={NeurIPS 2025 Workshop on Structured Probabilistic Inference {\&} Generative Modeling},
74
+ year={2025},
75
+ url={https://openreview.net/forum?id=1HZfpuDVeW}
76
+ }
77
+ ```
78
+
79
+ ## 📄 License
80
+ This project is licensed under the MIT License - see the LICENSE file for details.
81
+
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "seq2cause"
7
+ version = "0.1.0"
8
+ description = "Causal Discovery from Event Sequences using Autoregressive Models"
9
+ requires-python = ">=3.8,<4"
10
+
11
+ authors = [
12
+ { name = "Hugo Math", email = "hugo.math@gmail.com" },
13
+ ]
14
+
15
+ license = { text = "MIT" }
16
+
17
+ dependencies = [
18
+ "transformers>=4.24.0",
19
+ "pyarrow<17",
20
+ "numpy<2.0",
21
+ "torch",
22
+ "accelerate",
23
+ "datasets>2.20.0",
24
+ "seaborn",
25
+ "captum",
26
+ ]
27
+
28
+ [project.urls]
29
+ "Homepage" = "https://github.com/Mathugo/seq2cause"
30
+
31
+ [tool.hatch.build.targets.wheel]
32
+ packages = ["src/seq2cause"]
@@ -0,0 +1,4 @@
1
+ # src/seq2cause/__init__.py
2
+ from .core import TRACE
3
+
4
+ __version__ = "0.1.0"
@@ -0,0 +1,2 @@
1
+ class TRACE:
2
+ pass