causaliq-knowledge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- causaliq_knowledge-0.1.0/LICENSE +21 -0
- causaliq_knowledge-0.1.0/PKG-INFO +185 -0
- causaliq_knowledge-0.1.0/README.md +131 -0
- causaliq_knowledge-0.1.0/pyproject.toml +184 -0
- causaliq_knowledge-0.1.0/setup.cfg +4 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/__init__.py +33 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/base.py +85 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/cli.py +207 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/llm/__init__.py +34 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/llm/gemini_client.py +203 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/llm/groq_client.py +148 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/llm/prompts.py +204 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/llm/provider.py +341 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge/models.py +124 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/PKG-INFO +185 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/SOURCES.txt +18 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/dependency_links.txt +1 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/entry_points.txt +3 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/requires.txt +29 -0
- causaliq_knowledge-0.1.0/src/causaliq_knowledge.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 CausalIQ
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: causaliq-knowledge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Incorporating LLM and human knowledge into causal discovery
|
|
5
|
+
Author-email: CausalIQ <info@causaliq.com>
|
|
6
|
+
Maintainer-email: CausalIQ <info@causaliq.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/causaliq/causaliq-knowledge
|
|
9
|
+
Project-URL: Documentation, https://github.com/causaliq/causaliq-knowledge#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/causaliq/causaliq-knowledge
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/causaliq/causaliq-knowledge/issues
|
|
12
|
+
Keywords: causaliq
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: click>=8.0.0
|
|
28
|
+
Requires-Dist: httpx>=0.24.0
|
|
29
|
+
Requires-Dist: pydantic>=2.0.0
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: causaliq-core>=0.3.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
|
|
35
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: isort>=5.10.0; extra == "dev"
|
|
37
|
+
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: types-requests>=2.32.0; extra == "dev"
|
|
40
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
41
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
42
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
43
|
+
Provides-Extra: test
|
|
44
|
+
Requires-Dist: causaliq-core>=0.3.0; extra == "test"
|
|
45
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
46
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
47
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "test"
|
|
48
|
+
Provides-Extra: docs
|
|
49
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
50
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocstrings==0.30.1; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocstrings-python==1.18.2; extra == "docs"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
# causaliq-knowledge
|
|
56
|
+
|
|
57
|
+

|
|
58
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
59
|
+

|
|
60
|
+
|
|
61
|
+
The CausalIQ Knowledge project represents a novel approach to causal discovery by combining the traditional statistical structure learning algorithms with the contextual understanding and reasoning capabilities of Large Language Models. This integration enables more interpretable, domain-aware, and human-friendly causal discovery workflows. It is part of the [CausalIQ ecosystem](https://causaliq.org/) for intelligent causal discovery.
|
|
62
|
+
|
|
63
|
+
## Status
|
|
64
|
+
|
|
65
|
+
🚧 **Active Development** - this repository is currently in active development, which involves:
|
|
66
|
+
|
|
67
|
+
- Adding new knowledge features, in particular knowledge from LLMs
|
|
68
|
+
- Migrating functionality which provides knowledge based on standard reference networks from the legacy monolithic discovery repo
|
|
69
|
+
- Ensuring CausalIQ development standards are met
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from causaliq_knowledge.llm import LLMKnowledge
|
|
76
|
+
|
|
77
|
+
# Query an LLM about a potential causal relationship
|
|
78
|
+
knowledge = LLMKnowledge(models=["groq/llama-3.1-8b-instant"])
|
|
79
|
+
result = knowledge.query_edge("smoking", "lung_cancer")
|
|
80
|
+
|
|
81
|
+
print(f"Exists: {result.exists}, Direction: {result.direction}")
|
|
82
|
+
print(f"Confidence: {result.confidence}")
|
|
83
|
+
print(f"Reasoning: {result.reasoning}")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Features
|
|
87
|
+
|
|
88
|
+
Under development:
|
|
89
|
+
|
|
90
|
+
- **Release v0.1.0 - Foundation LLM**: Simple LLM queries to 1 or 2 LLMs about edge existence and orientation to support graph averaging
|
|
91
|
+
|
|
92
|
+
Currently implemented releases:
|
|
93
|
+
|
|
94
|
+
- None
|
|
95
|
+
|
|
96
|
+
Planned:
|
|
97
|
+
|
|
98
|
+
- **Release v0.2.0 - Additional LLMs**: Support for more LLM providers (OpenAI, Anthropic)
|
|
99
|
+
- **Release v0.3.0 - LLM Caching**: Caching of LLM queries and responses
|
|
100
|
+
- **Release v0.4.0 - LLM Context**: Variable, role, literature and other context
|
|
101
|
+
- **Release v0.5.0 - Algorithm integration**: Integration into structure learning algorithms
|
|
102
|
+
- **Release v0.6.0 - Legacy Reference**: Support for legacy approaches of deriving knowledge from reference networks
|
|
103
|
+
|
|
104
|
+
## Implementation Approach
|
|
105
|
+
|
|
106
|
+
### Technology Stack
|
|
107
|
+
|
|
108
|
+
- **Vendor-Specific API Clients**: Direct integration with LLM providers using httpx
|
|
109
|
+
- **[Pydantic](https://docs.pydantic.dev/)**: Structured response validation
|
|
110
|
+
- **[Click](https://click.palletsprojects.com/)**: Command-line interface
|
|
111
|
+
|
|
112
|
+
### Why Vendor-Specific APIs (not LiteLLM/LangChain)?
|
|
113
|
+
|
|
114
|
+
We use **direct vendor-specific API clients** rather than wrapper libraries:
|
|
115
|
+
|
|
116
|
+
| Aspect | Direct APIs | Wrapper Libraries |
|
|
117
|
+
|--------|-------------|-------------------|
|
|
118
|
+
| Reliability | ✅ Full control | ❌ Wrapper bugs |
|
|
119
|
+
| Dependencies | ✅ Minimal (httpx) | ❌ Heavy (~50-100MB) |
|
|
120
|
+
| Debugging | ✅ Clear traces | ❌ Abstraction layers |
|
|
121
|
+
| Maintenance | ✅ We control | ❌ Wait for updates |
|
|
122
|
+
|
|
123
|
+
This approach keeps the package lightweight, reliable, and easy to debug.
|
|
124
|
+
|
|
125
|
+
### Supported LLM Providers
|
|
126
|
+
|
|
127
|
+
| Provider | Client | Models | Free Tier |
|
|
128
|
+
|----------|--------|--------|-----------|
|
|
129
|
+
| **Groq** | `GroqClient` | llama-3.1-8b-instant | ✅ Generous |
|
|
130
|
+
| **Google Gemini** | `GeminiClient` | gemini-2.5-flash | ✅ Generous |
|
|
131
|
+
|
|
132
|
+
Additional providers (OpenAI, Anthropic) can be added in future releases.
|
|
133
|
+
|
|
134
|
+
## Upcoming Key Innovations
|
|
135
|
+
|
|
136
|
+
### 🧠 LLMs support Causal Discovery and Inference
|
|
137
|
+
|
|
138
|
+
- Initially, LLMs will work with **graph averaging** to resolve uncertain edges (using entropy to identify edges with uncertain existence or direction)
|
|
139
|
+
- Integration into **structure learning** algorithms to provide knowledge for "uncertain" areas of the graph
|
|
140
|
+
- LLMs analyse learning process and errors to **suggest improved algorithms**
|
|
141
|
+
- LLMs used to preprocess **text and visual data** so they can be used as inputs to structure learning
|
|
142
|
+
|
|
143
|
+
### 🤝 Human Engagement
|
|
144
|
+
|
|
145
|
+
- **Natural language constraints**: Specify domain knowledge in plain English
|
|
146
|
+
- **Expert knowledge incorporation** by converting expert understanding into algorithmic constraints
|
|
147
|
+
- LLMs convert **natural language questions** to causal queries
|
|
148
|
+
- **Interactive causal discovery** where structure learning or LLMs identify areas of causal uncertainty and can test causal hypotheses through dialogue
|
|
149
|
+
|
|
150
|
+
### 🪟 Transparency and interpretability
|
|
151
|
+
|
|
152
|
+
- LLMs **interpret structure learning process** and outputs, including their uncertainties
|
|
153
|
+
- LLMs **interpret causal inference** results including uncertainties
|
|
154
|
+
- **Contextual graph interpretation** to explain variable meanings and relationships
|
|
155
|
+
- **Uncertainty communication** with clear explanation of confidence levels and limitations
|
|
156
|
+
- **Report generation** including automated research summaries and methodology descriptions
|
|
157
|
+
|
|
158
|
+
### 🔒 Stability and reproducibility
|
|
159
|
+
|
|
160
|
+
- **Cache queries and responses** so that experiments are stable and repeatable even if LLMs themselves are not
|
|
161
|
+
- **Stable randomisation** of e.g. data sub-sampling
|
|
162
|
+
|
|
163
|
+
### 💰 Efficient use of LLM resources (important as an independent researcher)
|
|
164
|
+
|
|
165
|
+
- **Cache queries and results** so that knowledge can be re-used
|
|
166
|
+
- Evaluation and development of **simple context-adapted LLMs**
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
## Upcoming Integration with CausalIQ Ecosystem
|
|
170
|
+
|
|
171
|
+
- 🔍 CausalIQ Discovery makes use of this package to learn more accurate graphs.
|
|
172
|
+
- 🧪 CausalIQ Analysis uses this package to explain the learning process and to intelligently combine and explain results.
|
|
173
|
+
- 🔮 CausalIQ Predict uses this package to explain predictions made by learnt models.
|
|
174
|
+
|
|
175
|
+
## Documentation
|
|
176
|
+
|
|
177
|
+
- [User Guide](docs/userguide/introduction.md) - Getting started
|
|
178
|
+
- [Architecture Overview](docs/architecture/overview.md) - Design and components
|
|
179
|
+
- [LLM Integration Design](docs/architecture/llm_integration.md) - Detailed LLM design
|
|
180
|
+
- [Roadmap](docs/roadmap.md) - Release planning
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
**Supported Python Versions**: 3.9, 3.10, 3.11, 3.12, 3.13
|
|
185
|
+
**Default Python Version**: 3.11
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# causaliq-knowledge
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
The CausalIQ Knowledge project represents a novel approach to causal discovery by combining the traditional statistical structure learning algorithms with the contextual understanding and reasoning capabilities of Large Language Models. This integration enables more interpretable, domain-aware, and human-friendly causal discovery workflows. It is part of the [CausalIQ ecosystem](https://causaliq.org/) for intelligent causal discovery.
|
|
8
|
+
|
|
9
|
+
## Status
|
|
10
|
+
|
|
11
|
+
🚧 **Active Development** - this repository is currently in active development, which involves:
|
|
12
|
+
|
|
13
|
+
- Adding new knowledge features, in particular knowledge from LLMs
|
|
14
|
+
- Migrating functionality which provides knowledge based on standard reference networks from the legacy monolithic discovery repo
|
|
15
|
+
- Ensuring CausalIQ development standards are met
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from causaliq_knowledge.llm import LLMKnowledge
|
|
22
|
+
|
|
23
|
+
# Query an LLM about a potential causal relationship
|
|
24
|
+
knowledge = LLMKnowledge(models=["groq/llama-3.1-8b-instant"])
|
|
25
|
+
result = knowledge.query_edge("smoking", "lung_cancer")
|
|
26
|
+
|
|
27
|
+
print(f"Exists: {result.exists}, Direction: {result.direction}")
|
|
28
|
+
print(f"Confidence: {result.confidence}")
|
|
29
|
+
print(f"Reasoning: {result.reasoning}")
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
Under development:
|
|
35
|
+
|
|
36
|
+
- **Release v0.1.0 - Foundation LLM**: Simple LLM queries to 1 or 2 LLMs about edge existence and orientation to support graph averaging
|
|
37
|
+
|
|
38
|
+
Currently implemented releases:
|
|
39
|
+
|
|
40
|
+
- None
|
|
41
|
+
|
|
42
|
+
Planned:
|
|
43
|
+
|
|
44
|
+
- **Release v0.2.0 - Additional LLMs**: Support for more LLM providers (OpenAI, Anthropic)
|
|
45
|
+
- **Release v0.3.0 - LLM Caching**: Caching of LLM queries and responses
|
|
46
|
+
- **Release v0.4.0 - LLM Context**: Variable, role, literature and other context
|
|
47
|
+
- **Release v0.5.0 - Algorithm integration**: Integration into structure learning algorithms
|
|
48
|
+
- **Release v0.6.0 - Legacy Reference**: Support for legacy approaches of deriving knowledge from reference networks
|
|
49
|
+
|
|
50
|
+
## Implementation Approach
|
|
51
|
+
|
|
52
|
+
### Technology Stack
|
|
53
|
+
|
|
54
|
+
- **Vendor-Specific API Clients**: Direct integration with LLM providers using httpx
|
|
55
|
+
- **[Pydantic](https://docs.pydantic.dev/)**: Structured response validation
|
|
56
|
+
- **[Click](https://click.palletsprojects.com/)**: Command-line interface
|
|
57
|
+
|
|
58
|
+
### Why Vendor-Specific APIs (not LiteLLM/LangChain)?
|
|
59
|
+
|
|
60
|
+
We use **direct vendor-specific API clients** rather than wrapper libraries:
|
|
61
|
+
|
|
62
|
+
| Aspect | Direct APIs | Wrapper Libraries |
|
|
63
|
+
|--------|-------------|-------------------|
|
|
64
|
+
| Reliability | ✅ Full control | ❌ Wrapper bugs |
|
|
65
|
+
| Dependencies | ✅ Minimal (httpx) | ❌ Heavy (~50-100MB) |
|
|
66
|
+
| Debugging | ✅ Clear traces | ❌ Abstraction layers |
|
|
67
|
+
| Maintenance | ✅ We control | ❌ Wait for updates |
|
|
68
|
+
|
|
69
|
+
This approach keeps the package lightweight, reliable, and easy to debug.
|
|
70
|
+
|
|
71
|
+
### Supported LLM Providers
|
|
72
|
+
|
|
73
|
+
| Provider | Client | Models | Free Tier |
|
|
74
|
+
|----------|--------|--------|-----------|
|
|
75
|
+
| **Groq** | `GroqClient` | llama-3.1-8b-instant | ✅ Generous |
|
|
76
|
+
| **Google Gemini** | `GeminiClient` | gemini-2.5-flash | ✅ Generous |
|
|
77
|
+
|
|
78
|
+
Additional providers (OpenAI, Anthropic) can be added in future releases.
|
|
79
|
+
|
|
80
|
+
## Upcoming Key Innovations
|
|
81
|
+
|
|
82
|
+
### 🧠 LLMs support Causal Discovery and Inference
|
|
83
|
+
|
|
84
|
+
- Initially, LLMs will work with **graph averaging** to resolve uncertain edges (using entropy to identify edges with uncertain existence or direction)
|
|
85
|
+
- Integration into **structure learning** algorithms to provide knowledge for "uncertain" areas of the graph
|
|
86
|
+
- LLMs analyse learning process and errors to **suggest improved algorithms**
|
|
87
|
+
- LLMs used to preprocess **text and visual data** so they can be used as inputs to structure learning
|
|
88
|
+
|
|
89
|
+
### 🤝 Human Engagement
|
|
90
|
+
|
|
91
|
+
- **Natural language constraints**: Specify domain knowledge in plain English
|
|
92
|
+
- **Expert knowledge incorporation** by converting expert understanding into algorithmic constraints
|
|
93
|
+
- LLMs convert **natural language questions** to causal queries
|
|
94
|
+
- **Interactive causal discovery** where structure learning or LLMs identify areas of causal uncertainty and can test causal hypotheses through dialogue
|
|
95
|
+
|
|
96
|
+
### 🪟 Transparency and interpretability
|
|
97
|
+
|
|
98
|
+
- LLMs **interpret structure learning process** and outputs, including their uncertainties
|
|
99
|
+
- LLMs **interpret causal inference** results including uncertainties
|
|
100
|
+
- **Contextual graph interpretation** to explain variable meanings and relationships
|
|
101
|
+
- **Uncertainty communication** with clear explanation of confidence levels and limitations
|
|
102
|
+
- **Report generation** including automated research summaries and methodology descriptions
|
|
103
|
+
|
|
104
|
+
### 🔒 Stability and reproducibility
|
|
105
|
+
|
|
106
|
+
- **Cache queries and responses** so that experiments are stable and repeatable even if LLMs themselves are not
|
|
107
|
+
- **Stable randomisation** of e.g. data sub-sampling
|
|
108
|
+
|
|
109
|
+
### 💰 Efficient use of LLM resources (important as an independent researcher)
|
|
110
|
+
|
|
111
|
+
- **Cache queries and results** so that knowledge can be re-used
|
|
112
|
+
- Evaluation and development of **simple context-adapted LLMs**
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
## Upcoming Integration with CausalIQ Ecosystem
|
|
116
|
+
|
|
117
|
+
- 🔍 CausalIQ Discovery makes use of this package to learn more accurate graphs.
|
|
118
|
+
- 🧪 CausalIQ Analysis uses this package to explain the learning process and to intelligently combine and explain results.
|
|
119
|
+
- 🔮 CausalIQ Predict uses this package to explain predictions made by learnt models.
|
|
120
|
+
|
|
121
|
+
## Documentation
|
|
122
|
+
|
|
123
|
+
- [User Guide](docs/userguide/introduction.md) - Getting started
|
|
124
|
+
- [Architecture Overview](docs/architecture/overview.md) - Design and components
|
|
125
|
+
- [LLM Integration Design](docs/architecture/llm_integration.md) - Detailed LLM design
|
|
126
|
+
- [Roadmap](docs/roadmap.md) - Release planning
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
**Supported Python Versions**: 3.9, 3.10, 3.11, 3.12, 3.13
|
|
131
|
+
**Default Python Version**: 3.11
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "causaliq-knowledge"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Incorporating LLM and human knowledge into causal discovery"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "CausalIQ", email = "info@causaliq.com"},
|
|
13
|
+
]
|
|
14
|
+
maintainers = [
|
|
15
|
+
{name = "CausalIQ", email = "info@causaliq.com"},
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Scientific/Engineering",
|
|
28
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
29
|
+
]
|
|
30
|
+
keywords = ["causaliq"]
|
|
31
|
+
requires-python = ">=3.9"
|
|
32
|
+
dependencies = [
|
|
33
|
+
"click>=8.0.0",
|
|
34
|
+
"httpx>=0.24.0",
|
|
35
|
+
"pydantic>=2.0.0",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
dev = [
|
|
40
|
+
"causaliq-core>=0.3.0",
|
|
41
|
+
"pytest>=7.0.0",
|
|
42
|
+
"pytest-cov>=4.0.0",
|
|
43
|
+
"pytest-mock>=3.10.0",
|
|
44
|
+
"black>=22.0.0",
|
|
45
|
+
"isort>=5.10.0",
|
|
46
|
+
"flake8>=5.0.0",
|
|
47
|
+
"mypy>=1.0.0",
|
|
48
|
+
"types-requests>=2.32.0",
|
|
49
|
+
"pre-commit>=2.20.0",
|
|
50
|
+
"build>=0.8.0",
|
|
51
|
+
"twine>=4.0.0",
|
|
52
|
+
]
|
|
53
|
+
test = [
|
|
54
|
+
"causaliq-core>=0.3.0",
|
|
55
|
+
"pytest>=7.0.0",
|
|
56
|
+
"pytest-cov>=4.0.0",
|
|
57
|
+
"pytest-mock>=3.10.0",
|
|
58
|
+
]
|
|
59
|
+
docs = [
|
|
60
|
+
"mkdocs>=1.5.0",
|
|
61
|
+
"mkdocs-material>=9.0.0",
|
|
62
|
+
"mkdocstrings==0.30.1",
|
|
63
|
+
"mkdocstrings-python==1.18.2"
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[project.urls]
|
|
67
|
+
Homepage = "https://github.com/causaliq/causaliq-knowledge"
|
|
68
|
+
Documentation = "https://github.com/causaliq/causaliq-knowledge#readme"
|
|
69
|
+
Repository = "https://github.com/causaliq/causaliq-knowledge"
|
|
70
|
+
"Bug Tracker" = "https://github.com/causaliq/causaliq-knowledge/issues"
|
|
71
|
+
|
|
72
|
+
[project.scripts]
|
|
73
|
+
causaliq-knowledge = "causaliq_knowledge.cli:main"
|
|
74
|
+
cqknow = "causaliq_knowledge.cli:main"
|
|
75
|
+
|
|
76
|
+
[tool.setuptools.dynamic]
|
|
77
|
+
version = {attr = "causaliq_knowledge.__version__"}
|
|
78
|
+
|
|
79
|
+
[tool.setuptools.packages.find]
|
|
80
|
+
where = ["src"]
|
|
81
|
+
|
|
82
|
+
[tool.setuptools.package-dir]
|
|
83
|
+
"" = "src"
|
|
84
|
+
|
|
85
|
+
[tool.setuptools.package-data]
|
|
86
|
+
causaliq_knowledge = ["py.typed"]
|
|
87
|
+
|
|
88
|
+
[tool.pytest.ini_options]
|
|
89
|
+
minversion = "7.0"
|
|
90
|
+
addopts = "-ra -q --strict-markers --cov=causaliq_knowledge --cov-report=term-missing --cov-report=html -m 'not slow'"
|
|
91
|
+
testpaths = [
|
|
92
|
+
"tests",
|
|
93
|
+
]
|
|
94
|
+
python_files = [
|
|
95
|
+
"test_*.py",
|
|
96
|
+
"*_test.py",
|
|
97
|
+
]
|
|
98
|
+
python_classes = [
|
|
99
|
+
"Test*",
|
|
100
|
+
]
|
|
101
|
+
python_functions = [
|
|
102
|
+
"test_*",
|
|
103
|
+
]
|
|
104
|
+
markers = [
|
|
105
|
+
"unit: Unit tests (fast, no external dependencies)",
|
|
106
|
+
"functional: Functional tests (CLI behavior, mocked external deps)",
|
|
107
|
+
"integration: Integration tests (real external dependencies)",
|
|
108
|
+
"slow: Slow tests that take significant time",
|
|
109
|
+
]
|
|
110
|
+
|
|
111
|
+
[tool.coverage.run]
|
|
112
|
+
source = ["src/causaliq_knowledge"]
|
|
113
|
+
omit = [
|
|
114
|
+
"*/tests/*",
|
|
115
|
+
"*/test_*.py",
|
|
116
|
+
]
|
|
117
|
+
parallel = true
|
|
118
|
+
concurrency = ["thread", "multiprocessing"]
|
|
119
|
+
|
|
120
|
+
[tool.coverage.report]
|
|
121
|
+
exclude_lines = [
|
|
122
|
+
"pragma: no cover",
|
|
123
|
+
"def __repr__",
|
|
124
|
+
"if self.debug:",
|
|
125
|
+
"if settings.DEBUG",
|
|
126
|
+
"raise AssertionError",
|
|
127
|
+
"raise NotImplementedError",
|
|
128
|
+
"if 0:",
|
|
129
|
+
"if __name__ == .__main__.:",
|
|
130
|
+
"class .*\\bProtocol\\):",
|
|
131
|
+
"@(abc\\.)?abstractmethod",
|
|
132
|
+
]
|
|
133
|
+
|
|
134
|
+
[tool.black]
|
|
135
|
+
line-length = 79
|
|
136
|
+
target-version = ['py39']
|
|
137
|
+
include = '\.pyi?$'
|
|
138
|
+
extend-exclude = '''
|
|
139
|
+
/(
|
|
140
|
+
# directories
|
|
141
|
+
\.eggs
|
|
142
|
+
| \.git
|
|
143
|
+
| \.hg
|
|
144
|
+
| \.mypy_cache
|
|
145
|
+
| \.tox
|
|
146
|
+
| \.venv
|
|
147
|
+
| build
|
|
148
|
+
| dist
|
|
149
|
+
)/
|
|
150
|
+
'''
|
|
151
|
+
|
|
152
|
+
[tool.isort]
|
|
153
|
+
profile = "black"
|
|
154
|
+
multi_line_output = 3
|
|
155
|
+
line_length = 79
|
|
156
|
+
known_first_party = ["causaliq_knowledge"]
|
|
157
|
+
|
|
158
|
+
[tool.mypy]
|
|
159
|
+
python_version = "3.9"
|
|
160
|
+
warn_return_any = true
|
|
161
|
+
warn_unused_configs = true
|
|
162
|
+
disallow_untyped_defs = true
|
|
163
|
+
disallow_incomplete_defs = true
|
|
164
|
+
check_untyped_defs = true
|
|
165
|
+
disallow_untyped_decorators = true
|
|
166
|
+
no_implicit_optional = true
|
|
167
|
+
warn_redundant_casts = true
|
|
168
|
+
warn_unused_ignores = true
|
|
169
|
+
warn_no_return = true
|
|
170
|
+
warn_unreachable = true
|
|
171
|
+
strict_equality = true
|
|
172
|
+
|
|
173
|
+
[[tool.mypy.overrides]]
|
|
174
|
+
module = "tests.*"
|
|
175
|
+
disallow_untyped_defs = false
|
|
176
|
+
|
|
177
|
+
[[tool.mypy.overrides]]
|
|
178
|
+
module = [
|
|
179
|
+
"scipy",
|
|
180
|
+
"scipy.*",
|
|
181
|
+
"scipy.stats",
|
|
182
|
+
"scipy.special"
|
|
183
|
+
]
|
|
184
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
causaliq-knowledge: LLM and human knowledge for causal discovery.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from causaliq_knowledge.base import KnowledgeProvider
|
|
6
|
+
from causaliq_knowledge.models import EdgeDirection, EdgeKnowledge
|
|
7
|
+
|
|
8
|
+
# Release identity. __version__ is kept as a plain string literal because
# pyproject.toml resolves the package version from this attribute.
__version__ = "0.1.0"
__author__ = "CausalIQ"
__email__ = "info@causaliq.com"

# Descriptive package metadata
__title__ = "causaliq-knowledge"
__description__ = "LLM and human knowledge for causal discovery"
__url__ = "https://github.com/causaliq/causaliq-knowledge"
__license__ = "MIT"

# Integer tuple form of __version__, e.g. "0.1.0" -> (0, 1, 0), so callers
# can compare versions numerically instead of as strings.
VERSION = tuple(int(component) for component in __version__.split("."))
|
|
21
|
+
|
|
22
|
+
# Names exported by ``from causaliq_knowledge import *``.
# LLMKnowledge is not re-exported here: import it from causaliq_knowledge.llm.
__all__ = [
    # Package metadata
    "__version__",
    "__author__",
    "__email__",
    "VERSION",
    # Core data models
    "EdgeKnowledge",
    "EdgeDirection",
    # Abstract provider interface
    "KnowledgeProvider",
]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Abstract base class for knowledge providers."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from causaliq_knowledge.models import EdgeKnowledge
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class KnowledgeProvider(ABC):
    """Abstract interface shared by every source of causal knowledge.

    Concrete providers (LLM-based, rule-based, human-input based, or any
    other source of causal knowledge) subclass this class and implement
    `query_edge()`, which asks about the causal relationship between two
    variables. `query_edges()` and `name` come with default
    implementations that subclasses may override.

    Because `query_edge()` is abstract, instantiating this class (or a
    subclass that does not override `query_edge()`) raises TypeError.

    Example:
        >>> class MyKnowledgeProvider(KnowledgeProvider):
        ...     def query_edge(self, node_a, node_b, context=None):
        ...         # Implementation here
        ...         return EdgeKnowledge(exists=True, confidence=0.8, ...)
        ...
        >>> provider = MyKnowledgeProvider()
        >>> result = provider.query_edge("smoking", "cancer")
    """

    @abstractmethod
    def query_edge(
        self,
        node_a: str,
        node_b: str,
        context: Optional[dict] = None,
    ) -> EdgeKnowledge:
        """Query whether a causal edge exists between two nodes.

        Args:
            node_a: Name of the first variable.
            node_b: Name of the second variable.
            context: Optional context dictionary that may include:
                - domain: The domain (e.g., "medicine", "economics")
                - descriptions: Dict mapping variable names to descriptions
                - additional_info: Any other relevant context

        Returns:
            EdgeKnowledge with:
                - exists: True, False, or None (uncertain)
                - direction: "a_to_b", "b_to_a", "undirected", or None
                - confidence: 0.0 to 1.0
                - reasoning: Human-readable explanation
                - model: Source identifier (optional)

        Raises:
            NotImplementedError: If this base implementation is reached,
                e.g. a subclass delegates to it through ``super()``.
        """
        # Fail loudly instead of silently returning None, which would
        # contradict the declared EdgeKnowledge return type.
        raise NotImplementedError(
            f"{type(self).__name__} must implement query_edge()"
        )

    def query_edges(
        self,
        edges: list[tuple[str, str]],
        context: Optional[dict] = None,
    ) -> list[EdgeKnowledge]:
        """Query multiple edges at once.

        The default implementation calls `query_edge()` once per pair, in
        the order given. Subclasses may override for batch optimization.

        Args:
            edges: List of (node_a, node_b) tuples to query.
            context: Optional context dictionary; the same object is
                passed to every individual query.

        Returns:
            List of EdgeKnowledge results, one per edge pair, in input
            order. An empty `edges` list yields an empty list.
        """
        return [
            self.query_edge(node_a, node_b, context)
            for node_a, node_b in edges
        ]

    @property
    def name(self) -> str:
        """Return the name of this knowledge provider.

        Returns:
            The concrete class name by default. Subclasses may override.
        """
        return type(self).__name__
|