dewey-haystack 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dewey_haystack-0.1.0/PKG-INFO +199 -0
- dewey_haystack-0.1.0/README.md +174 -0
- dewey_haystack-0.1.0/pyproject.toml +41 -0
- dewey_haystack-0.1.0/setup.cfg +4 -0
- dewey_haystack-0.1.0/src/dewey_haystack.egg-info/PKG-INFO +199 -0
- dewey_haystack-0.1.0/src/dewey_haystack.egg-info/SOURCES.txt +15 -0
- dewey_haystack-0.1.0/src/dewey_haystack.egg-info/dependency_links.txt +1 -0
- dewey_haystack-0.1.0/src/dewey_haystack.egg-info/requires.txt +6 -0
- dewey_haystack-0.1.0/src/dewey_haystack.egg-info/top_level.txt +1 -0
- dewey_haystack-0.1.0/src/haystack_integrations/components/retrievers/dewey/__init__.py +8 -0
- dewey_haystack-0.1.0/src/haystack_integrations/components/retrievers/dewey/dewey_research_component.py +162 -0
- dewey_haystack-0.1.0/src/haystack_integrations/components/retrievers/dewey/dewey_retriever.py +116 -0
- dewey_haystack-0.1.0/src/haystack_integrations/document_stores/dewey/__init__.py +5 -0
- dewey_haystack-0.1.0/src/haystack_integrations/document_stores/dewey/dewey_document_store.py +210 -0
- dewey_haystack-0.1.0/tests/test_document_store.py +111 -0
- dewey_haystack-0.1.0/tests/test_research_component.py +123 -0
- dewey_haystack-0.1.0/tests/test_retriever.py +112 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dewey-haystack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Haystack integration for Dewey — document store, retriever, and research component
|
|
5
|
+
Author-email: Dewey <hi@meetdewey.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://meetdewey.com
|
|
8
|
+
Project-URL: Repository, https://github.com/meetdewey/dewey-haystack
|
|
9
|
+
Keywords: haystack,dewey,rag,retrieval,document-store,llm
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: meetdewey>=1.0
|
|
21
|
+
Requires-Dist: haystack-ai>=2.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-mock>=3; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# dewey-haystack
|
|
27
|
+
|
|
28
|
+
[![CI](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml/badge.svg)](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml)
|
|
29
|
+
|
|
30
|
+
[Haystack](https://haystack.deepset.ai/) integration for [Dewey](https://meetdewey.com) — document store, retriever, and research component.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install dewey-haystack
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Components
|
|
39
|
+
|
|
40
|
+
### DeweyDocumentStore
|
|
41
|
+
|
|
42
|
+
Haystack DocumentStore backed by a Dewey collection. Handles document upload and deletion; Dewey manages chunking and embeddings automatically.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
46
|
+
from haystack.utils import Secret
|
|
47
|
+
|
|
48
|
+
store = DeweyDocumentStore(
|
|
49
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
50
|
+
collection_id="3f7a1b2c-...",
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Upload Haystack Documents:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from haystack import Document
|
|
58
|
+
|
|
59
|
+
store.write_documents([
|
|
60
|
+
Document(content="Neural networks learn via backpropagation.", meta={"source": "ml.txt"}),
|
|
61
|
+
Document(content="Transformers use self-attention mechanisms."),
|
|
62
|
+
])
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### DeweyRetriever
|
|
66
|
+
|
|
67
|
+
Drop-in Haystack retriever backed by Dewey's hybrid semantic + BM25 search.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from haystack import Pipeline
|
|
71
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
72
|
+
from haystack_integrations.components.retrievers.dewey import DeweyRetriever
|
|
73
|
+
from haystack.utils import Secret
|
|
74
|
+
|
|
75
|
+
store = DeweyDocumentStore(
|
|
76
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
77
|
+
collection_id="3f7a1b2c-...",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
pipeline = Pipeline()
|
|
81
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=8))
|
|
82
|
+
|
|
83
|
+
result = pipeline.run({"retriever": {"query": "What are the key findings?"}})
|
|
84
|
+
for doc in result["retriever"]["documents"]:
|
|
85
|
+
print(f"[{doc.meta['filename']}] {doc.content}")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Each returned `Document` carries citation metadata:
|
|
89
|
+
|
|
90
|
+
| Field | Description |
|
|
91
|
+
|---|---|
|
|
92
|
+
| `score` | Relevance score (0–1) |
|
|
93
|
+
| `document_id` | Dewey document ID |
|
|
94
|
+
| `filename` | Original filename |
|
|
95
|
+
| `section_id` | Section ID |
|
|
96
|
+
| `section_title` | Section heading |
|
|
97
|
+
| `section_level` | Heading depth (1 = top-level) |
|
|
98
|
+
|
|
99
|
+
**RAG pipeline with an LLM:**
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from haystack.components.builders import PromptBuilder
|
|
103
|
+
from haystack.components.generators import OpenAIGenerator
|
|
104
|
+
|
|
105
|
+
prompt_template = """
|
|
106
|
+
Answer the question using only the provided context.
|
|
107
|
+
|
|
108
|
+
Context:
|
|
109
|
+
{% for doc in documents %}
|
|
110
|
+
- {{ doc.content }}
|
|
111
|
+
{% endfor %}
|
|
112
|
+
|
|
113
|
+
Question: {{ query }}
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
pipeline = Pipeline()
|
|
117
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=5))
|
|
118
|
+
pipeline.add_component("prompt", PromptBuilder(template=prompt_template))
|
|
119
|
+
pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o-mini"))
|
|
120
|
+
|
|
121
|
+
pipeline.connect("retriever.documents", "prompt.documents")
|
|
122
|
+
pipeline.connect("prompt.prompt", "llm.prompt")
|
|
123
|
+
|
|
124
|
+
result = pipeline.run({
|
|
125
|
+
"retriever": {"query": "What were the main findings?"},
|
|
126
|
+
"prompt": {"query": "What were the main findings?"},
|
|
127
|
+
})
|
|
128
|
+
print(result["llm"]["replies"][0])
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### DeweyResearchComponent
|
|
132
|
+
|
|
133
|
+
A Haystack component that runs Dewey's full agentic research loop — searching, reading, and synthesising across multiple documents — and returns a grounded Markdown answer with cited sources.
|
|
134
|
+
|
|
135
|
+
Use this as a drop-in replacement for an LLM generator when you want Dewey to handle both retrieval *and* generation.
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from haystack import Pipeline
|
|
139
|
+
from haystack_integrations.components.retrievers.dewey import DeweyResearchComponent
|
|
140
|
+
from haystack.utils import Secret
|
|
141
|
+
|
|
142
|
+
pipeline = Pipeline()
|
|
143
|
+
pipeline.add_component(
|
|
144
|
+
"research",
|
|
145
|
+
DeweyResearchComponent(
|
|
146
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
147
|
+
collection_id="3f7a1b2c-...",
|
|
148
|
+
depth="balanced",
|
|
149
|
+
),
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
result = pipeline.run({"research": {"query": "What were the key findings across all studies?"}})
|
|
153
|
+
print(result["research"]["answer"])
|
|
154
|
+
|
|
155
|
+
for source in result["research"]["sources"]:
|
|
156
|
+
print(f" [{source.meta['filename']}] {source.content[:80]}...")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Outputs:**
|
|
160
|
+
|
|
161
|
+
| Key | Type | Description |
|
|
162
|
+
|---|---|---|
|
|
163
|
+
| `answer` | `str` | Synthesised Markdown answer |
|
|
164
|
+
| `sources` | `list[Document]` | Source chunks cited by the answer |
|
|
165
|
+
|
|
166
|
+
**Research depths:**
|
|
167
|
+
|
|
168
|
+
| depth | Speed | Tools | Requires BYOK |
|
|
169
|
+
|---|---|---|---|
|
|
170
|
+
| `quick` | fast | basic search | no |
|
|
171
|
+
| `balanced` | fast | basic search | no |
|
|
172
|
+
| `deep` | slower | full tool suite | yes |
|
|
173
|
+
| `exhaustive` | slowest | full tool suite | yes |
|
|
174
|
+
|
|
175
|
+
`deep` and `exhaustive` require a Dewey Pro plan and a BYOK (bring-your-own-key) API key — i.e. your own LLM provider key — configured on your project.
|
|
176
|
+
|
|
177
|
+
**With a custom model:**
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
DeweyResearchComponent(
|
|
181
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
182
|
+
collection_id="3f7a1b2c-...",
|
|
183
|
+
depth="deep",
|
|
184
|
+
model="claude-sonnet-4-6", # requires Anthropic BYOK key on your project
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Requirements
|
|
189
|
+
|
|
190
|
+
- Python 3.9+
|
|
191
|
+
- `meetdewey >= 1.0`
|
|
192
|
+
- `haystack-ai >= 2.0`
|
|
193
|
+
|
|
194
|
+
## Development
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
pip install -e ".[dev]"
|
|
198
|
+
pytest
|
|
199
|
+
```
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# dewey-haystack
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml/badge.svg)](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml)
|
|
4
|
+
|
|
5
|
+
[Haystack](https://haystack.deepset.ai/) integration for [Dewey](https://meetdewey.com) — document store, retriever, and research component.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install dewey-haystack
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Components
|
|
14
|
+
|
|
15
|
+
### DeweyDocumentStore
|
|
16
|
+
|
|
17
|
+
Haystack DocumentStore backed by a Dewey collection. Handles document upload and deletion; Dewey manages chunking and embeddings automatically.
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
21
|
+
from haystack.utils import Secret
|
|
22
|
+
|
|
23
|
+
store = DeweyDocumentStore(
|
|
24
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
25
|
+
collection_id="3f7a1b2c-...",
|
|
26
|
+
)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Upload Haystack Documents:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from haystack import Document
|
|
33
|
+
|
|
34
|
+
store.write_documents([
|
|
35
|
+
Document(content="Neural networks learn via backpropagation.", meta={"source": "ml.txt"}),
|
|
36
|
+
Document(content="Transformers use self-attention mechanisms."),
|
|
37
|
+
])
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### DeweyRetriever
|
|
41
|
+
|
|
42
|
+
Drop-in Haystack retriever backed by Dewey's hybrid semantic + BM25 search.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from haystack import Pipeline
|
|
46
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
47
|
+
from haystack_integrations.components.retrievers.dewey import DeweyRetriever
|
|
48
|
+
from haystack.utils import Secret
|
|
49
|
+
|
|
50
|
+
store = DeweyDocumentStore(
|
|
51
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
52
|
+
collection_id="3f7a1b2c-...",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
pipeline = Pipeline()
|
|
56
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=8))
|
|
57
|
+
|
|
58
|
+
result = pipeline.run({"retriever": {"query": "What are the key findings?"}})
|
|
59
|
+
for doc in result["retriever"]["documents"]:
|
|
60
|
+
print(f"[{doc.meta['filename']}] {doc.content}")
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Each returned `Document` carries citation metadata:
|
|
64
|
+
|
|
65
|
+
| Field | Description |
|
|
66
|
+
|---|---|
|
|
67
|
+
| `score` | Relevance score (0–1) |
|
|
68
|
+
| `document_id` | Dewey document ID |
|
|
69
|
+
| `filename` | Original filename |
|
|
70
|
+
| `section_id` | Section ID |
|
|
71
|
+
| `section_title` | Section heading |
|
|
72
|
+
| `section_level` | Heading depth (1 = top-level) |
|
|
73
|
+
|
|
74
|
+
**RAG pipeline with an LLM:**
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from haystack.components.builders import PromptBuilder
|
|
78
|
+
from haystack.components.generators import OpenAIGenerator
|
|
79
|
+
|
|
80
|
+
prompt_template = """
|
|
81
|
+
Answer the question using only the provided context.
|
|
82
|
+
|
|
83
|
+
Context:
|
|
84
|
+
{% for doc in documents %}
|
|
85
|
+
- {{ doc.content }}
|
|
86
|
+
{% endfor %}
|
|
87
|
+
|
|
88
|
+
Question: {{ query }}
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
pipeline = Pipeline()
|
|
92
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=5))
|
|
93
|
+
pipeline.add_component("prompt", PromptBuilder(template=prompt_template))
|
|
94
|
+
pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o-mini"))
|
|
95
|
+
|
|
96
|
+
pipeline.connect("retriever.documents", "prompt.documents")
|
|
97
|
+
pipeline.connect("prompt.prompt", "llm.prompt")
|
|
98
|
+
|
|
99
|
+
result = pipeline.run({
|
|
100
|
+
"retriever": {"query": "What were the main findings?"},
|
|
101
|
+
"prompt": {"query": "What were the main findings?"},
|
|
102
|
+
})
|
|
103
|
+
print(result["llm"]["replies"][0])
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### DeweyResearchComponent
|
|
107
|
+
|
|
108
|
+
A Haystack component that runs Dewey's full agentic research loop — searching, reading, and synthesising across multiple documents — and returns a grounded Markdown answer with cited sources.
|
|
109
|
+
|
|
110
|
+
Use this as a drop-in replacement for an LLM generator when you want Dewey to handle both retrieval *and* generation.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from haystack import Pipeline
|
|
114
|
+
from haystack_integrations.components.retrievers.dewey import DeweyResearchComponent
|
|
115
|
+
from haystack.utils import Secret
|
|
116
|
+
|
|
117
|
+
pipeline = Pipeline()
|
|
118
|
+
pipeline.add_component(
|
|
119
|
+
"research",
|
|
120
|
+
DeweyResearchComponent(
|
|
121
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
122
|
+
collection_id="3f7a1b2c-...",
|
|
123
|
+
depth="balanced",
|
|
124
|
+
),
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
result = pipeline.run({"research": {"query": "What were the key findings across all studies?"}})
|
|
128
|
+
print(result["research"]["answer"])
|
|
129
|
+
|
|
130
|
+
for source in result["research"]["sources"]:
|
|
131
|
+
print(f" [{source.meta['filename']}] {source.content[:80]}...")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**Outputs:**
|
|
135
|
+
|
|
136
|
+
| Key | Type | Description |
|
|
137
|
+
|---|---|---|
|
|
138
|
+
| `answer` | `str` | Synthesised Markdown answer |
|
|
139
|
+
| `sources` | `list[Document]` | Source chunks cited by the answer |
|
|
140
|
+
|
|
141
|
+
**Research depths:**
|
|
142
|
+
|
|
143
|
+
| depth | Speed | Tools | Requires BYOK |
|
|
144
|
+
|---|---|---|---|
|
|
145
|
+
| `quick` | fast | basic search | no |
|
|
146
|
+
| `balanced` | fast | basic search | no |
|
|
147
|
+
| `deep` | slower | full tool suite | yes |
|
|
148
|
+
| `exhaustive` | slowest | full tool suite | yes |
|
|
149
|
+
|
|
150
|
+
`deep` and `exhaustive` require a Dewey Pro plan and a BYOK (bring-your-own-key) API key — i.e. your own LLM provider key — configured on your project.
|
|
151
|
+
|
|
152
|
+
**With a custom model:**
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
DeweyResearchComponent(
|
|
156
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
157
|
+
collection_id="3f7a1b2c-...",
|
|
158
|
+
depth="deep",
|
|
159
|
+
model="claude-sonnet-4-6", # requires Anthropic BYOK key on your project
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Requirements
|
|
164
|
+
|
|
165
|
+
- Python 3.9+
|
|
166
|
+
- `meetdewey >= 1.0`
|
|
167
|
+
- `haystack-ai >= 2.0`
|
|
168
|
+
|
|
169
|
+
## Development
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
pip install -e ".[dev]"
|
|
173
|
+
pytest
|
|
174
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dewey-haystack"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Haystack integration for Dewey — document store, retriever, and research component"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{ name = "Dewey", email = "hi@meetdewey.com" }]
|
|
13
|
+
keywords = ["haystack", "dewey", "rag", "retrieval", "document-store", "llm"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"meetdewey>=1.0",
|
|
26
|
+
"haystack-ai>=2.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
dev = [
|
|
31
|
+
"pytest>=7",
|
|
32
|
+
"pytest-mock>=3",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://meetdewey.com"
|
|
37
|
+
Repository = "https://github.com/meetdewey/dewey-haystack"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["src"]
|
|
41
|
+
include = ["haystack_integrations*"]
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dewey-haystack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Haystack integration for Dewey — document store, retriever, and research component
|
|
5
|
+
Author-email: Dewey <hi@meetdewey.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://meetdewey.com
|
|
8
|
+
Project-URL: Repository, https://github.com/meetdewey/dewey-haystack
|
|
9
|
+
Keywords: haystack,dewey,rag,retrieval,document-store,llm
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: meetdewey>=1.0
|
|
21
|
+
Requires-Dist: haystack-ai>=2.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-mock>=3; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# dewey-haystack
|
|
27
|
+
|
|
28
|
+
[![CI](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml/badge.svg)](https://github.com/meetdewey/dewey-haystack/actions/workflows/ci.yml)
|
|
29
|
+
|
|
30
|
+
[Haystack](https://haystack.deepset.ai/) integration for [Dewey](https://meetdewey.com) — document store, retriever, and research component.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install dewey-haystack
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Components
|
|
39
|
+
|
|
40
|
+
### DeweyDocumentStore
|
|
41
|
+
|
|
42
|
+
Haystack DocumentStore backed by a Dewey collection. Handles document upload and deletion; Dewey manages chunking and embeddings automatically.
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
46
|
+
from haystack.utils import Secret
|
|
47
|
+
|
|
48
|
+
store = DeweyDocumentStore(
|
|
49
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
50
|
+
collection_id="3f7a1b2c-...",
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Upload Haystack Documents:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from haystack import Document
|
|
58
|
+
|
|
59
|
+
store.write_documents([
|
|
60
|
+
Document(content="Neural networks learn via backpropagation.", meta={"source": "ml.txt"}),
|
|
61
|
+
Document(content="Transformers use self-attention mechanisms."),
|
|
62
|
+
])
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### DeweyRetriever
|
|
66
|
+
|
|
67
|
+
Drop-in Haystack retriever backed by Dewey's hybrid semantic + BM25 search.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from haystack import Pipeline
|
|
71
|
+
from haystack_integrations.document_stores.dewey import DeweyDocumentStore
|
|
72
|
+
from haystack_integrations.components.retrievers.dewey import DeweyRetriever
|
|
73
|
+
from haystack.utils import Secret
|
|
74
|
+
|
|
75
|
+
store = DeweyDocumentStore(
|
|
76
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
77
|
+
collection_id="3f7a1b2c-...",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
pipeline = Pipeline()
|
|
81
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=8))
|
|
82
|
+
|
|
83
|
+
result = pipeline.run({"retriever": {"query": "What are the key findings?"}})
|
|
84
|
+
for doc in result["retriever"]["documents"]:
|
|
85
|
+
print(f"[{doc.meta['filename']}] {doc.content}")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Each returned `Document` carries citation metadata:
|
|
89
|
+
|
|
90
|
+
| Field | Description |
|
|
91
|
+
|---|---|
|
|
92
|
+
| `score` | Relevance score (0–1) |
|
|
93
|
+
| `document_id` | Dewey document ID |
|
|
94
|
+
| `filename` | Original filename |
|
|
95
|
+
| `section_id` | Section ID |
|
|
96
|
+
| `section_title` | Section heading |
|
|
97
|
+
| `section_level` | Heading depth (1 = top-level) |
|
|
98
|
+
|
|
99
|
+
**RAG pipeline with an LLM:**
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from haystack.components.builders import PromptBuilder
|
|
103
|
+
from haystack.components.generators import OpenAIGenerator
|
|
104
|
+
|
|
105
|
+
prompt_template = """
|
|
106
|
+
Answer the question using only the provided context.
|
|
107
|
+
|
|
108
|
+
Context:
|
|
109
|
+
{% for doc in documents %}
|
|
110
|
+
- {{ doc.content }}
|
|
111
|
+
{% endfor %}
|
|
112
|
+
|
|
113
|
+
Question: {{ query }}
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
pipeline = Pipeline()
|
|
117
|
+
pipeline.add_component("retriever", DeweyRetriever(document_store=store, top_k=5))
|
|
118
|
+
pipeline.add_component("prompt", PromptBuilder(template=prompt_template))
|
|
119
|
+
pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o-mini"))
|
|
120
|
+
|
|
121
|
+
pipeline.connect("retriever.documents", "prompt.documents")
|
|
122
|
+
pipeline.connect("prompt.prompt", "llm.prompt")
|
|
123
|
+
|
|
124
|
+
result = pipeline.run({
|
|
125
|
+
"retriever": {"query": "What were the main findings?"},
|
|
126
|
+
"prompt": {"query": "What were the main findings?"},
|
|
127
|
+
})
|
|
128
|
+
print(result["llm"]["replies"][0])
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### DeweyResearchComponent
|
|
132
|
+
|
|
133
|
+
A Haystack component that runs Dewey's full agentic research loop — searching, reading, and synthesising across multiple documents — and returns a grounded Markdown answer with cited sources.
|
|
134
|
+
|
|
135
|
+
Use this as a drop-in replacement for an LLM generator when you want Dewey to handle both retrieval *and* generation.
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from haystack import Pipeline
|
|
139
|
+
from haystack_integrations.components.retrievers.dewey import DeweyResearchComponent
|
|
140
|
+
from haystack.utils import Secret
|
|
141
|
+
|
|
142
|
+
pipeline = Pipeline()
|
|
143
|
+
pipeline.add_component(
|
|
144
|
+
"research",
|
|
145
|
+
DeweyResearchComponent(
|
|
146
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
147
|
+
collection_id="3f7a1b2c-...",
|
|
148
|
+
depth="balanced",
|
|
149
|
+
),
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
result = pipeline.run({"research": {"query": "What were the key findings across all studies?"}})
|
|
153
|
+
print(result["research"]["answer"])
|
|
154
|
+
|
|
155
|
+
for source in result["research"]["sources"]:
|
|
156
|
+
print(f" [{source.meta['filename']}] {source.content[:80]}...")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Outputs:**
|
|
160
|
+
|
|
161
|
+
| Key | Type | Description |
|
|
162
|
+
|---|---|---|
|
|
163
|
+
| `answer` | `str` | Synthesised Markdown answer |
|
|
164
|
+
| `sources` | `list[Document]` | Source chunks cited by the answer |
|
|
165
|
+
|
|
166
|
+
**Research depths:**
|
|
167
|
+
|
|
168
|
+
| depth | Speed | Tools | Requires BYOK |
|
|
169
|
+
|---|---|---|---|
|
|
170
|
+
| `quick` | fast | basic search | no |
|
|
171
|
+
| `balanced` | fast | basic search | no |
|
|
172
|
+
| `deep` | slower | full tool suite | yes |
|
|
173
|
+
| `exhaustive` | slowest | full tool suite | yes |
|
|
174
|
+
|
|
175
|
+
`deep` and `exhaustive` require a Dewey Pro plan and a BYOK (bring-your-own-key) API key — i.e. your own LLM provider key — configured on your project.
|
|
176
|
+
|
|
177
|
+
**With a custom model:**
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
DeweyResearchComponent(
|
|
181
|
+
api_key=Secret.from_env_var("DEWEY_API_KEY"),
|
|
182
|
+
collection_id="3f7a1b2c-...",
|
|
183
|
+
depth="deep",
|
|
184
|
+
model="claude-sonnet-4-6", # requires Anthropic BYOK key on your project
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Requirements
|
|
189
|
+
|
|
190
|
+
- Python 3.9+
|
|
191
|
+
- `meetdewey >= 1.0`
|
|
192
|
+
- `haystack-ai >= 2.0`
|
|
193
|
+
|
|
194
|
+
## Development
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
pip install -e ".[dev]"
|
|
198
|
+
pytest
|
|
199
|
+
```
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/dewey_haystack.egg-info/PKG-INFO
|
|
4
|
+
src/dewey_haystack.egg-info/SOURCES.txt
|
|
5
|
+
src/dewey_haystack.egg-info/dependency_links.txt
|
|
6
|
+
src/dewey_haystack.egg-info/requires.txt
|
|
7
|
+
src/dewey_haystack.egg-info/top_level.txt
|
|
8
|
+
src/haystack_integrations/components/retrievers/dewey/__init__.py
|
|
9
|
+
src/haystack_integrations/components/retrievers/dewey/dewey_research_component.py
|
|
10
|
+
src/haystack_integrations/components/retrievers/dewey/dewey_retriever.py
|
|
11
|
+
src/haystack_integrations/document_stores/dewey/__init__.py
|
|
12
|
+
src/haystack_integrations/document_stores/dewey/dewey_document_store.py
|
|
13
|
+
tests/test_document_store.py
|
|
14
|
+
tests/test_research_component.py
|
|
15
|
+
tests/test_retriever.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
haystack_integrations
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from haystack_integrations.components.retrievers.dewey.dewey_research_component import (
|
|
2
|
+
DeweyResearchComponent,
|
|
3
|
+
)
|
|
4
|
+
from haystack_integrations.components.retrievers.dewey.dewey_retriever import (
|
|
5
|
+
DeweyRetriever,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = ["DeweyRetriever", "DeweyResearchComponent"]
|