dspy-moss 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dspy_moss-0.0.1/LICENSE +25 -0
- dspy_moss-0.0.1/PKG-INFO +143 -0
- dspy_moss-0.0.1/README.md +131 -0
- dspy_moss-0.0.1/pyproject.toml +51 -0
- dspy_moss-0.0.1/setup.cfg +4 -0
- dspy_moss-0.0.1/src/dspy_moss/__init__.py +22 -0
- dspy_moss-0.0.1/src/dspy_moss/retrieve.py +239 -0
- dspy_moss-0.0.1/src/dspy_moss.egg-info/PKG-INFO +143 -0
- dspy_moss-0.0.1/src/dspy_moss.egg-info/SOURCES.txt +11 -0
- dspy_moss-0.0.1/src/dspy_moss.egg-info/dependency_links.txt +1 -0
- dspy_moss-0.0.1/src/dspy_moss.egg-info/requires.txt +2 -0
- dspy_moss-0.0.1/src/dspy_moss.egg-info/top_level.txt +1 -0
- dspy_moss-0.0.1/tests/test_retrieve.py +133 -0
dspy_moss-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, InferEdge Inc.
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
19
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
20
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
21
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
22
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
24
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
25
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
dspy_moss-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dspy-moss
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Moss semantic search retrieval module for DSPy
|
|
5
|
+
License: BSD-2-Clause
|
|
6
|
+
Requires-Python: <3.15,>=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: moss>=1.1.1
|
|
10
|
+
Requires-Dist: dspy>=2.0.0
|
|
11
|
+
Dynamic: license-file
|
|
12
|
+
|
|
13
|
+
# dspy-moss
|
|
14
|
+
|
|
15
|
+
Moss semantic search retrieval module for [DSPy](https://dspy.ai).
|
|
16
|
+
|
|
17
|
+
Provides `MossRM` — a `dspy.Retrieve` subclass that plugs into DSPy's RM interface for sub-10ms knowledge retrieval with no external embedder.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install dspy-moss
|
|
23
|
+
# or
|
|
24
|
+
uv add dspy-moss
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import dspy
|
|
31
|
+
from dspy_moss import MossRM
|
|
32
|
+
|
|
33
|
+
# Reads MOSS_PROJECT_ID and MOSS_PROJECT_KEY from environment
|
|
34
|
+
rm = MossRM(index_name="my-index")
|
|
35
|
+
rm.load_index() # load into this process's memory before querying
|
|
36
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
37
|
+
|
|
38
|
+
retrieve = dspy.Retrieve(k=3)
|
|
39
|
+
result = retrieve("What is the refund policy?")
|
|
40
|
+
for passage in result.passages:
|
|
41
|
+
print(f"[{passage['score']:.3f}] {passage['long_text']}")
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage patterns
|
|
45
|
+
|
|
46
|
+
### As a configured retriever
|
|
47
|
+
|
|
48
|
+
Set `MossRM` as the default retriever for all `dspy.Retrieve` calls in your program:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import dspy
|
|
52
|
+
from dspy_moss import MossRM
|
|
53
|
+
|
|
54
|
+
rm = MossRM(
|
|
55
|
+
index_name="support-kb",
|
|
56
|
+
k=5,
|
|
57
|
+
alpha=0.8, # 1.0 = semantic only, 0.0 = keyword only
|
|
58
|
+
)
|
|
59
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
60
|
+
|
|
61
|
+
# Any dspy.Retrieve() now uses Moss
|
|
62
|
+
class RAG(dspy.Module):
|
|
63
|
+
def __init__(self):
|
|
64
|
+
self.retrieve = dspy.Retrieve(k=3)
|
|
65
|
+
self.generate = dspy.ChainOfThought("context, question -> answer")
|
|
66
|
+
|
|
67
|
+
def forward(self, question):
|
|
68
|
+
context = self.retrieve(question).passages
|
|
69
|
+
return self.generate(context=context, question=question)
|
|
70
|
+
|
|
71
|
+
rag = RAG()
|
|
72
|
+
print(rag("How long do refunds take?").answer)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### As a ReAct tool
|
|
76
|
+
|
|
77
|
+
`MossRM.forward()` is already sync, so pass the instance directly — no wrapper needed:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import dspy
|
|
81
|
+
from dspy_moss import MossRM
|
|
82
|
+
|
|
83
|
+
rm = MossRM(index_name="support-kb", k=5)
|
|
84
|
+
rm.load_index()
|
|
85
|
+
agent = dspy.ReAct(signature="question -> answer", tools=[rm])
|
|
86
|
+
print(agent(question="What payment methods do you accept?").answer)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### With an explicit client
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from moss import MossClient
|
|
93
|
+
from dspy_moss import MossRM
|
|
94
|
+
|
|
95
|
+
client = MossClient("proj-id", "proj-key")
|
|
96
|
+
rm = MossRM(index_name="my-index", moss_client=client, k=5)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Configuration
|
|
100
|
+
|
|
101
|
+
### MossRM
|
|
102
|
+
|
|
103
|
+
| Parameter | Default | Description |
|
|
104
|
+
| --- | --- | --- |
|
|
105
|
+
| `index_name` | (required) | Name of the Moss index to query |
|
|
106
|
+
| `moss_client` | `None` | Existing `MossClient`. When omitted, one is created from credentials |
|
|
107
|
+
| `project_id` | `MOSS_PROJECT_ID` env var | Moss project ID |
|
|
108
|
+
| `project_key` | `MOSS_PROJECT_KEY` env var | Moss project key |
|
|
109
|
+
| `k` | `3` | Default number of passages per query |
|
|
110
|
+
| `alpha` | `0.8` | Search blend: 1.0 = semantic only, 0.0 = keyword only |
|
|
111
|
+
|
|
112
|
+
### Passage format
|
|
113
|
+
|
|
114
|
+
Each entry in `result.passages` is a dict with:
|
|
115
|
+
|
|
116
|
+
| Key | Type | Description |
|
|
117
|
+
| --- | --- | --- |
|
|
118
|
+
| `long_text` | `str` | Document text (DSPy's standard passage field) |
|
|
119
|
+
| `id` | `str` | Document ID |
|
|
120
|
+
| `score` | `float` | Relevance score |
|
|
121
|
+
| `metadata` | `dict` | Key-value metadata stored with the document |
|
|
122
|
+
|
|
123
|
+
### Mutable index helpers
|
|
124
|
+
|
|
125
|
+
`MossRM` also exposes two optional helpers for agents that write to the knowledge base:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# Read documents
|
|
129
|
+
objects = rm.get_objects(num_samples=10)
|
|
130
|
+
|
|
131
|
+
# Add / upsert documents
|
|
132
|
+
rm.insert([{"id": "doc-1", "text": "New fact.", "metadata": {"source": "agent"}}])
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
BSD 2-Clause — see [LICENSE](LICENSE).
|
|
138
|
+
|
|
139
|
+
## Support
|
|
140
|
+
|
|
141
|
+
- [Moss Docs](https://docs.moss.dev)
|
|
142
|
+
- [Moss Discord](https://discord.gg/eMXExuafBR)
|
|
143
|
+
- [DSPy Docs](https://dspy.ai)
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# dspy-moss
|
|
2
|
+
|
|
3
|
+
Moss semantic search retrieval module for [DSPy](https://dspy.ai).
|
|
4
|
+
|
|
5
|
+
Provides `MossRM` — a `dspy.Retrieve` subclass that plugs into DSPy's RM interface for sub-10ms knowledge retrieval with no external embedder.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install dspy-moss
|
|
11
|
+
# or
|
|
12
|
+
uv add dspy-moss
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quickstart
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import dspy
|
|
19
|
+
from dspy_moss import MossRM
|
|
20
|
+
|
|
21
|
+
# Reads MOSS_PROJECT_ID and MOSS_PROJECT_KEY from environment
|
|
22
|
+
rm = MossRM(index_name="my-index")
|
|
23
|
+
rm.load_index() # load into this process's memory before querying
|
|
24
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
25
|
+
|
|
26
|
+
retrieve = dspy.Retrieve(k=3)
|
|
27
|
+
result = retrieve("What is the refund policy?")
|
|
28
|
+
for passage in result.passages:
|
|
29
|
+
print(passage)  # dspy.Retrieve keeps only each passage's long_text string
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Usage patterns
|
|
33
|
+
|
|
34
|
+
### As a configured retriever
|
|
35
|
+
|
|
36
|
+
Set `MossRM` as the default retriever for all `dspy.Retrieve` calls in your program:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import dspy
|
|
40
|
+
from dspy_moss import MossRM
|
|
41
|
+
|
|
42
|
+
rm = MossRM(
|
|
43
|
+
index_name="support-kb",
|
|
44
|
+
k=5,
|
|
45
|
+
alpha=0.8, # 1.0 = semantic only, 0.0 = keyword only
|
|
46
|
+
)
|
|
47
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
48
|
+
|
|
49
|
+
# Any dspy.Retrieve() now uses Moss
|
|
50
|
+
class RAG(dspy.Module):
|
|
51
|
+
def __init__(self):
|
|
52
|
+
self.retrieve = dspy.Retrieve(k=3)
|
|
53
|
+
self.generate = dspy.ChainOfThought("context, question -> answer")
|
|
54
|
+
|
|
55
|
+
def forward(self, question):
|
|
56
|
+
context = self.retrieve(question).passages
|
|
57
|
+
return self.generate(context=context, question=question)
|
|
58
|
+
|
|
59
|
+
rag = RAG()
|
|
60
|
+
print(rag("How long do refunds take?").answer)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### As a ReAct tool
|
|
64
|
+
|
|
65
|
+
`MossRM.forward()` is already sync, so pass the instance directly — no wrapper needed:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import dspy
|
|
69
|
+
from dspy_moss import MossRM
|
|
70
|
+
|
|
71
|
+
rm = MossRM(index_name="support-kb", k=5)
|
|
72
|
+
rm.load_index()
|
|
73
|
+
agent = dspy.ReAct(signature="question -> answer", tools=[rm])
|
|
74
|
+
print(agent(question="What payment methods do you accept?").answer)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### With an explicit client
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from moss import MossClient
|
|
81
|
+
from dspy_moss import MossRM
|
|
82
|
+
|
|
83
|
+
client = MossClient("proj-id", "proj-key")
|
|
84
|
+
rm = MossRM(index_name="my-index", moss_client=client, k=5)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Configuration
|
|
88
|
+
|
|
89
|
+
### MossRM
|
|
90
|
+
|
|
91
|
+
| Parameter | Default | Description |
|
|
92
|
+
| --- | --- | --- |
|
|
93
|
+
| `index_name` | (required) | Name of the Moss index to query |
|
|
94
|
+
| `moss_client` | `None` | Existing `MossClient`. When omitted, one is created from credentials |
|
|
95
|
+
| `project_id` | `MOSS_PROJECT_ID` env var | Moss project ID |
|
|
96
|
+
| `project_key` | `MOSS_PROJECT_KEY` env var | Moss project key |
|
|
97
|
+
| `k` | `3` | Default number of passages per query |
|
|
98
|
+
| `alpha` | `0.8` | Search blend: 1.0 = semantic only, 0.0 = keyword only |
|
|
99
|
+
|
|
100
|
+
### Passage format
|
|
101
|
+
|
|
102
|
+
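Each passage returned by `MossRM.forward()` is a dot-accessible dict with the keys below (`dspy.Retrieve` keeps only the `long_text` string of each passage in `result.passages`):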
|
|
103
|
+
|
|
104
|
+
| Key | Type | Description |
|
|
105
|
+
| --- | --- | --- |
|
|
106
|
+
| `long_text` | `str` | Document text (DSPy's standard passage field) |
|
|
107
|
+
| `id` | `str` | Document ID |
|
|
108
|
+
| `score` | `float` | Relevance score |
|
|
109
|
+
| `metadata` | `dict` | Key-value metadata stored with the document |
|
|
110
|
+
|
|
111
|
+
### Mutable index helpers
|
|
112
|
+
|
|
113
|
+
`MossRM` also exposes two optional helpers for agents that read from or write to the knowledge base:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
# Read documents
|
|
117
|
+
objects = rm.get_objects(num_samples=10)
|
|
118
|
+
|
|
119
|
+
# Add / upsert documents
|
|
120
|
+
rm.insert([{"id": "doc-1", "text": "New fact.", "metadata": {"source": "agent"}}])
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
BSD 2-Clause — see [LICENSE](LICENSE).
|
|
126
|
+
|
|
127
|
+
## Support
|
|
128
|
+
|
|
129
|
+
- [Moss Docs](https://docs.moss.dev)
|
|
130
|
+
- [Moss Discord](https://discord.gg/eMXExuafBR)
|
|
131
|
+
- [DSPy Docs](https://dspy.ai)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dspy-moss"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Moss semantic search retrieval module for DSPy"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "BSD-2-Clause" }
|
|
7
|
+
requires-python = ">=3.10,<3.15"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"moss>=1.1.1",
|
|
10
|
+
"dspy>=2.0.0",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[dependency-groups]
|
|
14
|
+
dev = [
|
|
15
|
+
"python-dotenv>=1.2.1",
|
|
16
|
+
"ruff>=0.1.0",
|
|
17
|
+
"pytest>=8.0",
|
|
18
|
+
"pytest-asyncio>=0.23.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[tool.ruff]
|
|
22
|
+
line-length = 100
|
|
23
|
+
target-version = "py310"
|
|
24
|
+
|
|
25
|
+
[tool.ruff.lint]
|
|
26
|
+
select = ["E", "W", "F", "I", "B", "UP", "D"]
|
|
27
|
+
ignore = ["D100", "D104"]
|
|
28
|
+
|
|
29
|
+
[tool.ruff.lint.per-file-ignores]
|
|
30
|
+
"tests/**/*.py" = ["D101", "D102", "D103", "D107"]
|
|
31
|
+
"examples/**/*.py" = ["D101", "D102", "D103"]
|
|
32
|
+
|
|
33
|
+
[tool.ruff.lint.pydocstyle]
|
|
34
|
+
convention = "google"
|
|
35
|
+
|
|
36
|
+
[tool.ruff.format]
|
|
37
|
+
quote-style = "double"
|
|
38
|
+
indent-style = "space"
|
|
39
|
+
skip-magic-trailing-comma = false
|
|
40
|
+
line-ending = "auto"
|
|
41
|
+
|
|
42
|
+
[tool.pytest.ini_options]
|
|
43
|
+
asyncio_mode = "auto"
|
|
44
|
+
|
|
45
|
+
[build-system]
|
|
46
|
+
requires = ["setuptools>=61.0"]
|
|
47
|
+
build-backend = "setuptools.build_meta"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["src"]
|
|
51
|
+
namespaces = false
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Moss semantic search retrieval module for DSPy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from moss import (
|
|
6
|
+
DocumentInfo,
|
|
7
|
+
GetDocumentsOptions,
|
|
8
|
+
IndexInfo,
|
|
9
|
+
MossClient,
|
|
10
|
+
SearchResult,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from .retrieve import MossRM
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"DocumentInfo",
|
|
17
|
+
"GetDocumentsOptions",
|
|
18
|
+
"IndexInfo",
|
|
19
|
+
"MossClient",
|
|
20
|
+
"MossRM",
|
|
21
|
+
"SearchResult",
|
|
22
|
+
]
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, InferEdge Inc.
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""DSPy retrieval module backed by Moss semantic search."""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import dspy
|
|
18
|
+
from moss import DocumentInfo, MossClient, MutationOptions, QueryOptions
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _DotDict(dict):
    """Dict with attribute-style read access, compatible with DSPy passage objects.

    ``passage.long_text`` is equivalent to ``passage["long_text"]``. Reading an
    ordinary key that is absent yields ``None`` (the behaviour of ``dict.get``).

    Dunder names are never served from the mapping. Answering them with
    ``None`` makes protocol probes such as ``hasattr(obj, "__array__")`` or the
    ``__getstate__`` lookup done by ``copy``/``pickle`` believe the hook exists,
    which then fails when the ``None`` is called.
    """

    def __getattr__(self, name: str) -> Any:
        """Return ``self.get(name)`` for ordinary names.

        Raises:
            AttributeError: If ``name`` is a dunder (``__x__``) attribute, so
                special-method lookups correctly report "not implemented".
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return self.get(name)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
__all__ = ["MossRM"]
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger("dspy_moss")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class MossRM(dspy.Retrieve):
    """DSPy retrieval module that uses Moss for sub-10ms semantic search.

    Integrates with DSPy's RM interface so it can be set as the default
    retriever via ``dspy.configure(rm=MossRM(...))``, or used directly as
    a tool in ``dspy.ReAct``.

    The Moss client exposes coroutines; every public method here is
    synchronous and drives those coroutines through :meth:`_run`.

    Args:
        index_name: Name of the Moss index to query.
        moss_client: An existing ``MossClient`` instance. When omitted, one
            is created from ``project_id`` / ``project_key`` (or their env-var
            equivalents ``MOSS_PROJECT_ID`` / ``MOSS_PROJECT_KEY``).
        project_id: Moss project ID. Falls back to ``MOSS_PROJECT_ID`` env var.
        project_key: Moss project key. Falls back to ``MOSS_PROJECT_KEY`` env var.
        k: Default number of passages to retrieve per query (default: 3).
        alpha: Hybrid search blend — 1.0 = pure semantic, 0.0 = pure keyword
            (default: 0.8).

    Raises:
        ValueError: If no ``moss_client`` is given and the project ID or key
            cannot be resolved from the arguments or the environment.

    Examples::

        import dspy
        from dspy_moss import MossRM

        rm = MossRM("my-index")  # reads MOSS_PROJECT_ID / KEY from env
        dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)

        retrieve = dspy.Retrieve(k=3)
        result = retrieve("What is the refund policy?")
        print(result.passages)
    """

    def __init__(
        self,
        index_name: str,
        moss_client: MossClient | None = None,
        project_id: str | None = None,
        project_key: str | None = None,
        k: int = 3,
        alpha: float = 0.8,
    ):
        """Initialize the MossRM retrieval module."""
        if moss_client is None:
            # Explicit arguments win over the environment; an empty string is
            # treated the same as "not provided".
            resolved_id = project_id or os.getenv("MOSS_PROJECT_ID") or ""
            resolved_key = project_key or os.getenv("MOSS_PROJECT_KEY") or ""
            if not resolved_id or not resolved_key:
                raise ValueError(
                    "Moss credentials required. Provide moss_client or project_id/project_key, "
                    "or set MOSS_PROJECT_ID and MOSS_PROJECT_KEY environment variables."
                )
            moss_client = MossClient(resolved_id, resolved_key)

        self._index_name = index_name
        self._client = moss_client
        self._alpha = alpha
        # Single worker used only when a coroutine must be driven while the
        # calling thread already has a running event loop (see ``_run``).
        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="dspy-moss")
        super().__init__(k=k)

    # ------------------------------------------------------------------
    # Async helper
    # ------------------------------------------------------------------

    def _run(self, coro: Any) -> Any:
        """Run a coroutine from sync context, safe inside a running event loop.

        ``asyncio.run`` cannot be called from a thread whose event loop is
        already running, so in that case the coroutine is handed to the
        background executor and this call blocks until it finishes.

        Args:
            coro: The coroutine object to execute.

        Returns:
            Whatever the coroutine returns.
        """
        try:
            asyncio.get_running_loop()
            in_running_loop = True
        except RuntimeError:
            # No loop is running in this thread.
            in_running_loop = False

        if in_running_loop:
            return self._executor.submit(asyncio.run, coro).result()
        return asyncio.run(coro)

    def close(self) -> None:
        """Release the background executor used when called inside an event loop."""
        self._executor.shutdown(wait=True)

    def __enter__(self) -> MossRM:
        """Return this retriever for use as a context manager."""
        return self

    def __exit__(self, *_exc_info: object) -> None:
        """Release executor resources when leaving a context manager."""
        self.close()

    # ------------------------------------------------------------------
    # Index loading
    # ------------------------------------------------------------------

    def load_index(
        self,
        auto_refresh: bool = False,
        polling_interval_in_seconds: int = 600,
    ) -> None:
        """Load the index into memory for fast local queries.

        Without this, every query falls back to the cloud API. Call once at
        startup before any ``forward()`` or ``dspy.Retrieve`` calls.

        Args:
            auto_refresh: Reload automatically when the cloud index is updated.
            polling_interval_in_seconds: Refresh interval when ``auto_refresh=True``.
        """
        logger.info("Loading Moss index '%s' into memory", self._index_name)
        self._run(
            self._client.load_index(
                self._index_name,
                auto_refresh=auto_refresh,
                polling_interval_in_seconds=polling_interval_in_seconds,
            )
        )
        logger.info("Moss index '%s' ready", self._index_name)

    def forward(
        self,
        query_or_queries: str | list[str],
        k: int | None = None,
        **kwargs: Any,
    ) -> list[_DotDict]:
        """Retrieve the top-k passages for one or more queries.

        Returns a list of dot-accessible dicts (``long_text``, ``id``,
        ``score``, ``metadata``). DSPy's ``Retrieve`` base class wraps this
        list in a ``Prediction`` and extracts ``long_text`` from each item.

        Args:
            query_or_queries: A single query string or a list of query strings.
                Empty strings are skipped.
            k: Number of passages to retrieve. Defaults to ``self.k``.
            **kwargs: Additional keyword arguments. ``top_k`` and ``alpha``
                override ``k`` and the instance-level ``alpha``; everything
                else is passed through to ``QueryOptions``.

        Returns:
            The passages of all queries, concatenated in query order.
        """
        k = k if k is not None else self.k
        queries = (
            [query_or_queries]
            if isinstance(query_or_queries, str)
            else query_or_queries
        )
        queries = [q for q in queries if q]

        # Work on a copy so the caller's kwargs are not mutated by ``pop``.
        option_kwargs = dict(kwargs)
        top_k = option_kwargs.pop("top_k", k)
        alpha = option_kwargs.pop("alpha", self._alpha)

        passages = []
        for query in queries:
            result = self._run(
                self._client.query(
                    self._index_name,
                    query,
                    options=QueryOptions(top_k=top_k, alpha=alpha, **option_kwargs),
                )
            )
            for doc in result.docs:
                passages.append(
                    _DotDict(
                        long_text=doc.text,
                        id=doc.id,
                        score=doc.score,
                        metadata=doc.metadata or {},
                    )
                )
            logger.debug("Moss query '%s' returned %d docs", query, len(result.docs))

        return passages

    # ------------------------------------------------------------------
    # Mutable RM helpers (optional — for agents that write to the index)
    # ------------------------------------------------------------------

    def get_objects(self, num_samples: int = 5) -> list[dict[str, Any]]:
        """Fetch up to ``num_samples`` documents from the index.

        Args:
            num_samples: Maximum number of documents to return. Values below
                zero are treated as zero.

        Returns:
            List of dicts with ``id``, ``text``, and ``metadata`` keys.
        """
        docs = self._run(self._client.get_docs(self._index_name))
        # Clamp so a negative value yields no documents instead of slicing
        # "all but the last n" off the end of the list.
        limit = max(num_samples, 0)
        return [
            {"id": d.id, "text": d.text, "metadata": d.metadata or {}}
            for d in docs[:limit]
        ]

    def insert(self, new_objects: dict[str, Any] | list[dict[str, Any]]) -> None:
        """Add or upsert documents into the index.

        Args:
            new_objects: A single document dict or a list of dicts.
                Each dict must have an ``id`` and ``text`` key; ``metadata``
                is optional and may be omitted, ``None``, or empty. Metadata
                keys and values are converted to strings.

        Raises:
            KeyError: If a document lacks ``id`` or ``text``.
        """
        if isinstance(new_objects, dict):
            new_objects = [new_objects]

        moss_docs = []
        for obj in new_objects:
            # ``or {}`` also covers an explicit ``"metadata": None`` entry,
            # which ``obj.get("metadata", {})`` would pass through unchanged.
            raw_metadata = obj.get("metadata") or {}
            metadata = {str(key): str(value) for key, value in raw_metadata.items()}
            moss_docs.append(
                DocumentInfo(
                    id=obj["id"],
                    text=obj["text"],
                    # Empty metadata is sent as ``None`` rather than ``{}``.
                    metadata=metadata or None,
                )
            )

        self._run(
            self._client.add_docs(
                self._index_name, moss_docs, options=MutationOptions(upsert=True)
            )
        )
        logger.info("Inserted %d documents into Moss index '%s'", len(moss_docs), self._index_name)
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dspy-moss
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Moss semantic search retrieval module for DSPy
|
|
5
|
+
License: BSD-2-Clause
|
|
6
|
+
Requires-Python: <3.15,>=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: moss>=1.1.1
|
|
10
|
+
Requires-Dist: dspy>=2.0.0
|
|
11
|
+
Dynamic: license-file
|
|
12
|
+
|
|
13
|
+
# dspy-moss
|
|
14
|
+
|
|
15
|
+
Moss semantic search retrieval module for [DSPy](https://dspy.ai).
|
|
16
|
+
|
|
17
|
+
Provides `MossRM` — a `dspy.Retrieve` subclass that plugs into DSPy's RM interface for sub-10ms knowledge retrieval with no external embedder.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install dspy-moss
|
|
23
|
+
# or
|
|
24
|
+
uv add dspy-moss
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import dspy
|
|
31
|
+
from dspy_moss import MossRM
|
|
32
|
+
|
|
33
|
+
# Reads MOSS_PROJECT_ID and MOSS_PROJECT_KEY from environment
|
|
34
|
+
rm = MossRM(index_name="my-index")
|
|
35
|
+
rm.load_index() # load into this process's memory before querying
|
|
36
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
37
|
+
|
|
38
|
+
retrieve = dspy.Retrieve(k=3)
|
|
39
|
+
result = retrieve("What is the refund policy?")
|
|
40
|
+
for passage in result.passages:
|
|
41
|
+
print(f"[{passage['score']:.3f}] {passage['long_text']}")
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage patterns
|
|
45
|
+
|
|
46
|
+
### As a configured retriever
|
|
47
|
+
|
|
48
|
+
Set `MossRM` as the default retriever for all `dspy.Retrieve` calls in your program:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import dspy
|
|
52
|
+
from dspy_moss import MossRM
|
|
53
|
+
|
|
54
|
+
rm = MossRM(
|
|
55
|
+
index_name="support-kb",
|
|
56
|
+
k=5,
|
|
57
|
+
alpha=0.8, # 1.0 = semantic only, 0.0 = keyword only
|
|
58
|
+
)
|
|
59
|
+
dspy.configure(lm=dspy.LM("openai/gpt-4o"), rm=rm)
|
|
60
|
+
|
|
61
|
+
# Any dspy.Retrieve() now uses Moss
|
|
62
|
+
class RAG(dspy.Module):
|
|
63
|
+
def __init__(self):
|
|
64
|
+
self.retrieve = dspy.Retrieve(k=3)
|
|
65
|
+
self.generate = dspy.ChainOfThought("context, question -> answer")
|
|
66
|
+
|
|
67
|
+
def forward(self, question):
|
|
68
|
+
context = self.retrieve(question).passages
|
|
69
|
+
return self.generate(context=context, question=question)
|
|
70
|
+
|
|
71
|
+
rag = RAG()
|
|
72
|
+
print(rag("How long do refunds take?").answer)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### As a ReAct tool
|
|
76
|
+
|
|
77
|
+
`MossRM.forward()` is already sync, so pass the instance directly — no wrapper needed:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import dspy
|
|
81
|
+
from dspy_moss import MossRM
|
|
82
|
+
|
|
83
|
+
rm = MossRM(index_name="support-kb", k=5)
|
|
84
|
+
rm.load_index()
|
|
85
|
+
agent = dspy.ReAct(signature="question -> answer", tools=[rm])
|
|
86
|
+
print(agent(question="What payment methods do you accept?").answer)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### With an explicit client
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from moss import MossClient
|
|
93
|
+
from dspy_moss import MossRM
|
|
94
|
+
|
|
95
|
+
client = MossClient("proj-id", "proj-key")
|
|
96
|
+
rm = MossRM(index_name="my-index", moss_client=client, k=5)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Configuration
|
|
100
|
+
|
|
101
|
+
### MossRM
|
|
102
|
+
|
|
103
|
+
| Parameter | Default | Description |
|
|
104
|
+
| --- | --- | --- |
|
|
105
|
+
| `index_name` | (required) | Name of the Moss index to query |
|
|
106
|
+
| `moss_client` | `None` | Existing `MossClient`. When omitted, one is created from credentials |
|
|
107
|
+
| `project_id` | `MOSS_PROJECT_ID` env var | Moss project ID |
|
|
108
|
+
| `project_key` | `MOSS_PROJECT_KEY` env var | Moss project key |
|
|
109
|
+
| `k` | `3` | Default number of passages per query |
|
|
110
|
+
| `alpha` | `0.8` | Search blend: 1.0 = semantic only, 0.0 = keyword only |
|
|
111
|
+
|
|
112
|
+
### Passage format
|
|
113
|
+
|
|
114
|
+
Each entry in `result.passages` is a dict with:
|
|
115
|
+
|
|
116
|
+
| Key | Type | Description |
|
|
117
|
+
| --- | --- | --- |
|
|
118
|
+
| `long_text` | `str` | Document text (DSPy's standard passage field) |
|
|
119
|
+
| `id` | `str` | Document ID |
|
|
120
|
+
| `score` | `float` | Relevance score |
|
|
121
|
+
| `metadata` | `dict` | Key-value metadata stored with the document |
|
|
122
|
+
|
|
123
|
+
### Mutable index helpers
|
|
124
|
+
|
|
125
|
+
`MossRM` also exposes two optional helpers for agents that write to the knowledge base:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# Read documents
|
|
129
|
+
objects = rm.get_objects(num_samples=10)
|
|
130
|
+
|
|
131
|
+
# Add / upsert documents
|
|
132
|
+
rm.insert([{"id": "doc-1", "text": "New fact.", "metadata": {"source": "agent"}}])
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
BSD 2-Clause — see [LICENSE](LICENSE).
|
|
138
|
+
|
|
139
|
+
## Support
|
|
140
|
+
|
|
141
|
+
- [Moss Docs](https://docs.moss.dev)
|
|
142
|
+
- [Moss Discord](https://discord.gg/eMXExuafBR)
|
|
143
|
+
- [DSPy Docs](https://dspy.ai)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/dspy_moss/__init__.py
|
|
5
|
+
src/dspy_moss/retrieve.py
|
|
6
|
+
src/dspy_moss.egg-info/PKG-INFO
|
|
7
|
+
src/dspy_moss.egg-info/SOURCES.txt
|
|
8
|
+
src/dspy_moss.egg-info/dependency_links.txt
|
|
9
|
+
src/dspy_moss.egg-info/requires.txt
|
|
10
|
+
src/dspy_moss.egg-info/top_level.txt
|
|
11
|
+
tests/test_retrieve.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dspy_moss
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from dspy_moss import MossRM
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class FakeDoc:
    """Minimal stand-in for a Moss search hit, carrying the fields ``MossRM`` reads."""

    id: str  # document identifier
    text: str  # passage body
    score: float  # relevance score
    metadata: dict[str, str] | None = None  # optional key/value metadata
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class FakeResult:
    """Minimal stand-in for a Moss query result: just the list of hits."""

    docs: list[FakeDoc]  # hits in the order the fake client produced them
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FakeMossClient:
    """In-memory stand-in for ``MossClient`` that records every call it receives."""

    def __init__(self) -> None:
        # One entry per ``query`` / ``load_index`` call, in call order.
        self.queries: list[dict[str, Any]] = []
        self.load_calls: list[dict[str, Any]] = []

    async def query(self, index_name: str, query: str, options: Any) -> FakeResult:
        """Record the call and answer with one canned document derived from ``query``."""
        recorded = {"index_name": index_name, "query": query, "options": options}
        self.queries.append(recorded)
        canned = FakeDoc(
            id=f"{query}-1",
            text=f"passage for {query}",
            score=0.91,
            metadata={"source": "unit-test"},
        )
        return FakeResult(docs=[canned])

    async def load_index(
        self,
        index_name: str,
        auto_refresh: bool = False,
        polling_interval_in_seconds: int = 600,
    ) -> str:
        """Record the load request and echo back the index name."""
        recorded = {
            "index_name": index_name,
            "auto_refresh": auto_refresh,
            "polling_interval_in_seconds": polling_interval_in_seconds,
        }
        self.load_calls.append(recorded)
        return index_name
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_forward_returns_dspy_passage_objects_and_query_options() -> None:
    """forward() maps Moss docs to passages and forwards per-call query options."""
    fake = FakeMossClient()

    # The context manager closes the retriever's executor even if forward() raises.
    with MossRM(index_name="support-kb", moss_client=fake, k=3, alpha=0.8) as retriever:
        results = retriever.forward("refund policy", k=5, alpha=0.25, filter={"tier": "pro"})

    assert len(results) == 1
    hit = results[0]
    # Both attribute-style and key-style access must work on a passage.
    assert hit.long_text == "passage for refund policy"
    assert hit["long_text"] == "passage for refund policy"
    assert hit.id == "refund policy-1"
    assert hit.score == 0.91
    assert hit.metadata == {"source": "unit-test"}

    recorded = fake.queries[0]
    assert recorded["index_name"] == "support-kb"
    assert recorded["query"] == "refund policy"
    # Per-call arguments override the values given to the constructor.
    sent_options = recorded["options"]
    assert sent_options.top_k == 5
    assert sent_options.alpha == 0.25
    assert sent_options.filter == {"tier": "pro"}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_forward_handles_multiple_queries_and_skips_empty_queries() -> None:
    """A list of queries is searched one by one, and blank entries are dropped."""
    fake = FakeMossClient()

    with MossRM(index_name="support-kb", moss_client=fake, k=2) as retriever:
        results = retriever.forward(["refund policy", "", "payment methods"])

    issued = [call["query"] for call in fake.queries]
    assert issued == ["refund policy", "payment methods"]

    texts = [passage.long_text for passage in results]
    assert texts == ["passage for refund policy", "passage for payment methods"]

    # Constructor defaults apply to every query when no override is passed.
    for call in fake.queries:
        assert call["options"].top_k == 2
        assert call["options"].alpha == pytest.approx(0.8)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@pytest.mark.asyncio
async def test_forward_inside_running_event_loop_reuses_executor() -> None:
    """Sync forward() works from inside a running loop and keeps its executor."""
    fake = FakeMossClient()
    retriever = MossRM(index_name="support-kb", moss_client=fake)
    executor_before = retriever._executor

    with retriever:
        first_hits = retriever.forward("first")
        second_hits = retriever.forward("second")

    assert retriever._executor is executor_before
    issued = [call["query"] for call in fake.queries]
    assert issued == ["first", "second"]
    assert first_hits[0].long_text == "passage for first"
    assert second_hits[0].long_text == "passage for second"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_load_index_forwards_refresh_options() -> None:
    """load_index() hands the index name and refresh settings to the client."""
    fake = FakeMossClient()

    with MossRM(index_name="support-kb", moss_client=fake) as retriever:
        retriever.load_index(auto_refresh=True, polling_interval_in_seconds=120)

    expected_call = {
        "index_name": "support-kb",
        "auto_refresh": True,
        "polling_interval_in_seconds": 120,
    }
    assert fake.load_calls == [expected_call]
|