hammad-python 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hammad/__init__.py +7 -137
- hammad/_internal.py +1 -0
- hammad/cli/_runner.py +8 -8
- hammad/cli/plugins.py +55 -26
- hammad/cli/styles/utils.py +16 -8
- hammad/data/__init__.py +1 -5
- hammad/data/collections/__init__.py +2 -3
- hammad/data/collections/collection.py +41 -22
- hammad/data/collections/indexes/__init__.py +1 -1
- hammad/data/collections/indexes/qdrant/__init__.py +1 -1
- hammad/data/collections/indexes/qdrant/index.py +106 -118
- hammad/data/collections/indexes/qdrant/settings.py +14 -14
- hammad/data/collections/indexes/qdrant/utils.py +28 -38
- hammad/data/collections/indexes/tantivy/__init__.py +1 -1
- hammad/data/collections/indexes/tantivy/index.py +57 -59
- hammad/data/collections/indexes/tantivy/settings.py +8 -19
- hammad/data/collections/indexes/tantivy/utils.py +28 -52
- hammad/data/models/__init__.py +2 -7
- hammad/data/sql/__init__.py +1 -1
- hammad/data/sql/database.py +71 -73
- hammad/data/sql/types.py +37 -51
- hammad/formatting/__init__.py +2 -1
- hammad/formatting/json/converters.py +2 -2
- hammad/genai/__init__.py +96 -36
- hammad/genai/agents/__init__.py +47 -1
- hammad/genai/agents/agent.py +1022 -0
- hammad/genai/agents/run.py +615 -0
- hammad/genai/agents/types/__init__.py +29 -22
- hammad/genai/agents/types/agent_context.py +13 -0
- hammad/genai/agents/types/agent_event.py +128 -0
- hammad/genai/agents/types/agent_hooks.py +220 -0
- hammad/genai/agents/types/agent_messages.py +31 -0
- hammad/genai/agents/types/agent_response.py +90 -0
- hammad/genai/agents/types/agent_stream.py +242 -0
- hammad/genai/models/__init__.py +1 -0
- hammad/genai/models/embeddings/__init__.py +39 -0
- hammad/genai/{embedding_models/embedding_model.py → models/embeddings/model.py} +45 -41
- hammad/genai/{embedding_models → models/embeddings}/run.py +10 -8
- hammad/genai/models/embeddings/types/__init__.py +37 -0
- hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_name.py +2 -4
- hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_response.py +11 -4
- hammad/genai/{embedding_models/embedding_model_request.py → models/embeddings/types/embedding_model_run_params.py} +4 -3
- hammad/genai/models/embeddings/types/embedding_model_settings.py +47 -0
- hammad/genai/models/language/__init__.py +48 -0
- hammad/genai/{language_models/language_model.py → models/language/model.py} +481 -204
- hammad/genai/{language_models → models/language}/run.py +80 -57
- hammad/genai/models/language/types/__init__.py +40 -0
- hammad/genai/models/language/types/language_model_instructor_mode.py +47 -0
- hammad/genai/models/language/types/language_model_messages.py +28 -0
- hammad/genai/{language_models/_types.py → models/language/types/language_model_name.py} +3 -40
- hammad/genai/{language_models → models/language/types}/language_model_request.py +17 -25
- hammad/genai/{language_models → models/language/types}/language_model_response.py +61 -68
- hammad/genai/{language_models → models/language/types}/language_model_response_chunk.py +8 -5
- hammad/genai/models/language/types/language_model_settings.py +89 -0
- hammad/genai/{language_models/_streaming.py → models/language/types/language_model_stream.py} +221 -243
- hammad/genai/{language_models/_utils → models/language/utils}/__init__.py +8 -11
- hammad/genai/models/language/utils/requests.py +421 -0
- hammad/genai/{language_models/_utils/_structured_outputs.py → models/language/utils/structured_outputs.py} +31 -20
- hammad/genai/models/model_provider.py +4 -0
- hammad/genai/{multimodal_models.py → models/multimodal.py} +4 -5
- hammad/genai/models/reranking.py +26 -0
- hammad/genai/types/__init__.py +1 -0
- hammad/genai/types/base.py +215 -0
- hammad/genai/{agents/types → types}/history.py +101 -88
- hammad/genai/{agents/types/tool.py → types/tools.py} +156 -141
- hammad/logging/logger.py +2 -1
- hammad/mcp/client/__init__.py +2 -3
- hammad/mcp/client/client.py +10 -10
- hammad/mcp/servers/__init__.py +2 -1
- hammad/service/decorators.py +1 -3
- hammad/web/models.py +1 -3
- hammad/web/search/client.py +10 -22
- {hammad_python-0.0.18.dist-info → hammad_python-0.0.20.dist-info}/METADATA +10 -2
- hammad_python-0.0.20.dist-info/RECORD +127 -0
- hammad/genai/embedding_models/__init__.py +0 -41
- hammad/genai/language_models/__init__.py +0 -35
- hammad/genai/language_models/_utils/_completions.py +0 -131
- hammad/genai/language_models/_utils/_messages.py +0 -89
- hammad/genai/language_models/_utils/_requests.py +0 -202
- hammad/genai/rerank_models.py +0 -26
- hammad_python-0.0.18.dist-info/RECORD +0 -111
- {hammad_python-0.0.18.dist-info → hammad_python-0.0.20.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.18.dist-info → hammad_python-0.0.20.dist-info}/licenses/LICENSE +0 -0
@@ -18,7 +18,7 @@ __all__ = (
|
|
18
18
|
DistanceMetric = Literal[
|
19
19
|
"cosine",
|
20
20
|
"dot",
|
21
|
-
"euclidean",
|
21
|
+
"euclidean",
|
22
22
|
"manhattan",
|
23
23
|
]
|
24
24
|
|
@@ -30,28 +30,28 @@ class QdrantCollectionIndexSettings:
|
|
30
30
|
|
31
31
|
vector_size: int = 768
|
32
32
|
"""The size/dimension of the vectors to store."""
|
33
|
-
|
33
|
+
|
34
34
|
distance_metric: DistanceMetric = "dot"
|
35
35
|
"""Distance metric for similarity search."""
|
36
|
-
|
36
|
+
|
37
37
|
path: Optional[str] = None
|
38
38
|
"""Path for local Qdrant storage (None = in-memory)."""
|
39
|
-
|
39
|
+
|
40
40
|
host: Optional[str] = None
|
41
41
|
"""Qdrant server host (if using remote server)."""
|
42
|
-
|
42
|
+
|
43
43
|
port: int = 6333
|
44
44
|
"""Qdrant server port."""
|
45
|
-
|
45
|
+
|
46
46
|
grpc_port: int = 6334
|
47
47
|
"""Qdrant gRPC port."""
|
48
|
-
|
48
|
+
|
49
49
|
prefer_grpc: bool = False
|
50
50
|
"""Whether to prefer gRPC over HTTP."""
|
51
|
-
|
51
|
+
|
52
52
|
api_key: Optional[str] = None
|
53
53
|
"""API key for Qdrant authentication."""
|
54
|
-
|
54
|
+
|
55
55
|
timeout: Optional[float] = None
|
56
56
|
"""Request timeout for Qdrant operations."""
|
57
57
|
|
@@ -59,7 +59,7 @@ class QdrantCollectionIndexSettings:
|
|
59
59
|
"""Returns a configuration dictionary used
|
60
60
|
to configure the qdrant client internally."""
|
61
61
|
config = {}
|
62
|
-
|
62
|
+
|
63
63
|
if self.path is not None:
|
64
64
|
config["path"] = self.path
|
65
65
|
elif self.host is not None:
|
@@ -74,7 +74,7 @@ class QdrantCollectionIndexSettings:
|
|
74
74
|
else:
|
75
75
|
# In-memory database
|
76
76
|
config["location"] = ":memory:"
|
77
|
-
|
77
|
+
|
78
78
|
return config
|
79
79
|
|
80
80
|
|
@@ -86,9 +86,9 @@ class QdrantCollectionIndexQuerySettings:
|
|
86
86
|
|
87
87
|
limit: int = 10
|
88
88
|
"""The maximum number of results to return."""
|
89
|
-
|
89
|
+
|
90
90
|
score_threshold: Optional[float] = None
|
91
91
|
"""Minimum similarity score threshold for results."""
|
92
|
-
|
92
|
+
|
93
93
|
exact: bool = False
|
94
|
-
"""Whether to use exact search (slower but more accurate)."""
|
94
|
+
"""Whether to use exact search (slower but more accurate)."""
|
@@ -1,14 +1,7 @@
|
|
1
1
|
"""hammad.data.collections.indexes.qdrant.utils"""
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
|
-
from typing import
|
5
|
-
Any,
|
6
|
-
Dict,
|
7
|
-
List,
|
8
|
-
Optional,
|
9
|
-
Union,
|
10
|
-
final
|
11
|
-
)
|
4
|
+
from typing import Any, Dict, List, Optional, Union, final
|
12
5
|
import uuid
|
13
6
|
|
14
7
|
from .....cache import cached
|
@@ -21,6 +14,7 @@ from .settings import (
|
|
21
14
|
# Lazy imports to avoid errors when qdrant is not installed
|
22
15
|
try:
|
23
16
|
import numpy as np
|
17
|
+
|
24
18
|
NUMPY_AVAILABLE = True
|
25
19
|
except ImportError:
|
26
20
|
NUMPY_AVAILABLE = False
|
@@ -42,10 +36,10 @@ class QdrantCollectionIndexError(Exception):
|
|
42
36
|
@dataclass
|
43
37
|
class QdrantClientWrapper:
|
44
38
|
"""Wrapper over the qdrant client and collection setup."""
|
45
|
-
|
39
|
+
|
46
40
|
client: Any
|
47
41
|
"""The qdrant client object."""
|
48
|
-
|
42
|
+
|
49
43
|
collection_name: str
|
50
44
|
"""The name of the qdrant collection."""
|
51
45
|
|
@@ -55,14 +49,14 @@ def convert_distance_metric(metric: DistanceMetric) -> Any:
|
|
55
49
|
"""Convert string distance metric to qdrant Distance enum."""
|
56
50
|
try:
|
57
51
|
from qdrant_client.models import Distance
|
58
|
-
|
52
|
+
|
59
53
|
mapping = {
|
60
54
|
"cosine": Distance.COSINE,
|
61
55
|
"dot": Distance.DOT,
|
62
56
|
"euclidean": Distance.EUCLID,
|
63
57
|
"manhattan": Distance.MANHATTAN,
|
64
58
|
}
|
65
|
-
|
59
|
+
|
66
60
|
return mapping.get(metric, Distance.DOT)
|
67
61
|
except ImportError:
|
68
62
|
raise QdrantCollectionIndexError(
|
@@ -71,7 +65,7 @@ def convert_distance_metric(metric: DistanceMetric) -> Any:
|
|
71
65
|
)
|
72
66
|
|
73
67
|
|
74
|
-
@cached
|
68
|
+
@cached
|
75
69
|
def create_qdrant_client(settings: QdrantCollectionIndexSettings) -> Any:
|
76
70
|
"""Create a qdrant client from settings."""
|
77
71
|
try:
|
@@ -81,9 +75,9 @@ def create_qdrant_client(settings: QdrantCollectionIndexSettings) -> Any:
|
|
81
75
|
"qdrant-client is required for QdrantCollectionIndex. "
|
82
76
|
"Install with: pip install qdrant-client"
|
83
77
|
)
|
84
|
-
|
78
|
+
|
85
79
|
config = settings.get_qdrant_config()
|
86
|
-
|
80
|
+
|
87
81
|
if "path" in config:
|
88
82
|
# Local persistent storage
|
89
83
|
return QdrantClient(path=config["path"])
|
@@ -95,12 +89,12 @@ def create_qdrant_client(settings: QdrantCollectionIndexSettings) -> Any:
|
|
95
89
|
"grpc_port": config.get("grpc_port", 6334),
|
96
90
|
"prefer_grpc": config.get("prefer_grpc", False),
|
97
91
|
}
|
98
|
-
|
92
|
+
|
99
93
|
if config.get("api_key"):
|
100
94
|
client_kwargs["api_key"] = config["api_key"]
|
101
95
|
if config.get("timeout"):
|
102
96
|
client_kwargs["timeout"] = config["timeout"]
|
103
|
-
|
97
|
+
|
104
98
|
return QdrantClient(**client_kwargs)
|
105
99
|
else:
|
106
100
|
# In-memory database
|
@@ -112,47 +106,43 @@ def prepare_vector(
|
|
112
106
|
expected_size: int,
|
113
107
|
) -> List[float]:
|
114
108
|
"""Prepare and validate a vector for qdrant storage."""
|
115
|
-
if NUMPY_AVAILABLE and hasattr(vector,
|
109
|
+
if NUMPY_AVAILABLE and hasattr(vector, "tolist"):
|
116
110
|
# Handle numpy arrays
|
117
111
|
vector = vector.tolist()
|
118
112
|
elif not isinstance(vector, list):
|
119
113
|
raise QdrantCollectionIndexError(
|
120
114
|
f"Vector must be a list or numpy array, got {type(vector)}"
|
121
115
|
)
|
122
|
-
|
116
|
+
|
123
117
|
if len(vector) != expected_size:
|
124
118
|
raise QdrantCollectionIndexError(
|
125
119
|
f"Vector size {len(vector)} doesn't match expected size {expected_size}"
|
126
120
|
)
|
127
|
-
|
121
|
+
|
128
122
|
# Ensure all elements are floats
|
129
123
|
try:
|
130
124
|
return [float(x) for x in vector]
|
131
125
|
except (TypeError, ValueError) as e:
|
132
|
-
raise QdrantCollectionIndexError(
|
133
|
-
f"Vector contains non-numeric values: {e}"
|
134
|
-
)
|
126
|
+
raise QdrantCollectionIndexError(f"Vector contains non-numeric values: {e}")
|
135
127
|
|
136
128
|
|
137
129
|
def build_qdrant_filter(filters: Optional[Dict[str, Any]]) -> Optional[Any]:
|
138
130
|
"""Build qdrant filter from filters dict."""
|
139
131
|
if not filters:
|
140
132
|
return None
|
141
|
-
|
133
|
+
|
142
134
|
try:
|
143
135
|
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
144
|
-
|
136
|
+
|
145
137
|
conditions = []
|
146
138
|
for key, value in filters.items():
|
147
|
-
conditions.append(
|
148
|
-
|
149
|
-
)
|
150
|
-
|
139
|
+
conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
|
140
|
+
|
151
141
|
if len(conditions) == 1:
|
152
142
|
return Filter(must=[conditions[0]])
|
153
143
|
else:
|
154
144
|
return Filter(must=conditions)
|
155
|
-
|
145
|
+
|
156
146
|
except ImportError:
|
157
147
|
raise QdrantCollectionIndexError(
|
158
148
|
"qdrant-client is required for QdrantCollectionIndex. "
|
@@ -168,27 +158,26 @@ def create_collection_if_not_exists(
|
|
168
158
|
"""Create qdrant collection if it doesn't exist."""
|
169
159
|
try:
|
170
160
|
from qdrant_client.models import VectorParams
|
171
|
-
|
161
|
+
|
172
162
|
# Check if collection exists
|
173
163
|
try:
|
174
164
|
collections = client.get_collections()
|
175
165
|
collection_names = [col.name for col in collections.collections]
|
176
|
-
|
166
|
+
|
177
167
|
if collection_name not in collection_names:
|
178
168
|
# Create collection
|
179
169
|
distance_metric = convert_distance_metric(settings.distance_metric)
|
180
|
-
|
170
|
+
|
181
171
|
client.create_collection(
|
182
172
|
collection_name=collection_name,
|
183
173
|
vectors_config=VectorParams(
|
184
|
-
size=settings.vector_size,
|
185
|
-
distance=distance_metric
|
174
|
+
size=settings.vector_size, distance=distance_metric
|
186
175
|
),
|
187
176
|
)
|
188
177
|
except Exception:
|
189
178
|
# Collection might already exist or other issue
|
190
179
|
pass
|
191
|
-
|
180
|
+
|
192
181
|
except ImportError:
|
193
182
|
raise QdrantCollectionIndexError(
|
194
183
|
"qdrant-client is required for QdrantCollectionIndex. "
|
@@ -201,11 +190,12 @@ def serialize(obj: Any) -> Any:
|
|
201
190
|
"""Serialize an object to JSON-compatible format."""
|
202
191
|
try:
|
203
192
|
from msgspec import json
|
193
|
+
|
204
194
|
return json.decode(json.encode(obj))
|
205
195
|
except Exception:
|
206
196
|
# Fallback to manual serialization if msgspec fails
|
207
197
|
from dataclasses import is_dataclass, asdict
|
208
|
-
|
198
|
+
|
209
199
|
if isinstance(obj, (str, int, float, bool, type(None))):
|
210
200
|
return obj
|
211
201
|
elif isinstance(obj, (list, tuple)):
|
@@ -217,4 +207,4 @@ def serialize(obj: Any) -> Any:
|
|
217
207
|
elif hasattr(obj, "__dict__"):
|
218
208
|
return serialize(obj.__dict__)
|
219
209
|
else:
|
220
|
-
return str(obj)
|
210
|
+
return str(obj)
|
@@ -1 +1 @@
|
|
1
|
-
"""hammad.data.collections.indexes.tantivy"""
|
1
|
+
"""hammad.data.collections.indexes.tantivy"""
|
@@ -1,15 +1,7 @@
|
|
1
1
|
"""hammad.data.collections.indexes.tantivy.index"""
|
2
2
|
|
3
3
|
from datetime import datetime, timezone, timedelta
|
4
|
-
from typing import
|
5
|
-
Any,
|
6
|
-
Dict,
|
7
|
-
Generic,
|
8
|
-
List,
|
9
|
-
Optional,
|
10
|
-
Type,
|
11
|
-
final
|
12
|
-
)
|
4
|
+
from typing import Any, Dict, Generic, List, Optional, Type, final
|
13
5
|
import uuid
|
14
6
|
from pathlib import Path
|
15
7
|
import json
|
@@ -25,7 +17,7 @@ from ....sql.database import Database
|
|
25
17
|
from . import utils
|
26
18
|
from .settings import (
|
27
19
|
TantivyCollectionIndexSettings,
|
28
|
-
TantivyCollectionIndexQuerySettings
|
20
|
+
TantivyCollectionIndexQuerySettings,
|
29
21
|
)
|
30
22
|
|
31
23
|
|
@@ -35,7 +27,7 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
35
27
|
storage / search engine for a collection, that implements
|
36
28
|
fast indexing & querying capabilities using the
|
37
29
|
`tantivy` package.
|
38
|
-
|
30
|
+
|
39
31
|
This collection index is built into the core dependencies
|
40
32
|
of the `hammad-python` package, and is the default index
|
41
33
|
used by the `Collection` class."""
|
@@ -52,15 +44,15 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
52
44
|
query_settings: Optional[TantivyCollectionIndexQuerySettings] = None,
|
53
45
|
) -> None:
|
54
46
|
"""Initialize a new `TantivyCollectionIndex` with a given set
|
55
|
-
of parameters.
|
56
|
-
|
47
|
+
of parameters.
|
48
|
+
|
57
49
|
Args:
|
58
50
|
name: The name of the index.
|
59
51
|
schema: The schema of the items that can be stored
|
60
52
|
within this index.
|
61
53
|
ttl: The time to live for the items within this index.
|
62
54
|
path: The path to the directory where the index will be stored.
|
63
|
-
(If not provided, the collection will be built on memory. This is how to
|
55
|
+
(If not provided, the collection will be built on memory. This is how to
|
64
56
|
distinguish between different collection locations.)
|
65
57
|
fast: Whether to use fast schema building & indexing
|
66
58
|
from `tantivy`'s builtin implementation.
|
@@ -106,13 +98,13 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
106
98
|
database_path = None
|
107
99
|
if self.path is not None:
|
108
100
|
database_path = self.path / f"{name}.db"
|
109
|
-
|
101
|
+
|
110
102
|
self._database = Database[DatabaseItemType](
|
111
103
|
name=name,
|
112
104
|
schema=schema,
|
113
105
|
ttl=ttl,
|
114
106
|
path=database_path,
|
115
|
-
table_name=f"tantivy_{name}"
|
107
|
+
table_name=f"tantivy_{name}",
|
116
108
|
)
|
117
109
|
|
118
110
|
try:
|
@@ -136,13 +128,13 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
136
128
|
ttl: Optional[int] = None,
|
137
129
|
) -> str:
|
138
130
|
"""Add a new item to the index.
|
139
|
-
|
131
|
+
|
140
132
|
Args:
|
141
133
|
item: The item to add to the index.
|
142
134
|
id: The id of the item.
|
143
135
|
filters: The filters to apply to the item.
|
144
136
|
ttl: The time to live for the item.
|
145
|
-
|
137
|
+
|
146
138
|
Returns:
|
147
139
|
The ID of the added item.
|
148
140
|
"""
|
@@ -153,10 +145,10 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
153
145
|
filters=filters,
|
154
146
|
ttl=ttl,
|
155
147
|
)
|
156
|
-
|
148
|
+
|
157
149
|
# Add to tantivy index for search
|
158
150
|
self._add_to_tantivy_index(item_id, item, filters)
|
159
|
-
|
151
|
+
|
160
152
|
return item_id
|
161
153
|
|
162
154
|
def _add_to_tantivy_index(
|
@@ -167,37 +159,41 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
167
159
|
) -> None:
|
168
160
|
"""Add item to tantivy search index."""
|
169
161
|
doc = tantivy.Document()
|
170
|
-
|
162
|
+
|
171
163
|
# Add ID field
|
172
164
|
doc.add_text("id", item_id)
|
173
|
-
|
165
|
+
|
174
166
|
# Extract and add content for search
|
175
167
|
content = utils.extract_content_for_indexing(item)
|
176
168
|
doc.add_text("content", content)
|
177
|
-
|
169
|
+
|
178
170
|
# Add title field if present
|
179
171
|
if isinstance(item, dict) and "title" in item:
|
180
172
|
doc.add_text("title", str(item["title"]))
|
181
|
-
|
173
|
+
|
182
174
|
# Store the full data as JSON in tantivy
|
183
175
|
serialized_data = utils.serialize(item)
|
184
176
|
json_data = {"value": serialized_data}
|
185
177
|
doc.add_json("data", json.dumps(json_data))
|
186
|
-
|
178
|
+
|
187
179
|
# Add filters as facets
|
188
180
|
if filters:
|
189
181
|
for key, value in filters.items():
|
190
182
|
facet_value = f"/{key}/{value}"
|
191
183
|
doc.add_facet("filters", tantivy.Facet.from_string(facet_value))
|
192
|
-
|
184
|
+
|
193
185
|
# Add timestamps
|
194
186
|
now = datetime.now(timezone.utc)
|
195
187
|
doc.add_date("created_at", now)
|
196
|
-
|
188
|
+
|
197
189
|
# Add score field if present
|
198
|
-
if
|
190
|
+
if (
|
191
|
+
isinstance(item, dict)
|
192
|
+
and "score" in item
|
193
|
+
and isinstance(item["score"], (int, float))
|
194
|
+
):
|
199
195
|
doc.add_integer("score", int(item["score"]))
|
200
|
-
|
196
|
+
|
201
197
|
# Add to index
|
202
198
|
self._writer.add_document(doc)
|
203
199
|
self._writer.commit()
|
@@ -209,11 +205,11 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
209
205
|
filters: Optional[DatabaseItemFilters] = None,
|
210
206
|
) -> Optional[DatabaseItem[DatabaseItemType]]:
|
211
207
|
"""Get an item by ID.
|
212
|
-
|
208
|
+
|
213
209
|
Args:
|
214
210
|
id: The item ID.
|
215
211
|
filters: Optional filters to match.
|
216
|
-
|
212
|
+
|
217
213
|
Returns:
|
218
214
|
The database item or None if not found.
|
219
215
|
"""
|
@@ -236,7 +232,7 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
236
232
|
ascending: bool = True,
|
237
233
|
) -> List[DatabaseItem[DatabaseItemType]]:
|
238
234
|
"""Query items using tantivy search.
|
239
|
-
|
235
|
+
|
240
236
|
Args:
|
241
237
|
query: Search query string.
|
242
238
|
filters: Dictionary of filters to apply.
|
@@ -250,7 +246,7 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
250
246
|
min_score: Minimum relevance score threshold.
|
251
247
|
sort_by: Field to sort by.
|
252
248
|
ascending: Sort direction.
|
253
|
-
|
249
|
+
|
254
250
|
Returns:
|
255
251
|
List of matching database items.
|
256
252
|
"""
|
@@ -262,14 +258,14 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
262
258
|
order_by=sort_by,
|
263
259
|
ascending=ascending,
|
264
260
|
)
|
265
|
-
|
261
|
+
|
266
262
|
# Use tantivy for search
|
267
263
|
self._index.reload()
|
268
264
|
searcher = self._index.searcher()
|
269
|
-
|
265
|
+
|
270
266
|
# Build tantivy query
|
271
267
|
query_parts = []
|
272
|
-
|
268
|
+
|
273
269
|
# Add filter queries
|
274
270
|
if filters:
|
275
271
|
for key, value in filters.items():
|
@@ -279,7 +275,7 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
279
275
|
tantivy.Facet.from_string(f"/{key}/{value}"),
|
280
276
|
)
|
281
277
|
query_parts.append((tantivy.Occur.Must, facet_query))
|
282
|
-
|
278
|
+
|
283
279
|
# Add search query
|
284
280
|
if phrase:
|
285
281
|
words = query.split()
|
@@ -292,7 +288,7 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
292
288
|
for term in terms:
|
293
289
|
fuzzy_q = tantivy.Query.fuzzy_term_query(
|
294
290
|
self._schema,
|
295
|
-
"content",
|
291
|
+
"content",
|
296
292
|
term,
|
297
293
|
distance=fuzzy_distance,
|
298
294
|
)
|
@@ -310,63 +306,63 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
310
306
|
search_query = self._index.parse_query(
|
311
307
|
query, default_field_names=["content", "title"]
|
312
308
|
)
|
313
|
-
|
309
|
+
|
314
310
|
query_parts.append((tantivy.Occur.Must, search_query))
|
315
|
-
|
311
|
+
|
316
312
|
# Build final query
|
317
313
|
if query_parts:
|
318
314
|
final_query = tantivy.Query.boolean_query(query_parts)
|
319
315
|
else:
|
320
316
|
final_query = tantivy.Query.all_query()
|
321
|
-
|
317
|
+
|
322
318
|
# Execute search
|
323
319
|
search_limit = limit or self.query_settings.limit
|
324
|
-
|
320
|
+
|
325
321
|
# Perform search
|
326
322
|
search_result = searcher.search(
|
327
323
|
final_query,
|
328
324
|
limit=search_limit,
|
329
325
|
offset=offset,
|
330
326
|
)
|
331
|
-
|
327
|
+
|
332
328
|
# Get IDs from search results and fetch from database
|
333
329
|
item_ids = []
|
334
330
|
for score, doc_address in search_result.hits:
|
335
331
|
if min_score and score < min_score:
|
336
332
|
continue
|
337
|
-
|
333
|
+
|
338
334
|
doc = searcher.doc(doc_address)
|
339
335
|
item_id = doc.get_first("id")
|
340
336
|
if item_id:
|
341
337
|
item_ids.append(item_id)
|
342
|
-
|
338
|
+
|
343
339
|
# Fetch items from database by IDs
|
344
340
|
results = []
|
345
341
|
for item_id in item_ids:
|
346
342
|
db_item = self._database.get(item_id, filters=filters)
|
347
343
|
if db_item:
|
348
344
|
results.append(db_item)
|
349
|
-
|
345
|
+
|
350
346
|
return results
|
351
347
|
|
352
348
|
def delete(self, id: str) -> bool:
|
353
349
|
"""Delete an item by ID.
|
354
|
-
|
350
|
+
|
355
351
|
Args:
|
356
352
|
id: The item ID.
|
357
|
-
|
353
|
+
|
358
354
|
Returns:
|
359
355
|
True if item was deleted, False if not found.
|
360
356
|
"""
|
361
357
|
# Delete from database
|
362
358
|
deleted = self._database.delete(id)
|
363
|
-
|
359
|
+
|
364
360
|
if deleted:
|
365
361
|
# Remove from tantivy index by reindexing without this item
|
366
362
|
# Note: Tantivy doesn't have efficient single-document deletion
|
367
363
|
# For now, we rely on the database as the source of truth
|
368
364
|
pass
|
369
|
-
|
365
|
+
|
370
366
|
return deleted
|
371
367
|
|
372
368
|
def count(
|
@@ -376,25 +372,27 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
376
372
|
filters: Optional[DatabaseItemFilters] = None,
|
377
373
|
) -> int:
|
378
374
|
"""Count items matching the query and filters.
|
379
|
-
|
375
|
+
|
380
376
|
Args:
|
381
377
|
query: Search query string.
|
382
378
|
filters: Dictionary of filters to apply.
|
383
|
-
|
379
|
+
|
384
380
|
Returns:
|
385
381
|
Number of matching items.
|
386
382
|
"""
|
387
383
|
if not query:
|
388
384
|
# Simple count from database
|
389
385
|
from ....sql.types import QueryFilter, QueryCondition
|
390
|
-
|
386
|
+
|
391
387
|
query_filter = None
|
392
388
|
if filters:
|
393
389
|
conditions = [
|
394
|
-
QueryCondition(
|
390
|
+
QueryCondition(
|
391
|
+
field="filters", operator="contains", value=json.dumps(filters)
|
392
|
+
)
|
395
393
|
]
|
396
394
|
query_filter = QueryFilter(conditions=conditions)
|
397
|
-
|
395
|
+
|
398
396
|
return self._database.count(query_filter)
|
399
397
|
else:
|
400
398
|
# Count via search results
|
@@ -403,12 +401,12 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
403
401
|
|
404
402
|
def clear(self) -> int:
|
405
403
|
"""Clear all items from the index.
|
406
|
-
|
404
|
+
|
407
405
|
Returns:
|
408
406
|
Number of items deleted.
|
409
407
|
"""
|
410
408
|
count = self._database.clear()
|
411
|
-
|
409
|
+
|
412
410
|
# Clear tantivy index by rebuilding it
|
413
411
|
try:
|
414
412
|
self._tantivy_wrapper = utils.build_tantivy_index_from_settings(
|
@@ -419,10 +417,10 @@ class TantivyCollectionIndex(Generic[DatabaseItemType]):
|
|
419
417
|
self._writer = self._tantivy_wrapper.index_writer
|
420
418
|
except Exception:
|
421
419
|
pass
|
422
|
-
|
420
|
+
|
423
421
|
return count
|
424
422
|
|
425
423
|
def __repr__(self) -> str:
|
426
424
|
"""String representation of the index."""
|
427
425
|
location = str(self.path) if self.path else "memory"
|
428
|
-
return f"<TantivyCollectionIndex name='{self.name}' location='{location}'>"
|
426
|
+
return f"<TantivyCollectionIndex name='{self.name}' location='{location}'>"
|
@@ -6,10 +6,7 @@ from typing import (
|
|
6
6
|
Dict,
|
7
7
|
)
|
8
8
|
|
9
|
-
__all__ = (
|
10
|
-
"TantivyCollectionIndexSettings",
|
11
|
-
"TantivyCollectionIndexQuerySettings"
|
12
|
-
)
|
9
|
+
__all__ = ("TantivyCollectionIndexSettings", "TantivyCollectionIndexQuerySettings")
|
13
10
|
|
14
11
|
|
15
12
|
@dataclass
|
@@ -17,7 +14,7 @@ class TantivyCollectionIndexSettings:
|
|
17
14
|
"""Object representation of user configurable settings
|
18
15
|
that can be used to configure a `TantivyCollectionIndex`."""
|
19
16
|
|
20
|
-
fast
|
17
|
+
fast: bool = True
|
21
18
|
"""Whether to use fast schema building & indexing from
|
22
19
|
`tantivy`'s builtin implementation."""
|
23
20
|
|
@@ -26,18 +23,10 @@ class TantivyCollectionIndexSettings:
|
|
26
23
|
to configure the tantivy index internally."""
|
27
24
|
|
28
25
|
return {
|
29
|
-
"text_fields" :
|
30
|
-
|
31
|
-
},
|
32
|
-
"
|
33
|
-
"stored" : True, "indexed" : True, "fast" : self.fast
|
34
|
-
},
|
35
|
-
"date_fields" : {
|
36
|
-
"stored" : True, "indexed" : True, "fast" : self.fast
|
37
|
-
},
|
38
|
-
"json_fields" : {
|
39
|
-
"stored" : True
|
40
|
-
}
|
26
|
+
"text_fields": {"stored": True, "fast": self.fast},
|
27
|
+
"numeric_fields": {"stored": True, "indexed": True, "fast": self.fast},
|
28
|
+
"date_fields": {"stored": True, "indexed": True, "fast": self.fast},
|
29
|
+
"json_fields": {"stored": True},
|
41
30
|
}
|
42
31
|
|
43
32
|
|
@@ -47,5 +36,5 @@ class TantivyCollectionIndexQuerySettings:
|
|
47
36
|
that can be used to configure the query engine for a
|
48
37
|
`TantivyCollectionIndex`."""
|
49
38
|
|
50
|
-
limit
|
51
|
-
"""The maximum number of results to return."""
|
39
|
+
limit: int = 10
|
40
|
+
"""The maximum number of results to return."""
|