postgres-fts-backend 0.0.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postgres_fts_backend/__init__.py +770 -0
- postgres_fts_backend/apps.py +11 -0
- postgres_fts_backend/management/__init__.py +0 -0
- postgres_fts_backend/management/commands/__init__.py +0 -0
- postgres_fts_backend/management/commands/build_postgres_schema.py +84 -0
- postgres_fts_backend/migrations/__init__.py +0 -0
- postgres_fts_backend/models.py +213 -0
- postgres_fts_backend-0.0.1.dist-info/METADATA +111 -0
- postgres_fts_backend-0.0.1.dist-info/RECORD +12 -0
- postgres_fts_backend-0.0.1.dist-info/WHEEL +5 -0
- postgres_fts_backend-0.0.1.dist-info/licenses/LICENSE +21 -0
- postgres_fts_backend-0.0.1.dist-info/top_level.txt +1 -0

@@ -0,0 +1,770 @@ postgres_fts_backend/__init__.py

from __future__ import annotations

import logging
import re
import warnings
from typing import Any, NotRequired, TypedDict

from django.apps import apps as django_apps
from django.contrib.postgres.search import (
    SearchHeadline,
    SearchQuery,
    SearchRank,
    SearchVector,
)
from django.core.exceptions import FieldDoesNotExist
from django.db import DatabaseError, connection, models
from django.db.models import Count, F, FloatField, Q, Value
from django.db.models.functions import Trunc
from django.utils.encoding import force_str
from haystack import connections
from haystack.backends import (
    BaseEngine,
    BaseSearchBackend,
    BaseSearchQuery,
    SearchNode,
    log_query,
)
from haystack.constants import DJANGO_CT, DJANGO_ID
from haystack.indexes import SearchIndex
from haystack.models import SearchResult
from haystack.utils import get_model_ct

from postgres_fts_backend.models import (
    AlignedUnionQuerySet,
    IndexQuerySet,
    generate_index_models,
    get_index_model,
)


class FacetResults(TypedDict, total=False):
    fields: dict[str, list[tuple[Any, int]]]
    dates: dict[str, list[tuple[Any, int]]]
    queries: dict[str, int]


class SearchResponse(TypedDict):
    results: list[SearchResult]
    hits: int
    spelling_suggestion: str | None
    facets: NotRequired[FacetResults]


class _CtInfo(TypedDict):
    model: type[models.Model]
    field_names: list[str]


default_app_config = "postgres_fts_backend.apps.PostgresFTSConfig"

log = logging.getLogger("haystack")


def _table_name(model: type[models.Model]) -> str:
    return f"haystack_index_{model._meta.app_label}_{model._meta.model_name}"


def validate_all_schemas() -> None:
    """Validate all index tables at startup. Called from AppConfig.ready()."""
    try:
        ui = connections["default"].get_unified_index()
        existing_tables = connection.introspection.table_names()
    except Exception:
        warnings.warn(
            "Could not connect to database to validate index schemas. "
            "Run 'manage.py build_postgres_schema' then "
            "'manage.py migrate postgres_fts_backend' once the database is available."
        )
        return

    for model, index in ui.get_indexes().items():
        table = _table_name(model)

        if table not in existing_tables:
            warnings.warn(
                f"Table '{table}' does not exist. Run 'manage.py build_postgres_schema' "
                "then 'manage.py migrate postgres_fts_backend'."
            )
            continue

        expected_columns = {"id", "django_id", "django_ct", "search_vector"}
        for field_name in index.fields:
            if field_name not in ("django_ct", "django_id"):
                expected_columns.add(field_name)

        with connection.cursor() as cursor:
            db_columns = {
                info.name
                for info in connection.introspection.get_table_description(
                    cursor, table
                )
            }

        missing = expected_columns - db_columns
        if missing:
            warnings.warn(
                "Index table '{}' schema is out of date (missing columns: {}). "
                "Run 'manage.py build_postgres_schema' then "
                "'manage.py migrate postgres_fts_backend'.".format(
                    table, ", ".join(sorted(missing))
                )
            )


def _field_names(index: SearchIndex) -> list[str]:
    return [name for name in index.fields if name not in ("django_ct", "django_id")]


def _resolve_field_name(field_name: str) -> str:
    if field_name.endswith("_exact"):
        return field_name[:-6]
    return field_name


def _parse_narrow_query(query_string: str) -> tuple[str, str]:
    match = re.match(r'^(\w+):"(.+)"$', query_string)
    if not match:
        raise ValueError(f"Cannot parse narrow query: '{query_string}'")
    return match.group(1), match.group(2)


class IndexSearch:
    def __init__(
        self,
        qs: IndexQuerySet,
        index: SearchIndex,
        search_config: str,
        has_rank: bool = False,
        search_text: str | None = None,
    ) -> None:
        self.qs = qs
        self.index = index
        self.search_config: str = search_config
        self.has_rank: bool = has_rank
        self.search_text: str | None = search_text
        self.score_field: str = "rank"
        self.highlight_field: str | None = None

    @classmethod
    def from_query_string(
        cls,
        index_model: type[models.Model],
        index: SearchIndex,
        search_config: str,
        query_string: str,
    ) -> IndexSearch:
        if query_string == "*":
            qs = index_model.objects.all().annotate(  # type: ignore[attr-defined]
                rank=Value(0, output_field=FloatField())
            )
            return cls(qs, index, search_config)

        if ":" in query_string and not query_string.startswith('"'):
            field, _, value = query_string.partition(":")
            content_field = index.get_content_field()
            if field == content_field:
                return cls(
                    index_model.objects.search(value, config=search_config),  # type: ignore[attr-defined]
                    index,
                    search_config,
                    has_rank=True,
                    search_text=value,
                )
            qs = index_model.objects.filter(**{field: value}).annotate(  # type: ignore[attr-defined]
                rank=Value(0, output_field=FloatField())
            )
            return cls(qs, index, search_config)

        return cls(
            index_model.objects.search(query_string, config=search_config),  # type: ignore[attr-defined]
            index,
            search_config,
            has_rank=True,
            search_text=query_string,
        )

    @classmethod
    def from_orm_query(
        cls,
        index_model: type[models.Model],
        index: SearchIndex,
        search_config: str,
        orm_query: Q,
    ) -> IndexSearch:
        content_search_text = orm_query.content_search_text  # type: ignore[attr-defined]
        qs = index_model.objects.filter(orm_query)  # type: ignore[attr-defined]
        if content_search_text:
            qs = qs.ranked(content_search_text, config=search_config)
        else:
            qs = qs.annotate(rank=Value(0, output_field=FloatField()))
        return cls(
            qs,
            index,
            search_config,
            has_rank=bool(content_search_text),
            search_text=content_search_text,
        )

    def narrow(self, narrow_queries: list[str]) -> None:
        for nq in narrow_queries:
            field, value = _parse_narrow_query(nq)
            col = _resolve_field_name(field)
            try:
                self.qs.model._meta.get_field(col)
            except FieldDoesNotExist:
                self.qs = self.qs.none()
                return
            self.qs = self.qs.filter(**{col: value})

    def highlight(self) -> None:
        if self.search_text is None:
            return
        content_field = self.index.get_content_field()
        sq = SearchQuery(
            self.search_text, search_type="websearch", config=self.search_config
        )
        self.qs = self.qs.annotate(
            headline=SearchHeadline(content_field, sq, config=self.search_config)
        )
        self.highlight_field = content_field

    def boost(self, boost_dict: dict[str, float]) -> None:
        if not self.has_rank or not boost_dict:
            return
        annotations: dict[str, Any] = {}
        combined: Any = F("rank")
        for i, (term, weight) in enumerate(boost_dict.items()):
            alias = f"_boost_{i}"
            bq = SearchQuery(term, search_type="websearch", config=self.search_config)
            annotations[alias] = SearchRank(
                "search_vector", bq, cover_density=True, normalization=32
            )
            combined = combined * (1.0 + F(alias) * weight)
        annotations["_boosted_rank"] = combined
        self.qs = self.qs.annotate(**annotations)
        self.score_field = "_boosted_rank"

    def count(self) -> int:
        return self.qs.count()

    def facets(
        self,
        facets: list[str] | None = None,
        date_facets: dict[str, Any] | None = None,
        query_facets: list[tuple[str, str]] | None = None,
    ) -> FacetResults:
        result: FacetResults = {}

        if facets:
            result["fields"] = {}
            for field_name in facets:
                col = _resolve_field_name(field_name)
                facet_qs = (
                    self.qs.values(col)
                    .annotate(count=Count("id"))
                    .order_by("-count", col)
                )
                result["fields"][field_name] = [
                    (row[col], row["count"]) for row in facet_qs
                ]

        if date_facets:
            result["dates"] = {}
            for field_name, facet_opts in date_facets.items():
                col = _resolve_field_name(field_name)
                gap_by = facet_opts["gap_by"]
                start_date = facet_opts["start_date"]
                end_date = facet_opts["end_date"]
                facet_qs = (
                    self.qs.filter(
                        **{
                            f"{col}__gte": start_date,
                            f"{col}__lt": end_date,
                        }
                    )
                    .annotate(bucket=Trunc(col, gap_by))
                    .values("bucket")
                    .annotate(count=Count("id"))
                    .order_by("bucket")
                )
                result["dates"][field_name] = [
                    (row["bucket"], row["count"]) for row in facet_qs
                ]

        if query_facets:
            result["queries"] = {}
            for field_name, value in query_facets:
                col = _resolve_field_name(field_name)
                count = self.qs.filter(**{col: value}).count()
                result["queries"][f"{field_name}_{value}"] = count

        return result

    def results(
        self,
        sort_by: list[str] | None = None,
        start_offset: int = 0,
        end_offset: int | None = None,
        result_class: type = SearchResult,
    ) -> list[SearchResult]:
        qs = self.qs

        # Ordering
        if sort_by:
            qs = qs.order_by(*sort_by)
        elif self.has_rank:
            qs = qs.order_by(f"-{self.score_field}")

        # Pagination
        if end_offset is not None:
            qs = qs[start_offset:end_offset]
        elif start_offset:
            qs = qs[start_offset:]

        # Materialize
        model = self.index.get_model()
        field_names = _field_names(self.index)
        app_label = model._meta.app_label
        model_name = model._meta.model_name

        results = []
        for obj in qs:
            stored_fields = {fn: getattr(obj, fn) for fn in field_names}
            if self.highlight_field:
                headline = getattr(obj, "headline", None)
                if headline:
                    stored_fields["highlighted"] = {self.highlight_field: [headline]}
            rank = getattr(obj, self.score_field, None)
            score = float(rank) if self.has_rank and rank is not None else 0
            results.append(
                result_class(
                    app_label, model_name, obj.django_id, score, **stored_fields
                )
            )
        return results


class MultiIndexSearch:
    """Wraps multiple IndexSearch instances for cross-model search."""

    def __init__(self, searches: list[tuple[IndexSearch, type[models.Model]]]) -> None:
        self.searches = searches

    def count(self) -> int:
        return sum(s.count() for s, _model in self.searches)

    def facets(
        self,
        facets: list[str] | None = None,
        date_facets: dict[str, Any] | None = None,
        query_facets: list[tuple[str, str]] | None = None,
    ) -> FacetResults:
        merged: FacetResults = {}

        if facets:
            merged["fields"] = {}
            for field_name in facets:
                col = _resolve_field_name(field_name)
                combined_counts: dict[Any, int] = {}
                for s, model in self.searches:
                    index_model = get_index_model(model)
                    try:
                        index_model._meta.get_field(col)
                    except FieldDoesNotExist:
                        continue
                    sub = s.facets(facets=[field_name])
                    for value, count in sub.get("fields", {}).get(field_name, []):
                        combined_counts[value] = combined_counts.get(value, 0) + count
                merged["fields"][field_name] = sorted(
                    combined_counts.items(), key=lambda x: (-x[1], x[0])
                )

        if date_facets:
            merged["dates"] = {}
            for field_name, facet_opts in date_facets.items():
                col = _resolve_field_name(field_name)
                combined_buckets: dict[Any, int] = {}
                for s, model in self.searches:
                    index_model = get_index_model(model)
                    try:
                        index_model._meta.get_field(col)
                    except FieldDoesNotExist:
                        continue
                    sub = s.facets(date_facets={field_name: facet_opts})
                    for bucket, count in sub.get("dates", {}).get(field_name, []):
                        combined_buckets[bucket] = (
                            combined_buckets.get(bucket, 0) + count
                        )
                merged["dates"][field_name] = sorted(combined_buckets.items())

        if query_facets:
            merged["queries"] = {}
            for field_name, value in query_facets:
                col = _resolve_field_name(field_name)
                total = 0
                for s, model in self.searches:
                    index_model = get_index_model(model)
                    try:
                        index_model._meta.get_field(col)
                    except FieldDoesNotExist:
                        continue
                    sub = s.facets(query_facets=[(field_name, value)])
                    key = f"{field_name}_{value}"
                    total += sub.get("queries", {}).get(key, 0)
                merged["queries"][f"{field_name}_{value}"] = total

        return merged

    def results(
        self,
        sort_by: list[str] | None = None,
        start_offset: int = 0,
        end_offset: int | None = None,
        result_class: type = SearchResult,
    ) -> list[SearchResult]:
        # These are uniform across all searches (same kwargs applied to each)
        first_search = self.searches[0][0]
        score_field = first_search.score_field
        has_rank = first_search.has_rank
        highlight_field = first_search.highlight_field

        # Build the aligned union
        union_qs: IndexQuerySet | AlignedUnionQuerySet = first_search.qs
        for s, model in self.searches[1:]:
            union_qs = union_qs.aligned_union(s.qs)

        # Per-model lookup: field_names and model identity vary across indexes
        ct_map: dict[str, _CtInfo] = {
            get_model_ct(model): {
                "model": model,
                "field_names": _field_names(s.index),
            }
            for s, model in self.searches
        }

        # Ordering — always include tiebreakers for stable pagination
        if sort_by:
            ordered_qs = union_qs.order_by(*sort_by, "django_ct", "django_id")
        else:
            ordered_qs = union_qs.order_by("-rank", "django_ct", "django_id")

        # Pagination
        if end_offset is not None:
            ordered_qs = ordered_qs[start_offset:end_offset]
        elif start_offset:
            ordered_qs = ordered_qs[start_offset:]

        # Materialize
        results = []
        for row in ordered_qs:
            info = ct_map[row["django_ct"]]
            model = info["model"]

            stored_fields = {fn: row.get(fn) for fn in info["field_names"]}

            if highlight_field and "headline" in row and row["headline"]:
                stored_fields["highlighted"] = {highlight_field: [row["headline"]]}

            rank = row.get(score_field)
            score = float(rank) if has_rank and rank is not None else 0

            results.append(
                result_class(
                    model._meta.app_label,
                    model._meta.model_name,
                    row["django_id"],
                    score,
                    **stored_fields,
                )
            )
        return results


class PostgresFTSSearchBackend(BaseSearchBackend):
    def __init__(self, connection_alias, **connection_options):
        super().__init__(connection_alias, **connection_options)
        self.search_config = connection_options.get("SEARCH_CONFIG", "english")

    def build_schema(self, fields):
        return generate_index_models()

    def update(self, index, iterable, commit=True):
        try:
            model = index.get_model()
            index_model = get_index_model(model)
            field_names = _field_names(index)
            content_field = index.get_content_field()

            rows = []
            for obj in iterable:
                prepared = index.full_prepare(obj)
                defaults = {fn: prepared.get(fn) for fn in field_names}
                rows.append(
                    index_model(
                        django_ct=prepared[DJANGO_CT],
                        django_id=prepared[DJANGO_ID],
                        **defaults,
                    )
                )

            if rows:
                index_model.objects.bulk_create(
                    rows,
                    update_conflicts=True,
                    unique_fields=["django_ct", "django_id"],
                    update_fields=field_names,
                )
                index_model.objects.filter(
                    django_ct=get_model_ct(model),
                    django_id__in=[r.django_id for r in rows],
                ).update(
                    search_vector=SearchVector(content_field, config=self.search_config)
                )
        except DatabaseError:
            if not self.silently_fail:
                raise
            log.exception("Failed to update index for %s", index)

    def remove(self, obj_or_string, commit=True):
        try:
            if isinstance(obj_or_string, str):
                # String format: "app_label.model_name.pk"
                parts = obj_or_string.split(".", 2)
                if len(parts) != 3:
                    raise ValueError(
                        "String identifier must be 'app_label.model_name.pk', "
                        f"got '{obj_or_string}'"
                    )
                model = django_apps.get_model(parts[0], parts[1])
                django_ct = get_model_ct(model)
                django_id = parts[2]
            else:
                model = type(obj_or_string)
                django_ct = get_model_ct(model)
                django_id = force_str(obj_or_string.pk)

            index_model = get_index_model(model)
            index_model.objects.filter(
                django_ct=django_ct, django_id=django_id
            ).delete()
        except DatabaseError:
            if not self.silently_fail:
                raise
            log.exception("Failed to remove document '%s'", obj_or_string)

    def clear(self, models=None, commit=True):
        try:
            if models is None:
                ui = connections["default"].get_unified_index()
                models = ui.get_indexes().keys()

            for model in models:
                index_model = get_index_model(model)
                index_model.objects.all().delete()
        except DatabaseError:
            if not self.silently_fail:
                raise
            log.exception("Failed to clear index")

    @log_query
    def search(self, query_string: str, **kwargs: Any) -> SearchResponse:
        if not query_string or not query_string.strip():
            return {"results": [], "hits": 0, "spelling_suggestion": None}

        try:
            ui = connections["default"].get_unified_index()

            requested_models = kwargs.get("models")
            if requested_models:
                model_index_pairs = [(m, ui.get_index(m)) for m in requested_models]
            else:
                model_index_pairs = list(ui.get_indexes().items())

            orm_query = kwargs.pop("orm_query", None)

            result_class = kwargs.get("result_class", SearchResult)
            sort_by = kwargs.get("sort_by")
            start_offset = int(kwargs.get("start_offset", 0))
            end_offset = (
                int(kwargs["end_offset"])
                if kwargs.get("end_offset") is not None
                else None
            )

            # Build IndexSearch per model
            searches = []
            for model, index in model_index_pairs:
                index_model = get_index_model(model)
                if orm_query is not None:
                    s = IndexSearch.from_orm_query(
                        index_model, index, self.search_config, orm_query
                    )
                else:
                    s = IndexSearch.from_query_string(
                        index_model, index, self.search_config, query_string
                    )

                if narrow_queries := kwargs.get("narrow_queries"):
                    s.narrow(narrow_queries)
                if boost := kwargs.get("boost"):
                    s.boost(boost)
                if kwargs.get("highlight"):
                    s.highlight()

                searches.append((s, model))

            if len(searches) == 1:
                # Single-model path (no behavior change)
                s, model = searches[0]
                total_count = s.count()
                facets = s.facets(
                    facets=kwargs.get("facets"),
                    date_facets=kwargs.get("date_facets"),
                    query_facets=kwargs.get("query_facets"),
                )
                results = s.results(sort_by, start_offset, end_offset, result_class)
            else:
                # Multi-model path
                multi = MultiIndexSearch(searches)
                total_count = multi.count()
                facets = multi.facets(
                    facets=kwargs.get("facets"),
                    date_facets=kwargs.get("date_facets"),
                    query_facets=kwargs.get("query_facets"),
                )
                results = multi.results(sort_by, start_offset, end_offset, result_class)

            response: SearchResponse = {
                "results": results,
                "hits": total_count,
                "spelling_suggestion": None,
            }
            if facets:
                response["facets"] = facets
            return response
        except DatabaseError:
            if not self.silently_fail:
                raise
            log.exception("Failed to search with query '%s'", query_string)
            return {"results": [], "hits": 0, "spelling_suggestion": None}

    def prep_value(self, value: Any) -> Any:
        return value

    def more_like_this(
        self,
        model_instance: models.Model,
        additional_query_string: str | None = None,
        **kwargs: Any,
    ) -> None:
        raise NotImplementedError(
            "postgres_fts_backend does not support more_like_this. "
            "PostgreSQL has no native document similarity feature."
        )


class ORMSearchNode(SearchNode):
    def as_orm_query(self, query_fragment_callback):
        result = []
        for child in self.children:
            if hasattr(child, "as_orm_query"):
                result.append(child.as_orm_query(query_fragment_callback))
            else:
                expression, value = child
                field, filter_type = self.split_expression(expression)
                result.append(query_fragment_callback(field, filter_type, value))

        query = Q()
        if self.connector == self.AND:
            for subquery in result:
                query &= subquery
        elif self.connector == self.OR:
            for subquery in result:
                query |= subquery

        if query and self.negated:
            query = ~query

        return query


class ORMSearchQuery(BaseSearchQuery):

    def __init__(self, using="default"):
        super().__init__(using=using)
        self.query_filter = ORMSearchNode()
        self.content_search_text = None

    def clean(self, query_fragment):
        return query_fragment

    def build_query_fragment(self, field, filter_type, value):
        if hasattr(value, "prepare"):
            value = value.prepare(self)

        if filter_type == "content":
            if field == "content":
                self.content_search_text = value
                return Q(
                    search_vector=SearchQuery(
                        value,
                        search_type="websearch",
                        config=self.backend.search_config,
                    )
                )
            return Q(**{f"{field}__trigram_similar": value})

        if filter_type == "fuzzy":
            return Q(**{f"{field}__trigram_similar": value})

        if filter_type == "in":
            value = list(value)
            if not value:
                return Q(pk__in=[])
        elif filter_type == "range":
            value = (value[0], value[1])

        # contains/startswith/endswith → case-insensitive Django lookups
        lookup = {
            "contains": "icontains",
            "startswith": "istartswith",
            "endswith": "iendswith",
        }.get(filter_type, filter_type)

        return Q(**{f"{field}__{lookup}": value})

    def build_query(self):
        final_query = self.query_filter.as_orm_query(self.build_query_fragment)
        if not final_query:
            return Q()
        return final_query

    def matching_all_fragment(self):
        return Q()

    def run(self, spelling_query=None, **kwargs):
        final_query = self.build_query()
        search_kwargs = self.build_params(spelling_query=spelling_query)

        if kwargs:
            search_kwargs.update(kwargs)

        final_query.content_search_text = self.content_search_text
        search_kwargs["orm_query"] = final_query

        results = self.backend.search("*", **search_kwargs)
        self._results = results.get("results", [])
        self._hit_count = results.get("hits", 0)
        self._facet_counts = self.post_process_facets(results)
        self._spelling_suggestion = results.get("spelling_suggestion", None)

    def get_count(self):
        if self._hit_count is None:
            self.run()
        return self._hit_count


class PostgresFTSEngine(BaseEngine):
    backend = PostgresFTSSearchBackend
    query = ORMSearchQuery

@@ -0,0 +1,11 @@ postgres_fts_backend/apps.py

from django.apps import AppConfig


class PostgresFTSConfig(AppConfig):
    name = "postgres_fts_backend"
    default_auto_field = "django.db.models.AutoField"

    def ready(self):
        from postgres_fts_backend import validate_all_schemas  # noqa: PLC0415

        validate_all_schemas()

File without changes: postgres_fts_backend/management/__init__.py (empty)

File without changes: postgres_fts_backend/management/commands/__init__.py (empty)

@@ -0,0 +1,84 @@ postgres_fts_backend/management/commands/build_postgres_schema.py

import os
import sys

from django.conf import settings
from django.contrib.postgres.operations import TrigramExtension
from django.core.management.base import BaseCommand
from django.db.migrations.autodetector import MigrationAutodetector
from django.db.migrations.loader import MigrationLoader
from django.db.migrations.state import ModelState, ProjectState
from django.db.migrations.writer import MigrationWriter

from postgres_fts_backend.models import generate_index_models

APP_LABEL = "postgres_fts_backend"


class Command(BaseCommand):
    help = (
        "Generate Django migrations for haystack search index tables. "
        "Run this after changing SearchIndex definitions, then run "
        "'manage.py migrate postgres_fts_backend'."
    )

    def handle(self, *args, **options):
        index_models = generate_index_models()

        if not index_models:
            self.stdout.write("No search indexes found.")
            return

        # Build the target state from the dynamic models
        new_state = ProjectState()
        for model_cls in index_models.values():
            model_state = ModelState.from_model(model_cls)
            new_state.add_model(model_state)

        # Load existing migrations to get the current state
        loader = MigrationLoader(None, ignore_no_migrations=True)
        old_state = loader.project_state()

        # Detect changes
        autodetector = MigrationAutodetector(old_state, new_state)
        changes = autodetector.changes(graph=loader.graph)

        if not changes.get(APP_LABEL):
            self.stdout.write("No changes detected.")
            return

        # Determine output directory
        migrations_module = getattr(settings, "MIGRATION_MODULES", {}).get(APP_LABEL)
        if migrations_module:
            migrations_dir = os.path.join(*migrations_module.split("."))
            # Make it relative to the project base if not absolute
            if not os.path.isabs(migrations_dir):
                # Find the root by looking at the first component on sys.path
                # that contains the module
                for path in sys.path:
                    candidate = os.path.join(path, migrations_dir)
                    if os.path.isdir(os.path.dirname(candidate)) or path == "":
                        migrations_dir = candidate
                        break
        else:
            # Default to the package's own migrations dir
            # (this file lives in management/commands/, so walk up three levels)
            migrations_dir = os.path.join(
                os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
                "migrations",
            )

        os.makedirs(migrations_dir, exist_ok=True)

        # Ensure __init__.py exists
        init_path = os.path.join(migrations_dir, "__init__.py")
        if not os.path.exists(init_path):
            with open(init_path, "w") as f:
                f.write("")

        for migration in changes[APP_LABEL]:
            if getattr(migration, "initial", False):
                migration.operations.insert(0, TrigramExtension())
            writer = MigrationWriter(migration)
            migration_path = os.path.join(migrations_dir, f"{migration.name}.py")
            with open(migration_path, "w") as f:
                f.write(writer.as_string())
            self.stdout.write(f"Created {migration_path}")

File without changes: postgres_fts_backend/migrations/__init__.py (empty)

@@ -0,0 +1,213 @@ postgres_fts_backend/models.py

from __future__ import annotations

from collections.abc import Callable, Iterator
from typing import Any

from django.contrib.postgres.indexes import GinIndex, OpClass
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVectorField
from django.db import models
from django.db.models import Value
from haystack import connections as haystack_connections
from haystack import indexes as haystack_indexes


class IndexQuerySet(models.QuerySet):
    def search(self, search_text: str, config: str = "english") -> IndexQuerySet:
        """Full-text filter + rank annotation in one call."""
        sq = SearchQuery(search_text, search_type="websearch", config=config)
        return self.filter(search_vector=sq).annotate(
            rank=SearchRank("search_vector", sq, cover_density=True, normalization=32)
        )

    def ranked(self, search_text: str, config: str = "english") -> IndexQuerySet:
        """Add rank annotation only (when filter is applied separately)."""
        sq = SearchQuery(search_text, search_type="websearch", config=config)
        return self.annotate(
            rank=SearchRank("search_vector", sq, cover_density=True, normalization=32)
        )

    def aligned_union(self, other: IndexQuerySet) -> AlignedUnionQuerySet:
        """Start a chainable aligned union with another queryset.

        Returns an AlignedUnionQuerySet that can be further chained:
            qs1.aligned_union(qs2).aligned_union(qs3)
        """
        return AlignedUnionQuerySet([self, other])


class AlignedUnionQuerySet:
    """Accumulates querysets and builds an aligned union lazily.

    Introspects model fields and annotations to build a superset of columns.
    Missing columns are filled with Value(None, output_field=...).
    """

    SKIP_FIELDS: set[str] = {"id", "search_vector"}

    def __init__(self, querysets: list[IndexQuerySet]) -> None:
        self._querysets = list(querysets)
        self._built: models.QuerySet | None = None

    def aligned_union(self, other: IndexQuerySet) -> AlignedUnionQuerySet:
        return AlignedUnionQuerySet(self._querysets + [other])

    def _build(self) -> models.QuerySet:
        if self._built is not None:
            return self._built

        all_columns = {}
        per_qs_columns = []

        for qs in self._querysets:
            qs_cols = set()
            for f in qs.model._meta.get_fields():
                if f.name in self.SKIP_FIELDS:
                    continue
                if hasattr(f, "column"):
                    qs_cols.add(f.name)
                    if f.name not in all_columns:
                        all_columns[f.name] = f
            if hasattr(qs, "query") and hasattr(qs.query, "annotations"):
                for name, annotation in qs.query.annotations.items():
                    qs_cols.add(name)
                    if name not in all_columns:
                        all_columns[name] = annotation.output_field
            per_qs_columns.append(qs_cols)

        sorted_cols = sorted(all_columns.keys())

        aligned = []
        for qs, qs_cols in zip(self._querysets, per_qs_columns):
            missing = set(sorted_cols) - qs_cols
            if missing:
                annotations = {}
                for col_name in missing:
                    field_meta = all_columns[col_name]
                    if hasattr(field_meta, "column"):
                        output_field = field_meta.__class__(null=True)
                    else:
                        output_field = field_meta.__class__()
                    annotations[col_name] = Value(None, output_field=output_field)
                qs = qs.annotate(**annotations)
            aligned.append(qs.values(*sorted_cols))

        self._built = aligned[0].union(*aligned[1:], all=True)
        return self._built

    def order_by(self, *args: str) -> models.QuerySet:
        return self._build().order_by(*args)

    def count(self) -> int:
        return self._build().count()

    def __iter__(self) -> Iterator[dict[str, Any]]:
        return iter(self._build())

    def __getitem__(self, key: int | slice) -> Any:
        return self._build()[key]


FIELD_MAP: dict[type, Callable[[], models.Field]] = {
    haystack_indexes.CharField: lambda: models.TextField(null=True),
    haystack_indexes.EdgeNgramField: lambda: models.TextField(null=True),
    haystack_indexes.NgramField: lambda: models.TextField(null=True),
    haystack_indexes.DateTimeField: lambda: models.DateTimeField(null=True),
    haystack_indexes.DateField: lambda: models.DateField(null=True),
    haystack_indexes.IntegerField: lambda: models.IntegerField(null=True),
    haystack_indexes.FloatField: lambda: models.FloatField(null=True),
    haystack_indexes.BooleanField: lambda: models.BooleanField(null=True),
}


def _django_field_for(haystack_field: haystack_indexes.SearchField) -> models.Field:
    for haystack_cls, factory in FIELD_MAP.items():
        if isinstance(haystack_field, haystack_cls):
            return factory()
    return models.TextField(null=True)


# Django models for search index tables are created dynamically from Haystack
# SearchIndex definitions. Unlike normal Django models (defined statically in
# models.py), these must be built at runtime because the set of fields depends
# on the user's SearchIndex classes, which can change independently of
# migrations. The database schema is managed separately via the
# build_postgres_schema management command, so the runtime model and the
# database can be out of sync until the user regenerates and applies migrations.
# validate_all_schemas() (called at startup) checks for this.

_index_models_cache: dict[str, type[models.Model]] = {}


def _build_index_model(
    source_model: type[models.Model], search_index: haystack_indexes.SearchIndex
) -> type[models.Model]:
    app_label: str = source_model._meta.app_label
    model_name: str = source_model._meta.model_name  # type: ignore[assignment]

    class_name = f"HaystackIndex_{app_label.capitalize()}_{model_name.capitalize()}"

    if class_name in _index_models_cache:
        return _index_models_cache[class_name]

    attrs = {
        "__module__": "postgres_fts_backend.models",
        "django_id": models.CharField(max_length=255),
        "django_ct": models.CharField(max_length=255),
        "search_vector": SearchVectorField(null=True),
        "objects": IndexQuerySet.as_manager(),
    }

    for field_name, field_obj in search_index.fields.items():
        if field_name in ("django_ct", "django_id"):
            continue
        attrs[field_name] = _django_field_for(field_obj)

    table_name = f"haystack_index_{app_label}_{model_name}"

    db_indexes = [
        GinIndex(
            fields=["search_vector"],
            name=f"{table_name}_sv_gin",
        )
    ]

    for field_name, field_obj in search_index.fields.items():
        if isinstance(
            field_obj, (haystack_indexes.EdgeNgramField, haystack_indexes.NgramField)
        ):
            db_indexes.append(
                GinIndex(
                    OpClass(models.F(field_name), name="gin_trgm_ops"),
                    name=f"{table_name}_{field_name}_trgm",
                )
            )

    meta = type(
        "Meta",
        (),
        {
            "app_label": "postgres_fts_backend",
            "db_table": table_name,
            "unique_together": [("django_ct", "django_id")],
            "indexes": db_indexes,
        },
    )
    attrs["Meta"] = meta

    model_cls = type(class_name, (models.Model,), attrs)
    _index_models_cache[class_name] = model_cls
    return model_cls


def get_index_model(source_model: type[models.Model]) -> type[models.Model]:
    ui = haystack_connections["default"].get_unified_index()
    search_index = ui.get_index(source_model)
    return _build_index_model(source_model, search_index)


def generate_index_models() -> dict[type[models.Model], type[models.Model]]:
    ui = haystack_connections["default"].get_unified_index()
    return {
        source_model: _build_index_model(source_model, search_index)
        for source_model, search_index in ui.get_indexes().items()
    }

@@ -0,0 +1,111 @@ postgres_fts_backend-0.0.1.dist-info/METADATA

Metadata-Version: 2.4
Name: postgres-fts-backend
Version: 0.0.1
Summary: A PostgreSQL Full Text Search Backend for Haystack
Author-email: Forest Gregg <fgregg@datamade.us>
Project-URL: Repository, https://github.com/fgregg/postgres-fts-backend
Project-URL: Issues, https://github.com/fgregg/postgres-fts-backend/issues
Classifier: License :: OSI Approved :: MIT License
Requires-Python: >=3.12
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: django>=5.0
Requires-Dist: django-haystack>=2.8.0
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-django; extra == "dev"
Requires-Dist: ruff; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: mypy; extra == "dev"
Requires-Dist: django-stubs; extra == "dev"
Dynamic: license-file

# postgres-fts-backend

A [Django Haystack](https://django-haystack.readthedocs.io/) backend that uses
PostgreSQL's built-in full-text search. No external search service required.

## Requirements

- Python >= 3.12
- Django >= 5.0
- django-haystack >= 2.8.0
- PostgreSQL

## Installation

```bash
pip install postgres-fts-backend
```

Add to `INSTALLED_APPS`:

```python
INSTALLED_APPS = [
    "django.contrib.postgres",
    "haystack",
    "postgres_fts_backend",
    # ...
]
```

Set a migration module so the generated search index migrations live in
your project rather than inside the installed package:

```python
MIGRATION_MODULES = {
    "postgres_fts_backend": "myapp.search_migrations",
}
```

Configure Haystack:

```python
HAYSTACK_CONNECTIONS = {
    "default": {
        "ENGINE": "postgres_fts_backend.PostgresFTSEngine",
    },
}
```

To use a search configuration other than `"english"`:

```python
HAYSTACK_CONNECTIONS = {
    "default": {
        "ENGINE": "postgres_fts_backend.PostgresFTSEngine",
        "SEARCH_CONFIG": "spanish",
    },
}
```
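
Once the engine is configured, searching goes through the standard Haystack API; nothing backend-specific is required. A minimal sketch (the query text and result handling are illustrative):

```python
from haystack.query import SearchQuerySet

# Websearch-style full-text query against the stored search_vector column.
results = SearchQuerySet().filter(content="solar eclipse")

for result in results[:10]:
    print(result.score, result.object)
```

`content` queries are compiled to a websearch `SearchQuery` on the `search_vector` column and ranked with a cover-density `SearchRank`; other field lookups fall back to ordinary ORM filters on the index table.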

## Other Peculiarities of this backend

### Build indexes through models and migrations

```bash
python manage.py build_postgres_schema
python manage.py migrate postgres_fts_backend
```

Run these two commands again whenever you change a `SearchIndex` definition.
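
The index tables mirror whatever fields your `SearchIndex` classes declare, so a plain Haystack index definition is all that is needed. A sketch (the `Note` model and its attributes are hypothetical):

```python
# search_indexes.py in your app. The Note model and its attributes are
# hypothetical; any Haystack SearchIndex works the same way.
from haystack import indexes

from myapp.models import Note


class NoteIndex(indexes.SearchIndex, indexes.Indexable):
    text = indexes.CharField(document=True, use_template=True)
    author = indexes.CharField(model_attr="author")
    created = indexes.DateTimeField(model_attr="created")

    def get_model(self):
        return Note

    def index_queryset(self, using=None):
        return self.get_model().objects.all()
```

For an index like this, `build_postgres_schema` would emit a `haystack_index_myapp_note` table with `text`, `author`, and `created` columns alongside `django_ct`, `django_id`, and `search_vector`.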

### Fuzzy search

Fuzzy queries use PostgreSQL's trigram similarity matching (`pg_trgm`):

```python
results = SearchQuerySet().filter(author__fuzzy="Janee")
```

The similarity threshold is controlled by PostgreSQL's
`pg_trgm.similarity_threshold` setting (default 0.3). To adjust it:

```sql
ALTER DATABASE mydb SET pg_trgm.similarity_threshold = 0.5;
```

### `more_like_this` not implemented

PostgreSQL FTS doesn't provide any facilities for this. It could be done, but I just need to think more about it.

### `spelling_suggestions` are not supported

@@ -0,0 +1,12 @@ postgres_fts_backend-0.0.1.dist-info/RECORD

postgres_fts_backend/__init__.py,sha256=PeVca49tR0wFo87SEYdGc5lt4-b5JTV3-IKHCux5q0w,27151
postgres_fts_backend/apps.py,sha256=Yc9zEu63CorJLVh1HkuLjGqTThn0BBqIW3S2D72n3JA,293
postgres_fts_backend/models.py,sha256=U_Gjur12y_ck3U4wNB5qeRFG46AtneHgsH-2VR-lNB8,8175
postgres_fts_backend/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
postgres_fts_backend/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
postgres_fts_backend/management/commands/build_postgres_schema.py,sha256=-y5pLZ1Eq2cgOUjL07b5li8tI_iNKHQSO4QYwKtvv8w,3296
postgres_fts_backend/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
postgres_fts_backend-0.0.1.dist-info/licenses/LICENSE,sha256=tLxqA1coRYSQ1uk2S_CuxAb-5Pxr3uJAVx3_BbWWTOQ,1084
postgres_fts_backend-0.0.1.dist-info/METADATA,sha256=bBQEQb_wHZlQq0QtaMXq5Cvr0dJApGBEkSIMahbARM0,2688
postgres_fts_backend-0.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
postgres_fts_backend-0.0.1.dist-info/top_level.txt,sha256=o2Bn_Q4mThOCsDP4O-3P2HR3L03fuZnB6hwDF-ywmGQ,21
postgres_fts_backend-0.0.1.dist-info/RECORD,,

@@ -0,0 +1,21 @@ postgres_fts_backend-0.0.1.dist-info/licenses/LICENSE

MIT License

Copyright (c) 2026 Forest Gregg, 2024 Datamade

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1 @@ postgres_fts_backend-0.0.1.dist-info/top_level.txt

postgres_fts_backend