postgres-fts-backend 0.0.1 (py3-none-any wheel)

@@ -0,0 +1,770 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import re
5
+ import warnings
6
+ from typing import Any, NotRequired, TypedDict
7
+
8
+ from django.apps import apps as django_apps
9
+ from django.contrib.postgres.search import (
10
+ SearchHeadline,
11
+ SearchQuery,
12
+ SearchRank,
13
+ SearchVector,
14
+ )
15
+ from django.core.exceptions import FieldDoesNotExist
16
+ from django.db import DatabaseError, connection, models
17
+ from django.db.models import Count, F, FloatField, Q, Value
18
+ from django.db.models.functions import Trunc
19
+ from django.utils.encoding import force_str
20
+ from haystack import connections
21
+ from haystack.backends import (
22
+ BaseEngine,
23
+ BaseSearchBackend,
24
+ BaseSearchQuery,
25
+ SearchNode,
26
+ log_query,
27
+ )
28
+ from haystack.constants import DJANGO_CT, DJANGO_ID
29
+ from haystack.indexes import SearchIndex
30
+ from haystack.models import SearchResult
31
+ from haystack.utils import get_model_ct
32
+
33
+ from postgres_fts_backend.models import (
34
+ AlignedUnionQuerySet,
35
+ IndexQuerySet,
36
+ generate_index_models,
37
+ get_index_model,
38
+ )
39
+
40
+
41
+ class FacetResults(TypedDict, total=False):
42
+ fields: dict[str, list[tuple[Any, int]]]
43
+ dates: dict[str, list[tuple[Any, int]]]
44
+ queries: dict[str, int]
45
+
46
+
47
+ class SearchResponse(TypedDict):
48
+ results: list[SearchResult]
49
+ hits: int
50
+ spelling_suggestion: str | None
51
+ facets: NotRequired[FacetResults]
52
+
53
+
54
+ class _CtInfo(TypedDict):
55
+ model: type[models.Model]
56
+ field_names: list[str]
57
+
58
+
59
+ default_app_config = "postgres_fts_backend.apps.PostgresFTSConfig"
60
+
61
+ log = logging.getLogger("haystack")
62
+
63
+
64
+ def _table_name(model: type[models.Model]) -> str:
65
+ return f"haystack_index_{model._meta.app_label}_{model._meta.model_name}"
66
+
67
+
68
+ def validate_all_schemas() -> None:
69
+ """Validate all index tables at startup. Called from AppConfig.ready()."""
70
+ try:
71
+ ui = connections["default"].get_unified_index()
72
+ existing_tables = connection.introspection.table_names()
73
+ except Exception:
74
+ warnings.warn(
75
+ "Could not connect to database to validate index schemas. "
76
+ "Run 'manage.py build_postgres_schema' then "
77
+ "'manage.py migrate postgres_fts_backend' once the database is available."
78
+ )
79
+ return
80
+
81
+ for model, index in ui.get_indexes().items():
82
+ table = _table_name(model)
83
+
84
+ if table not in existing_tables:
85
+ warnings.warn(
86
+ f"Table '{table}' does not exist. Run 'manage.py build_postgres_schema' "
87
+ "then 'manage.py migrate postgres_fts_backend'."
88
+ )
89
+ continue
90
+
91
+ expected_columns = {"id", "django_id", "django_ct", "search_vector"}
92
+ for field_name in index.fields:
93
+ if field_name not in ("django_ct", "django_id"):
94
+ expected_columns.add(field_name)
95
+
96
+ with connection.cursor() as cursor:
97
+ db_columns = {
98
+ info.name
99
+ for info in connection.introspection.get_table_description(
100
+ cursor, table
101
+ )
102
+ }
103
+
104
+ missing = expected_columns - db_columns
105
+ if missing:
106
+ warnings.warn(
107
+ "Index table '{}' schema is out of date (missing columns: {}). "
108
+ "Run 'manage.py build_postgres_schema' then "
109
+ "'manage.py migrate postgres_fts_backend'.".format(
110
+ table, ", ".join(sorted(missing))
111
+ )
112
+ )
113
+
114
+
115
+ def _field_names(index: SearchIndex) -> list[str]:
116
+ return [name for name in index.fields if name not in ("django_ct", "django_id")]
117
+
118
+
119
+ def _resolve_field_name(field_name: str) -> str:
120
+ if field_name.endswith("_exact"):
121
+ return field_name[:-6]
122
+ return field_name
123
+
124
+
125
+ def _parse_narrow_query(query_string: str) -> tuple[str, str]:
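+ # A narrow query arrives as a string like 'author_exact:"Jane Doe"' (the format
+ # typically produced by Haystack's faceted search form); this returns the pair
+ # ("author_exact", "Jane Doe").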
126
+ match = re.match(r'^(\w+):"(.+)"$', query_string)
127
+ if not match:
128
+ raise ValueError(f"Cannot parse narrow query: '{query_string}'")
129
+ return match.group(1), match.group(2)
130
+
131
+
132
+ class IndexSearch:
133
+ def __init__(
134
+ self,
135
+ qs: IndexQuerySet,
136
+ index: SearchIndex,
137
+ search_config: str,
138
+ has_rank: bool = False,
139
+ search_text: str | None = None,
140
+ ) -> None:
141
+ self.qs = qs
142
+ self.index = index
143
+ self.search_config: str = search_config
144
+ self.has_rank: bool = has_rank
145
+ self.search_text: str | None = search_text
146
+ self.score_field: str = "rank"
147
+ self.highlight_field: str | None = None
148
+
149
+ @classmethod
150
+ def from_query_string(
151
+ cls,
152
+ index_model: type[models.Model],
153
+ index: SearchIndex,
154
+ search_config: str,
155
+ query_string: str,
156
+ ) -> IndexSearch:
157
+ if query_string == "*":
158
+ qs = index_model.objects.all().annotate( # type: ignore[attr-defined]
159
+ rank=Value(0, output_field=FloatField())
160
+ )
161
+ return cls(qs, index, search_config)
162
+
163
+ if ":" in query_string and not query_string.startswith('"'):
164
+ field, _, value = query_string.partition(":")
165
+ content_field = index.get_content_field()
166
+ if field == content_field:
167
+ return cls(
168
+ index_model.objects.search(value, config=search_config), # type: ignore[attr-defined]
169
+ index,
170
+ search_config,
171
+ has_rank=True,
172
+ search_text=value,
173
+ )
174
+ qs = index_model.objects.filter(**{field: value}).annotate( # type: ignore[attr-defined]
175
+ rank=Value(0, output_field=FloatField())
176
+ )
177
+ return cls(qs, index, search_config)
178
+
179
+ return cls(
180
+ index_model.objects.search(query_string, config=search_config), # type: ignore[attr-defined]
181
+ index,
182
+ search_config,
183
+ has_rank=True,
184
+ search_text=query_string,
185
+ )
186
+
187
+ @classmethod
188
+ def from_orm_query(
189
+ cls,
190
+ index_model: type[models.Model],
191
+ index: SearchIndex,
192
+ search_config: str,
193
+ orm_query: Q,
194
+ ) -> IndexSearch:
195
+ content_search_text = orm_query.content_search_text # type: ignore[attr-defined]
196
+ qs = index_model.objects.filter(orm_query) # type: ignore[attr-defined]
197
+ if content_search_text:
198
+ qs = qs.ranked(content_search_text, config=search_config)
199
+ else:
200
+ qs = qs.annotate(rank=Value(0, output_field=FloatField()))
201
+ return cls(
202
+ qs,
203
+ index,
204
+ search_config,
205
+ has_rank=bool(content_search_text),
206
+ search_text=content_search_text,
207
+ )
208
+
209
+ def narrow(self, narrow_queries: list[str]) -> None:
210
+ for nq in narrow_queries:
211
+ field, value = _parse_narrow_query(nq)
212
+ col = _resolve_field_name(field)
213
+ try:
214
+ self.qs.model._meta.get_field(col)
215
+ except FieldDoesNotExist:
216
+ self.qs = self.qs.none()
217
+ return
218
+ self.qs = self.qs.filter(**{col: value})
219
+
220
+ def highlight(self) -> None:
221
+ if self.search_text is None:
222
+ return
223
+ content_field = self.index.get_content_field()
224
+ sq = SearchQuery(
225
+ self.search_text, search_type="websearch", config=self.search_config
226
+ )
227
+ self.qs = self.qs.annotate(
228
+ headline=SearchHeadline(content_field, sq, config=self.search_config)
229
+ )
230
+ self.highlight_field = content_field
231
+
232
+ def boost(self, boost_dict: dict[str, float]) -> None:
233
+ if not self.has_rank or not boost_dict:
234
+ return
235
+ annotations: dict[str, Any] = {}
236
+ combined: Any = F("rank")
237
+ for i, (term, weight) in enumerate(boost_dict.items()):
238
+ alias = f"_boost_{i}"
239
+ bq = SearchQuery(term, search_type="websearch", config=self.search_config)
240
+ annotations[alias] = SearchRank(
241
+ "search_vector", bq, cover_density=True, normalization=32
242
+ )
243
+ combined = combined * (1.0 + F(alias) * weight)
244
+ annotations["_boosted_rank"] = combined
245
+ self.qs = self.qs.annotate(**annotations)
246
+ self.score_field = "_boosted_rank"
247
+
248
+ def count(self) -> int:
249
+ return self.qs.count()
250
+
251
+ def facets(
252
+ self,
253
+ facets: list[str] | None = None,
254
+ date_facets: dict[str, Any] | None = None,
255
+ query_facets: list[tuple[str, str]] | None = None,
256
+ ) -> FacetResults:
257
+ result: FacetResults = {}
258
+
259
+ if facets:
260
+ result["fields"] = {}
261
+ for field_name in facets:
262
+ col = _resolve_field_name(field_name)
263
+ facet_qs = (
264
+ self.qs.values(col)
265
+ .annotate(count=Count("id"))
266
+ .order_by("-count", col)
267
+ )
268
+ result["fields"][field_name] = [
269
+ (row[col], row["count"]) for row in facet_qs
270
+ ]
271
+
272
+ if date_facets:
273
+ result["dates"] = {}
274
+ for field_name, facet_opts in date_facets.items():
275
+ col = _resolve_field_name(field_name)
276
+ gap_by = facet_opts["gap_by"]
277
+ start_date = facet_opts["start_date"]
278
+ end_date = facet_opts["end_date"]
279
+ facet_qs = (
280
+ self.qs.filter(
281
+ **{
282
+ f"{col}__gte": start_date,
283
+ f"{col}__lt": end_date,
284
+ }
285
+ )
286
+ .annotate(bucket=Trunc(col, gap_by))
287
+ .values("bucket")
288
+ .annotate(count=Count("id"))
289
+ .order_by("bucket")
290
+ )
291
+ result["dates"][field_name] = [
292
+ (row["bucket"], row["count"]) for row in facet_qs
293
+ ]
294
+
295
+ if query_facets:
296
+ result["queries"] = {}
297
+ for field_name, value in query_facets:
298
+ col = _resolve_field_name(field_name)
299
+ count = self.qs.filter(**{col: value}).count()
300
+ result["queries"][f"{field_name}_{value}"] = count
301
+
302
+ return result
303
+
304
+ def results(
305
+ self,
306
+ sort_by: list[str] | None = None,
307
+ start_offset: int = 0,
308
+ end_offset: int | None = None,
309
+ result_class: type = SearchResult,
310
+ ) -> list[SearchResult]:
311
+ qs = self.qs
312
+
313
+ # Ordering
314
+ if sort_by:
315
+ qs = qs.order_by(*sort_by)
316
+ elif self.has_rank:
317
+ qs = qs.order_by(f"-{self.score_field}")
318
+
319
+ # Pagination
320
+ if end_offset is not None:
321
+ qs = qs[start_offset:end_offset]
322
+ elif start_offset:
323
+ qs = qs[start_offset:]
324
+
325
+ # Materialize
326
+ model = self.index.get_model()
327
+ field_names = _field_names(self.index)
328
+ app_label = model._meta.app_label
329
+ model_name = model._meta.model_name
330
+
331
+ results = []
332
+ for obj in qs:
333
+ stored_fields = {fn: getattr(obj, fn) for fn in field_names}
334
+ if self.highlight_field:
335
+ headline = getattr(obj, "headline", None)
336
+ if headline:
337
+ stored_fields["highlighted"] = {self.highlight_field: [headline]}
338
+ rank = getattr(obj, self.score_field, None)
339
+ score = float(rank) if self.has_rank and rank is not None else 0
340
+ results.append(
341
+ result_class(
342
+ app_label, model_name, obj.django_id, score, **stored_fields
343
+ )
344
+ )
345
+ return results
346
+
347
+
348
+ class MultiIndexSearch:
349
+ """Wraps multiple IndexSearch instances for cross-model search."""
350
+
351
+ def __init__(self, searches: list[tuple[IndexSearch, type[models.Model]]]) -> None:
352
+ self.searches = searches
353
+
354
+ def count(self) -> int:
355
+ return sum(s.count() for s, _model in self.searches)
356
+
357
+ def facets(
358
+ self,
359
+ facets: list[str] | None = None,
360
+ date_facets: dict[str, Any] | None = None,
361
+ query_facets: list[tuple[str, str]] | None = None,
362
+ ) -> FacetResults:
363
+ merged: FacetResults = {}
364
+
365
+ if facets:
366
+ merged["fields"] = {}
367
+ for field_name in facets:
368
+ col = _resolve_field_name(field_name)
369
+ combined_counts: dict[Any, int] = {}
370
+ for s, model in self.searches:
371
+ index_model = get_index_model(model)
372
+ try:
373
+ index_model._meta.get_field(col)
374
+ except FieldDoesNotExist:
375
+ continue
376
+ sub = s.facets(facets=[field_name])
377
+ for value, count in sub.get("fields", {}).get(field_name, []):
378
+ combined_counts[value] = combined_counts.get(value, 0) + count
379
+ merged["fields"][field_name] = sorted(
380
+ combined_counts.items(), key=lambda x: (-x[1], x[0])
381
+ )
382
+
383
+ if date_facets:
384
+ merged["dates"] = {}
385
+ for field_name, facet_opts in date_facets.items():
386
+ col = _resolve_field_name(field_name)
387
+ combined_buckets: dict[Any, int] = {}
388
+ for s, model in self.searches:
389
+ index_model = get_index_model(model)
390
+ try:
391
+ index_model._meta.get_field(col)
392
+ except FieldDoesNotExist:
393
+ continue
394
+ sub = s.facets(date_facets={field_name: facet_opts})
395
+ for bucket, count in sub.get("dates", {}).get(field_name, []):
396
+ combined_buckets[bucket] = (
397
+ combined_buckets.get(bucket, 0) + count
398
+ )
399
+ merged["dates"][field_name] = sorted(combined_buckets.items())
400
+
401
+ if query_facets:
402
+ merged["queries"] = {}
403
+ for field_name, value in query_facets:
404
+ col = _resolve_field_name(field_name)
405
+ total = 0
406
+ for s, model in self.searches:
407
+ index_model = get_index_model(model)
408
+ try:
409
+ index_model._meta.get_field(col)
410
+ except FieldDoesNotExist:
411
+ continue
412
+ sub = s.facets(query_facets=[(field_name, value)])
413
+ key = f"{field_name}_{value}"
414
+ total += sub.get("queries", {}).get(key, 0)
415
+ merged["queries"][f"{field_name}_{value}"] = total
416
+
417
+ return merged
418
+
419
+ def results(
420
+ self,
421
+ sort_by: list[str] | None = None,
422
+ start_offset: int = 0,
423
+ end_offset: int | None = None,
424
+ result_class: type = SearchResult,
425
+ ) -> list[SearchResult]:
426
+ # These are uniform across all searches (same kwargs applied to each)
427
+ first_search = self.searches[0][0]
428
+ score_field = first_search.score_field
429
+ has_rank = first_search.has_rank
430
+ highlight_field = first_search.highlight_field
431
+
432
+ # Build the aligned union
433
+ union_qs: IndexQuerySet | AlignedUnionQuerySet = first_search.qs
434
+ for s, model in self.searches[1:]:
435
+ union_qs = union_qs.aligned_union(s.qs)
436
+
437
+ # Per-model lookup: field_names and model identity vary across indexes
438
+ ct_map: dict[str, _CtInfo] = {
439
+ get_model_ct(model): {
440
+ "model": model,
441
+ "field_names": _field_names(s.index),
442
+ }
443
+ for s, model in self.searches
444
+ }
445
+
446
+ # Ordering — always include tiebreakers for stable pagination
447
+ if sort_by:
448
+ ordered_qs = union_qs.order_by(*sort_by, "django_ct", "django_id")
449
+ else:
450
+ ordered_qs = union_qs.order_by("-rank", "django_ct", "django_id")
451
+
452
+ # Pagination
453
+ if end_offset is not None:
454
+ ordered_qs = ordered_qs[start_offset:end_offset]
455
+ elif start_offset:
456
+ ordered_qs = ordered_qs[start_offset:]
457
+
458
+ # Materialize
459
+ results = []
460
+ for row in ordered_qs:
461
+ info = ct_map[row["django_ct"]]
462
+ model = info["model"]
463
+
464
+ stored_fields = {fn: row.get(fn) for fn in info["field_names"]}
465
+
466
+ if highlight_field and "headline" in row and row["headline"]:
467
+ stored_fields["highlighted"] = {highlight_field: [row["headline"]]}
468
+
469
+ rank = row.get(score_field)
470
+ score = float(rank) if has_rank and rank is not None else 0
471
+
472
+ results.append(
473
+ result_class(
474
+ model._meta.app_label,
475
+ model._meta.model_name,
476
+ row["django_id"],
477
+ score,
478
+ **stored_fields,
479
+ )
480
+ )
481
+ return results
482
+
483
+
484
+ class PostgresFTSSearchBackend(BaseSearchBackend):
485
+ def __init__(self, connection_alias, **connection_options):
486
+ super().__init__(connection_alias, **connection_options)
487
+ self.search_config = connection_options.get("SEARCH_CONFIG", "english")
488
+
489
+ def build_schema(self, fields):
490
+ return generate_index_models()
491
+
492
+ def update(self, index, iterable, commit=True):
493
+ try:
494
+ model = index.get_model()
495
+ index_model = get_index_model(model)
496
+ field_names = _field_names(index)
497
+ content_field = index.get_content_field()
498
+
499
+ rows = []
500
+ for obj in iterable:
501
+ prepared = index.full_prepare(obj)
502
+ defaults = {fn: prepared.get(fn) for fn in field_names}
503
+ rows.append(
504
+ index_model(
505
+ django_ct=prepared[DJANGO_CT],
506
+ django_id=prepared[DJANGO_ID],
507
+ **defaults,
508
+ )
509
+ )
510
+
511
+ if rows:
512
+ index_model.objects.bulk_create(
513
+ rows,
514
+ update_conflicts=True,
515
+ unique_fields=["django_ct", "django_id"],
516
+ update_fields=field_names,
517
+ )
518
+ index_model.objects.filter(
519
+ django_ct=get_model_ct(model),
520
+ django_id__in=[r.django_id for r in rows],
521
+ ).update(
522
+ search_vector=SearchVector(content_field, config=self.search_config)
523
+ )
524
+ except DatabaseError:
525
+ if not self.silently_fail:
526
+ raise
527
+ log.exception("Failed to update index for %s", index)
528
+
529
+ def remove(self, obj_or_string, commit=True):
530
+ try:
531
+ if isinstance(obj_or_string, str):
532
+ # String format: "app_label.model_name.pk"
533
+ parts = obj_or_string.split(".", 2)
534
+ if len(parts) != 3:
535
+ raise ValueError(
536
+ "String identifier must be 'app_label.model_name.pk', "
537
+ f"got '{obj_or_string}'"
538
+ )
539
+ model = django_apps.get_model(parts[0], parts[1])
540
+ django_ct = get_model_ct(model)
541
+ django_id = parts[2]
542
+ else:
543
+ model = type(obj_or_string)
544
+ django_ct = get_model_ct(model)
545
+ django_id = force_str(obj_or_string.pk)
546
+
547
+ index_model = get_index_model(model)
548
+ index_model.objects.filter(
549
+ django_ct=django_ct, django_id=django_id
550
+ ).delete()
551
+ except DatabaseError:
552
+ if not self.silently_fail:
553
+ raise
554
+ log.exception("Failed to remove document '%s'", obj_or_string)
555
+
556
+ def clear(self, models=None, commit=True):
557
+ try:
558
+ if models is None:
559
+ ui = connections["default"].get_unified_index()
560
+ models = ui.get_indexes().keys()
561
+
562
+ for model in models:
563
+ index_model = get_index_model(model)
564
+ index_model.objects.all().delete()
565
+ except DatabaseError:
566
+ if not self.silently_fail:
567
+ raise
568
+ log.exception("Failed to clear index")
569
+
570
+ @log_query
571
+ def search(self, query_string: str, **kwargs: Any) -> SearchResponse:
572
+ if not query_string or not query_string.strip():
573
+ return {"results": [], "hits": 0, "spelling_suggestion": None}
574
+
575
+ try:
576
+ ui = connections["default"].get_unified_index()
577
+
578
+ requested_models = kwargs.get("models")
579
+ if requested_models:
580
+ model_index_pairs = [(m, ui.get_index(m)) for m in requested_models]
581
+ else:
582
+ model_index_pairs = list(ui.get_indexes().items())
583
+
584
+ orm_query = kwargs.pop("orm_query", None)
585
+
586
+ result_class = kwargs.get("result_class", SearchResult)
587
+ sort_by = kwargs.get("sort_by")
588
+ start_offset = int(kwargs.get("start_offset", 0))
589
+ end_offset = (
590
+ int(kwargs["end_offset"])
591
+ if kwargs.get("end_offset") is not None
592
+ else None
593
+ )
594
+
595
+ # Build IndexSearch per model
596
+ searches = []
597
+ for model, index in model_index_pairs:
598
+ index_model = get_index_model(model)
599
+ if orm_query is not None:
600
+ s = IndexSearch.from_orm_query(
601
+ index_model, index, self.search_config, orm_query
602
+ )
603
+ else:
604
+ s = IndexSearch.from_query_string(
605
+ index_model, index, self.search_config, query_string
606
+ )
607
+
608
+ if narrow_queries := kwargs.get("narrow_queries"):
609
+ s.narrow(narrow_queries)
610
+ if boost := kwargs.get("boost"):
611
+ s.boost(boost)
612
+ if kwargs.get("highlight"):
613
+ s.highlight()
614
+
615
+ searches.append((s, model))
616
+
617
+ if len(searches) == 1:
618
+ # Single-model path (no behavior change)
619
+ s, model = searches[0]
620
+ total_count = s.count()
621
+ facets = s.facets(
622
+ facets=kwargs.get("facets"),
623
+ date_facets=kwargs.get("date_facets"),
624
+ query_facets=kwargs.get("query_facets"),
625
+ )
626
+ results = s.results(sort_by, start_offset, end_offset, result_class)
627
+ else:
628
+ # Multi-model path
629
+ multi = MultiIndexSearch(searches)
630
+ total_count = multi.count()
631
+ facets = multi.facets(
632
+ facets=kwargs.get("facets"),
633
+ date_facets=kwargs.get("date_facets"),
634
+ query_facets=kwargs.get("query_facets"),
635
+ )
636
+ results = multi.results(sort_by, start_offset, end_offset, result_class)
637
+
638
+ response: SearchResponse = {
639
+ "results": results,
640
+ "hits": total_count,
641
+ "spelling_suggestion": None,
642
+ }
643
+ if facets:
644
+ response["facets"] = facets
645
+ return response
646
+ except DatabaseError:
647
+ if not self.silently_fail:
648
+ raise
649
+ log.exception("Failed to search with query '%s'", query_string)
650
+ return {"results": [], "hits": 0, "spelling_suggestion": None}
651
+
652
+ def prep_value(self, value: Any) -> Any:
653
+ return value
654
+
655
+ def more_like_this(
656
+ self,
657
+ model_instance: models.Model,
658
+ additional_query_string: str | None = None,
659
+ **kwargs: Any,
660
+ ) -> None:
661
+ raise NotImplementedError(
662
+ "postgres_fts_backend does not support more_like_this. "
663
+ "PostgreSQL has no native document similarity feature."
664
+ )
665
+
666
+
667
+ class ORMSearchNode(SearchNode):
668
+ def as_orm_query(self, query_fragment_callback):
669
+ result = []
670
+ for child in self.children:
671
+ if hasattr(child, "as_orm_query"):
672
+ result.append(child.as_orm_query(query_fragment_callback))
673
+ else:
674
+ expression, value = child
675
+ field, filter_type = self.split_expression(expression)
676
+ result.append(query_fragment_callback(field, filter_type, value))
677
+
678
+ query = Q()
679
+ if self.connector == self.AND:
680
+ for subquery in result:
681
+ query &= subquery
682
+ elif self.connector == self.OR:
683
+ for subquery in result:
684
+ query |= subquery
685
+
686
+ if query and self.negated:
687
+ query = ~query
688
+
689
+ return query
690
+
691
+
692
+ class ORMSearchQuery(BaseSearchQuery):
693
+
694
+ def __init__(self, using="default"):
695
+ super().__init__(using=using)
696
+ self.query_filter = ORMSearchNode()
697
+ self.content_search_text = None
698
+
699
+ def clean(self, query_fragment):
700
+ return query_fragment
701
+
702
+ def build_query_fragment(self, field, filter_type, value):
703
+ if hasattr(value, "prepare"):
704
+ value = value.prepare(self)
705
+
706
+ if filter_type == "content":
707
+ if field == "content":
708
+ self.content_search_text = value
709
+ return Q(
710
+ search_vector=SearchQuery(
711
+ value,
712
+ search_type="websearch",
713
+ config=self.backend.search_config,
714
+ )
715
+ )
716
+ return Q(**{f"{field}__trigram_similar": value})
717
+
718
+ if filter_type == "fuzzy":
719
+ return Q(**{f"{field}__trigram_similar": value})
720
+
721
+ if filter_type == "in":
722
+ value = list(value)
723
+ if not value:
724
+ return Q(pk__in=[])
725
+ elif filter_type == "range":
726
+ value = (value[0], value[1])
727
+
728
+ # contains/startswith/endswith → case-insensitive Django lookups
729
+ lookup = {
730
+ "contains": "icontains",
731
+ "startswith": "istartswith",
732
+ "endswith": "iendswith",
733
+ }.get(filter_type, filter_type)
734
+
735
+ return Q(**{f"{field}__{lookup}": value})
736
+
737
+ def build_query(self):
738
+ final_query = self.query_filter.as_orm_query(self.build_query_fragment)
739
+ if not final_query:
740
+ return Q()
741
+ return final_query
742
+
743
+ def matching_all_fragment(self):
744
+ return Q()
745
+
746
+ def run(self, spelling_query=None, **kwargs):
747
+ final_query = self.build_query()
748
+ search_kwargs = self.build_params(spelling_query=spelling_query)
749
+
750
+ if kwargs:
751
+ search_kwargs.update(kwargs)
752
+
753
+ final_query.content_search_text = self.content_search_text
754
+ search_kwargs["orm_query"] = final_query
755
+
756
+ results = self.backend.search("*", **search_kwargs)
757
+ self._results = results.get("results", [])
758
+ self._hit_count = results.get("hits", 0)
759
+ self._facet_counts = self.post_process_facets(results)
760
+ self._spelling_suggestion = results.get("spelling_suggestion", None)
761
+
762
+ def get_count(self):
763
+ if self._hit_count is None:
764
+ self.run()
765
+ return self._hit_count
766
+
767
+
768
+ class PostgresFTSEngine(BaseEngine):
769
+ backend = PostgresFTSSearchBackend
770
+ query = ORMSearchQuery
@@ -0,0 +1,11 @@
1
+ from django.apps import AppConfig
2
+
3
+
4
+ class PostgresFTSConfig(AppConfig):
5
+ name = "postgres_fts_backend"
6
+ default_auto_field = "django.db.models.AutoField"
7
+
8
+ def ready(self):
9
+ from postgres_fts_backend import validate_all_schemas # noqa: PLC0415
10
+
11
+ validate_all_schemas()
@@ -0,0 +1,84 @@
1
+ import os
2
+ import sys
3
+
4
+ from django.conf import settings
5
+ from django.contrib.postgres.operations import TrigramExtension
6
+ from django.core.management.base import BaseCommand
7
+ from django.db.migrations.autodetector import MigrationAutodetector
8
+ from django.db.migrations.loader import MigrationLoader
9
+ from django.db.migrations.state import ModelState, ProjectState
10
+ from django.db.migrations.writer import MigrationWriter
11
+
12
+ from postgres_fts_backend.models import generate_index_models
13
+
14
+ APP_LABEL = "postgres_fts_backend"
15
+
16
+
17
+ class Command(BaseCommand):
18
+ help = (
19
+ "Generate Django migrations for haystack search index tables. "
20
+ "Run this after changing SearchIndex definitions, then run "
21
+ "'manage.py migrate postgres_fts_backend'."
22
+ )
23
+
24
+ def handle(self, *args, **options):
25
+ index_models = generate_index_models()
26
+
27
+ if not index_models:
28
+ self.stdout.write("No search indexes found.")
29
+ return
30
+
31
+ # Build the target state from the dynamic models
32
+ new_state = ProjectState()
33
+ for model_cls in index_models.values():
34
+ model_state = ModelState.from_model(model_cls)
35
+ new_state.add_model(model_state)
36
+
37
+ # Load existing migrations to get the current state
38
+ loader = MigrationLoader(None, ignore_no_migrations=True)
39
+ old_state = loader.project_state()
40
+
41
+ # Detect changes
42
+ autodetector = MigrationAutodetector(old_state, new_state)
43
+ changes = autodetector.changes(graph=loader.graph)
44
+
45
+ if not changes.get(APP_LABEL):
46
+ self.stdout.write("No changes detected.")
47
+ return
48
+
49
+ # Determine output directory
50
+ migrations_module = getattr(settings, "MIGRATION_MODULES", {}).get(APP_LABEL)
51
+ if migrations_module:
52
+ migrations_dir = os.path.join(*migrations_module.split("."))
53
+ # Make it relative to the project base if not absolute
54
+ if not os.path.isabs(migrations_dir):
55
+ # Find the root by looking at the first component on sys.path
56
+ # that contains the module
57
+ for path in sys.path:
58
+ candidate = os.path.join(path, migrations_dir)
59
+ if os.path.isdir(os.path.dirname(candidate)) or path == "":
60
+ migrations_dir = candidate
61
+ break
62
+ else:
63
+ # Default to the package's own migrations dir
64
+ migrations_dir = os.path.join(
65
+ os.path.dirname(os.path.dirname(__file__)),
66
+ "migrations",
67
+ )
68
+
69
+ os.makedirs(migrations_dir, exist_ok=True)
70
+
71
+ # Ensure __init__.py exists
72
+ init_path = os.path.join(migrations_dir, "__init__.py")
73
+ if not os.path.exists(init_path):
74
+ with open(init_path, "w") as f:
75
+ f.write("")
76
+
77
+ for migration in changes[APP_LABEL]:
78
+ if getattr(migration, "initial", False):
79
+ migration.operations.insert(0, TrigramExtension())
80
+ writer = MigrationWriter(migration)
81
+ migration_path = os.path.join(migrations_dir, f"{migration.name}.py")
82
+ with open(migration_path, "w") as f:
83
+ f.write(writer.as_string())
84
+ self.stdout.write(f"Created {migration_path}")
@@ -0,0 +1,213 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterator
4
+ from typing import Any
5
+
6
+ from django.contrib.postgres.indexes import GinIndex, OpClass
7
+ from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVectorField
8
+ from django.db import models
9
+ from django.db.models import Value
10
+ from haystack import connections as haystack_connections
11
+ from haystack import indexes as haystack_indexes
12
+
13
+
14
+ class IndexQuerySet(models.QuerySet):
15
+ def search(self, search_text: str, config: str = "english") -> IndexQuerySet:
16
+ """Full-text filter + rank annotation in one call."""
17
+ sq = SearchQuery(search_text, search_type="websearch", config=config)
18
+ return self.filter(search_vector=sq).annotate(
19
+ rank=SearchRank("search_vector", sq, cover_density=True, normalization=32)
20
+ )
21
+
22
+ def ranked(self, search_text: str, config: str = "english") -> IndexQuerySet:
23
+ """Add rank annotation only (when filter is applied separately)."""
24
+ sq = SearchQuery(search_text, search_type="websearch", config=config)
25
+ return self.annotate(
26
+ rank=SearchRank("search_vector", sq, cover_density=True, normalization=32)
27
+ )
28
+
29
+ def aligned_union(self, other: IndexQuerySet) -> AlignedUnionQuerySet:
30
+ """Start a chainable aligned union with another queryset.
31
+
32
+ Returns an AlignedUnionQuerySet that can be further chained:
33
+ qs1.aligned_union(qs2).aligned_union(qs3)
34
+ """
35
+ return AlignedUnionQuerySet([self, other])
36
+
37
+
38
+ class AlignedUnionQuerySet:
39
+ """Accumulates querysets and builds an aligned union lazily.
40
+
41
+ Introspects model fields and annotations to build a superset of columns.
42
+ Missing columns are filled with Value(None, output_field=...).
43
+ """
44
+
45
+ SKIP_FIELDS: set[str] = {"id", "search_vector"}
46
+
47
+ def __init__(self, querysets: list[IndexQuerySet]) -> None:
48
+ self._querysets = list(querysets)
49
+ self._built: models.QuerySet | None = None
50
+
51
+ def aligned_union(self, other: IndexQuerySet) -> AlignedUnionQuerySet:
52
+ return AlignedUnionQuerySet(self._querysets + [other])
53
+
54
+ def _build(self) -> models.QuerySet:
55
+ if self._built is not None:
56
+ return self._built
57
+
58
+ all_columns = {}
59
+ per_qs_columns = []
60
+
61
+ for qs in self._querysets:
62
+ qs_cols = set()
63
+ for f in qs.model._meta.get_fields():
64
+ if f.name in self.SKIP_FIELDS:
65
+ continue
66
+ if hasattr(f, "column"):
67
+ qs_cols.add(f.name)
68
+ if f.name not in all_columns:
69
+ all_columns[f.name] = f
70
+ if hasattr(qs, "query") and hasattr(qs.query, "annotations"):
71
+ for name, annotation in qs.query.annotations.items():
72
+ qs_cols.add(name)
73
+ if name not in all_columns:
74
+ all_columns[name] = annotation.output_field
75
+ per_qs_columns.append(qs_cols)
76
+
77
+ sorted_cols = sorted(all_columns.keys())
78
+
79
+ aligned = []
80
+ for qs, qs_cols in zip(self._querysets, per_qs_columns):
81
+ missing = set(sorted_cols) - qs_cols
82
+ if missing:
83
+ annotations = {}
84
+ for col_name in missing:
85
+ field_meta = all_columns[col_name]
86
+ if hasattr(field_meta, "column"):
87
+ output_field = field_meta.__class__(null=True)
88
+ else:
89
+ output_field = field_meta.__class__()
90
+ annotations[col_name] = Value(None, output_field=output_field)
91
+ qs = qs.annotate(**annotations)
92
+ aligned.append(qs.values(*sorted_cols))
93
+
94
+ self._built = aligned[0].union(*aligned[1:], all=True)
95
+ return self._built
96
+
97
+ def order_by(self, *args: str) -> models.QuerySet:
98
+ return self._build().order_by(*args)
99
+
100
+ def count(self) -> int:
101
+ return self._build().count()
102
+
103
+ def __iter__(self) -> Iterator[dict[str, Any]]:
104
+ return iter(self._build())
105
+
106
+ def __getitem__(self, key: int | slice) -> Any:
107
+ return self._build()[key]
108
+
109
+
110
+ FIELD_MAP: dict[type, Callable[[], models.Field]] = {
111
+ haystack_indexes.CharField: lambda: models.TextField(null=True),
112
+ haystack_indexes.EdgeNgramField: lambda: models.TextField(null=True),
113
+ haystack_indexes.NgramField: lambda: models.TextField(null=True),
114
+ haystack_indexes.DateTimeField: lambda: models.DateTimeField(null=True),
115
+ haystack_indexes.DateField: lambda: models.DateField(null=True),
116
+ haystack_indexes.IntegerField: lambda: models.IntegerField(null=True),
117
+ haystack_indexes.FloatField: lambda: models.FloatField(null=True),
118
+ haystack_indexes.BooleanField: lambda: models.BooleanField(null=True),
119
+ }
120
+
121
+
122
+ def _django_field_for(haystack_field: haystack_indexes.SearchField) -> models.Field:
123
+ for haystack_cls, factory in FIELD_MAP.items():
124
+ if isinstance(haystack_field, haystack_cls):
125
+ return factory()
126
+ return models.TextField(null=True)
127
+
128
+
129
+ # Django models for search index tables are created dynamically from Haystack
130
+ # SearchIndex definitions. Unlike normal Django models (defined statically in
131
+ # models.py), these must be built at runtime because the set of fields depends
132
+ # on the user's SearchIndex classes, which can change independently of
133
+ # migrations. The database schema is managed separately via the
134
+ # build_postgres_schema management command, so the runtime model and the
135
+ # database can be out of sync until the user regenerates and applies migrations.
136
+ # validate_all_schemas() (called at startup) checks for this.
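+ #
+ # As an illustration (hypothetical app/model names): a source model notes.Note
+ # produces a dynamic model named HaystackIndex_Notes_Note backed by the table
+ # haystack_index_notes_note, with django_ct, django_id, and search_vector
+ # columns plus one column per SearchIndex field.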
137
+
138
+ _index_models_cache: dict[str, type[models.Model]] = {}
139
+
140
+
141
+ def _build_index_model(
142
+ source_model: type[models.Model], search_index: haystack_indexes.SearchIndex
143
+ ) -> type[models.Model]:
144
+ app_label: str = source_model._meta.app_label
145
+ model_name: str = source_model._meta.model_name # type: ignore[assignment]
146
+
147
+ class_name = f"HaystackIndex_{app_label.capitalize()}_{model_name.capitalize()}"
148
+
149
+ if class_name in _index_models_cache:
150
+ return _index_models_cache[class_name]
151
+
152
+ attrs = {
153
+ "__module__": "postgres_fts_backend.models",
154
+ "django_id": models.CharField(max_length=255),
155
+ "django_ct": models.CharField(max_length=255),
156
+ "search_vector": SearchVectorField(null=True),
157
+ "objects": IndexQuerySet.as_manager(),
158
+ }
159
+
160
+ for field_name, field_obj in search_index.fields.items():
161
+ if field_name in ("django_ct", "django_id"):
162
+ continue
163
+ attrs[field_name] = _django_field_for(field_obj)
164
+
165
+ table_name = f"haystack_index_{app_label}_{model_name}"
166
+
167
+ db_indexes = [
168
+ GinIndex(
169
+ fields=["search_vector"],
170
+ name=f"{table_name}_sv_gin",
171
+ )
172
+ ]
173
+
174
+ for field_name, field_obj in search_index.fields.items():
175
+ if isinstance(
176
+ field_obj, (haystack_indexes.EdgeNgramField, haystack_indexes.NgramField)
177
+ ):
178
+ db_indexes.append(
179
+ GinIndex(
180
+ OpClass(models.F(field_name), name="gin_trgm_ops"),
181
+ name=f"{table_name}_{field_name}_trgm",
182
+ )
183
+ )
184
+
185
+ meta = type(
186
+ "Meta",
187
+ (),
188
+ {
189
+ "app_label": "postgres_fts_backend",
190
+ "db_table": table_name,
191
+ "unique_together": [("django_ct", "django_id")],
192
+ "indexes": db_indexes,
193
+ },
194
+ )
195
+ attrs["Meta"] = meta
196
+
197
+ model_cls = type(class_name, (models.Model,), attrs)
198
+ _index_models_cache[class_name] = model_cls
199
+ return model_cls
200
+
201
+
202
+ def get_index_model(source_model: type[models.Model]) -> type[models.Model]:
203
+ ui = haystack_connections["default"].get_unified_index()
204
+ search_index = ui.get_index(source_model)
205
+ return _build_index_model(source_model, search_index)
206
+
207
+
208
+ def generate_index_models() -> dict[type[models.Model], type[models.Model]]:
209
+ ui = haystack_connections["default"].get_unified_index()
210
+ return {
211
+ source_model: _build_index_model(source_model, search_index)
212
+ for source_model, search_index in ui.get_indexes().items()
213
+ }
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.4
2
+ Name: postgres-fts-backend
3
+ Version: 0.0.1
4
+ Summary: A PostgreSQL Full Text Search Backend for Haystack
5
+ Author-email: Forest Gregg <fgregg@datamade.us>
6
+ Project-URL: Repository, https://github.com/fgregg/postgres-fts-backend
7
+ Project-URL: Issues, https://github.com/fgregg/postgres-fts-backend/issues
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Requires-Python: >=3.12
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: django>=5.0
13
+ Requires-Dist: django-haystack>=2.8.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest; extra == "dev"
16
+ Requires-Dist: pytest-django; extra == "dev"
17
+ Requires-Dist: ruff; extra == "dev"
18
+ Requires-Dist: black; extra == "dev"
19
+ Requires-Dist: mypy; extra == "dev"
20
+ Requires-Dist: django-stubs; extra == "dev"
21
+ Dynamic: license-file
22
+
23
+ # postgres-fts-backend
24
+
25
+ A [Django Haystack](https://django-haystack.readthedocs.io/) backend that uses
26
+ PostgreSQL's built-in full-text search. No external search service required.
27
+
28
+ ## Requirements
29
+
30
+ - Python >= 3.12
31
+ - Django >= 5.0
32
+ - django-haystack >= 2.8.0
33
+ - PostgreSQL
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install postgres-fts-backend
39
+ ```
40
+
41
+ Add to `INSTALLED_APPS`:
42
+
43
+ ```python
44
+ INSTALLED_APPS = [
45
+ "django.contrib.postgres",
46
+ "haystack",
47
+ "postgres_fts_backend",
48
+ # ...
49
+ ]
50
+ ```
51
+
52
+ Set a migration module so the generated search index migrations live in
53
+ your project rather than inside the installed package:
54
+
55
+ ```python
56
+ MIGRATION_MODULES = {
57
+ "postgres_fts_backend": "myapp.search_migrations",
58
+ }
59
+ ```
60
+
61
+ Configure Haystack:
62
+
63
+ ```python
64
+ HAYSTACK_CONNECTIONS = {
65
+ "default": {
66
+ "ENGINE": "postgres_fts_backend.PostgresFTSEngine",
67
+ },
68
+ }
69
+ ```
70
+
71
+ To use a search configuration other than `"english"`:
72
+
73
+ ```python
74
+ HAYSTACK_CONNECTIONS = {
75
+ "default": {
76
+ "ENGINE": "postgres_fts_backend.PostgresFTSEngine",
77
+ "SEARCH_CONFIG": "spanish",
78
+ },
79
+ }
80
+ ```
81
+
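+ Indexes themselves are declared with the standard Haystack API; nothing
+ backend-specific is required. A minimal sketch, assuming a hypothetical `Note`
+ model (with `author` and `pub_date` fields) in an app named `notes`:
+ 
+ ```python
+ # notes/search_indexes.py -- hypothetical app and model names
+ from haystack import indexes
+ 
+ from notes.models import Note
+ 
+ 
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+     # The document field becomes the text column that feeds search_vector.
+     text = indexes.CharField(document=True, use_template=True)
+     author = indexes.CharField(model_attr="author")
+     pub_date = indexes.DateTimeField(model_attr="pub_date")
+ 
+     def get_model(self):
+         return Note
+ ```
+ 
+ Queries then go through `SearchQuerySet` as usual:
+ 
+ ```python
+ from haystack.query import SearchQuerySet
+ 
+ results = SearchQuerySet().filter(content="full text search")
+ for result in results[:10]:
+     print(result.score, result.author)
+ ```
+ 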
82
+ ## Other peculiarities of this backend
83
+
84
+ ### Build indexes through models and migrations
85
+
86
+ ```bash
87
+ python manage.py build_postgres_schema
88
+ python manage.py migrate postgres_fts_backend
89
+ ```
90
+
91
+ Run these two commands again whenever you change a `SearchIndex` definition.
92
+
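+ After the schema migration is applied, the index still has to be populated.
+ This uses Haystack's standard indexing commands (shown here as the usual
+ sequence; adapt to your deployment):
+ 
+ ```bash
+ python manage.py rebuild_index   # or update_index for incremental updates
+ ```
+ 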
93
+ ### Fuzzy search
94
+
95
+ Fuzzy queries use PostgreSQL's trigram similarity matching (`pg_trgm`):
96
+
97
+ ```python
98
+ results = SearchQuerySet().filter(author__fuzzy="Janee")
99
+ ```
100
+
101
+ The similarity threshold is controlled by PostgreSQL's
102
+ `pg_trgm.similarity_threshold` setting (default 0.3). To adjust it:
103
+
104
+ ```sql
105
+ ALTER DATABASE mydb SET pg_trgm.similarity_threshold = 0.5;
106
+ ```
107
+
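+ For a one-off experiment, the threshold can also be changed for the current
+ session only (for example, in a `psql` shell):
+ 
+ ```sql
+ SET pg_trgm.similarity_threshold = 0.5;
+ ```
+ 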
108
+ ### `more_like_this` not implemented
109
+ PostgreSQL's full-text search has no native document-similarity feature, so `more_like_this()` raises `NotImplementedError`. It could probably be approximated, but I need to think more about the approach before implementing it.
110
+
111
+ ### `spelling_suggestion` is not supported
+ The backend always returns `None` for `spelling_suggestion` in search results.
@@ -0,0 +1,12 @@
1
+ postgres_fts_backend/__init__.py,sha256=PeVca49tR0wFo87SEYdGc5lt4-b5JTV3-IKHCux5q0w,27151
2
+ postgres_fts_backend/apps.py,sha256=Yc9zEu63CorJLVh1HkuLjGqTThn0BBqIW3S2D72n3JA,293
3
+ postgres_fts_backend/models.py,sha256=U_Gjur12y_ck3U4wNB5qeRFG46AtneHgsH-2VR-lNB8,8175
4
+ postgres_fts_backend/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ postgres_fts_backend/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ postgres_fts_backend/management/commands/build_postgres_schema.py,sha256=-y5pLZ1Eq2cgOUjL07b5li8tI_iNKHQSO4QYwKtvv8w,3296
7
+ postgres_fts_backend/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ postgres_fts_backend-0.0.1.dist-info/licenses/LICENSE,sha256=tLxqA1coRYSQ1uk2S_CuxAb-5Pxr3uJAVx3_BbWWTOQ,1084
9
+ postgres_fts_backend-0.0.1.dist-info/METADATA,sha256=bBQEQb_wHZlQq0QtaMXq5Cvr0dJApGBEkSIMahbARM0,2688
10
+ postgres_fts_backend-0.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
+ postgres_fts_backend-0.0.1.dist-info/top_level.txt,sha256=o2Bn_Q4mThOCsDP4O-3P2HR3L03fuZnB6hwDF-ywmGQ,21
12
+ postgres_fts_backend-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Forest Gregg, 2024 Datamade
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ postgres_fts_backend