TypeDAL 3.17.3__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of TypeDAL might be problematic. Click here for more details.

@@ -0,0 +1,1059 @@
1
+ """
2
+ Contains base functionality related to the Query Builder.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime as dt
8
+ import math
9
+ import typing as t
10
+ from collections import defaultdict
11
+
12
+ import pydal.objects
13
+
14
+ from .constants import DEFAULT_JOIN_OPTION, JOIN_OPTIONS
15
+ from .core import TypeDAL
16
+ from .fields import TypedField, is_typed_field
17
+ from .helpers import DummyQuery, as_lambda, looks_like, normalize_table_keys, throw
18
+ from .tables import TypedTable
19
+ from .types import (
20
+ CacheMetadata,
21
+ Condition,
22
+ Expression,
23
+ Field,
24
+ Metadata,
25
+ OnQuery,
26
+ OrderBy,
27
+ Query,
28
+ Rows,
29
+ SelectKwargs,
30
+ T,
31
+ T_MetaInstance,
32
+ )
33
+
34
+
35
class QueryBuilder(t.Generic[T_MetaInstance]):
    """
    Abstraction on top of pydal's query system.
    """

    # Model class this builder selects from.
    model: t.Type[T_MetaInstance]
    # Condition accumulated so far.
    query: Query
    # Positional arguments passed on to the select call.
    select_args: list[t.Any]
    # Keyword arguments passed on to the select call.
    select_kwargs: SelectKwargs
    # Relationships to include in the result, keyed by name.
    relationships: dict[str, Relationship[t.Any]]
    # Additional bookkeeping carried along with the builder (e.g. the "cache" settings).
    metadata: Metadata
46
+
47
+ def __init__(
48
+ self,
49
+ model: t.Type[T_MetaInstance],
50
+ add_query: t.Optional[Query] = None,
51
+ select_args: t.Optional[list[t.Any]] = None,
52
+ select_kwargs: t.Optional[SelectKwargs] = None,
53
+ relationships: dict[str, Relationship[t.Any]] = None,
54
+ metadata: Metadata = None,
55
+ ):
56
+ """
57
+ Normally, you wouldn't manually initialize a QueryBuilder but start using a method on a TypedTable.
58
+
59
+ Example:
60
+ MyTable.where(...) -> QueryBuilder[MyTable]
61
+ """
62
+ self.model = model
63
+ table = model._ensure_table_defined()
64
+ default_query = t.cast(Query, table.id > 0)
65
+ self.query = add_query or default_query
66
+ self.select_args = select_args or []
67
+ self.select_kwargs = select_kwargs or {}
68
+ self.relationships = relationships or {}
69
+ self.metadata = metadata or {}
70
+
71
+ def __str__(self) -> str:
72
+ """
73
+ Simple string representation for the query builder.
74
+ """
75
+ return f"QueryBuilder for {self.model}"
76
+
77
+ def __repr__(self) -> str:
78
+ """
79
+ Advanced string representation for the query builder.
80
+ """
81
+ return (
82
+ f"<QueryBuilder for {self.model} with "
83
+ f"{len(self.select_args)} select args; "
84
+ f"{len(self.select_kwargs)} select kwargs; "
85
+ f"{len(self.relationships)} relationships; "
86
+ f"query: {bool(self.query)}; "
87
+ f"metadata: {self.metadata}; "
88
+ f">"
89
+ )
90
+
91
+ def __bool__(self) -> bool:
92
+ """
93
+ Querybuilder is truthy if it has t.Any conditions.
94
+ """
95
+ table = self.model._ensure_table_defined()
96
+ default_query = t.cast(Query, table.id > 0)
97
+ return any(
98
+ [
99
+ self.query != default_query,
100
+ self.select_args,
101
+ self.select_kwargs,
102
+ self.relationships,
103
+ self.metadata,
104
+ ],
105
+ )
106
+
107
def _extend(
    self,
    add_query: t.Optional[Query] = None,
    overwrite_query: t.Optional[Query] = None,
    select_args: t.Optional[list[t.Any]] = None,
    select_kwargs: t.Optional[SelectKwargs] = None,
    relationships: dict[str, Relationship[t.Any]] = None,
    metadata: Metadata = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Create a new builder from the current state plus the given additions.

    `add_query` is ANDed with the current query and takes precedence over `overwrite_query`,
    which replaces the query. The other arguments are appended to (lists) or merged into (dicts)
    the current values; anything omitted or empty is carried over unchanged.
    """
    if add_query:
        next_query = add_query & self.query
    else:
        next_query = overwrite_query or self.query

    next_args = self.select_args + select_args if select_args else self.select_args
    next_kwargs = self.select_kwargs | select_kwargs if select_kwargs else self.select_kwargs
    next_relationships = self.relationships | relationships if relationships else self.relationships
    next_metadata = self.metadata | metadata if metadata else self.metadata

    return QueryBuilder(
        self.model,
        next_query,
        next_args,
        next_kwargs,
        next_relationships,
        next_metadata,
    )
124
+
125
def select(self, *fields: t.Any, **options: t.Unpack[SelectKwargs]) -> "QueryBuilder[T_MetaInstance]":
    """
    Choose which fields to select and with which options.

    Fields: database columns by name ('id'), by field reference (table.id) or other (e.g. table.ALL).

    Options (paraphrased from the web2py pydal docs), for more info see
    http://www.web2py.com/books/default/chapter/29/06/the-database-abstraction-layer#orderby-groupby-limitby-distinct-having-orderby_on_limitby-join-left-cache

        orderby: field(s) to order by. Supported:
            table.name - sort by name, ascending
            ~table.name - sort by name, descending
            <random> - sort randomly
            table.name|table.id - sort by two fields (first name, then id)

        groupby, having: together with orderby:
            groupby can be a field (e.g. table.name) to group records by
            having can be a query, only those `having` the condition are grouped

        limitby: tuple of min and max. When using the query builder, .paginate(limit, page) is recommended.
        distinct: bool/field. Only select rows that differ
        orderby_on_limitby (bool, default: True): by default, an implicit orderby is added when doing limitby.
        join: othertable.on(query) - do an INNER JOIN. Using TypeDAL relationships with .join() is recommended!
        left: othertable.on(query) - do a LEFT JOIN. Using TypeDAL relationships with .join() is recommended!
        cache: cache the query result to speed up repeated queries; e.g. (cache=(cache.ram, 3600), cacheable=True)

    Returns:
        A new builder with the fields and options added to the current ones.
    """
    extra_args = list(fields)
    return self._extend(select_args=extra_args, select_kwargs=options)
151
+
152
def orderby(self, *fields: OrderBy) -> "QueryBuilder[T_MetaInstance]":
    """
    Sort the query results; shortcut for `.select(orderby=fields)`.

    Args:
        fields: field(s) to order by. Supported:
            table.name - sort by name, ascending
            ~table.name - sort by name, descending
            <random> - sort randomly
            table.name|table.id - sort by two fields (first name, then id)

    Returns:
        QueryBuilder: A new QueryBuilder instance with the ordering applied.
    """
    ordered = self.select(orderby=fields)
    return ordered
167
+
168
def where(
    self,
    *queries_or_lambdas: Query | t.Callable[[t.Type[T_MetaInstance]], Query] | dict[str, t.Any],
    **filters: t.Any,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Narrow down the builder's query.

    Accepted forms:
        .where(Query) -> a direct query such as `Table.id == 5`
        .where(lambda table: table.id == 5) -> a callable that receives the model and returns a query
        .where(id=5) -> keyword arguments (or a dict), each compared with `==` and ANDed together
        .where(Table.field) -> a bare field, turned into `field != None`

    Chained calls are ANDed:
        .where(lambda table: table.id == 5).where(lambda table: table.id == 6) == (table.id == 5) & (table.id == 6)
    Multiple arguments to a single call are ORed:
        .where(lambda table: table.id == 5, lambda table: table.id == 6) == (table.id == 5) | (table.id == 6)

    Raises:
        ValueError: if an argument is of an unsupported type.
    """
    table = self.model._ensure_table_defined()

    # The keyword filters are treated as one more (dict) alternative.
    alternatives = [*queries_or_lambdas, filters]

    any_of = t.cast(Query, DummyQuery())
    for candidate in alternatives:
        if isinstance(candidate, (Field, pydal.objects.Field)) or is_typed_field(candidate):
            # `!= None` is kept on purpose: the comparison result is used as a Query here
            # (presumably via pydal's operator overloading), so `is not None` would not work.
            any_of |= t.cast(Query, candidate != None)
        elif isinstance(candidate, (pydal.objects.Query, Expression, pydal.objects.Expression)):
            any_of |= t.cast(Query, candidate)
        elif callable(candidate):
            produced = candidate(self.model)
            if produced:
                any_of |= produced
        elif isinstance(candidate, dict):
            all_of = DummyQuery()
            for field, value in candidate.items():
                all_of &= table[field] == value

            # An empty dict leaves the falsy DummyQuery in place and adds nothing.
            if all_of:
                any_of |= all_of
        else:
            raise ValueError(f"Unexpected query type ({type(candidate)}).")

    combined = self.query
    if any_of:
        combined &= any_of

    return self._extend(overwrite_query=combined)
216
+
217
+ def _parse_relationships(
218
+ self, fields: t.Iterable[str | t.Type[TypedTable]], method: JOIN_OPTIONS = None, **update: t.Any
219
+ ) -> dict[str, Relationship[t.Any]]:
220
+ """
221
+ Parse relationship fields into a dict of base relationships with nested relationships.
222
+
223
+ Args:
224
+ fields: Iterable of relationship field names
225
+ (e.g., ['relationship', 'relationship.with_nested', 'relationship.no2'])
226
+ condition_and: Optional condition to pass to relationship clones
227
+
228
+ Returns:
229
+ Dict mapping base relationship names to Relationship objects with nested relationships
230
+ Example: {'relationship': Relationship('relationship',
231
+ nested={'with_nested': Relationship(),
232
+ 'no2': Relationship()})}
233
+ """
234
+ relationships: dict[str, Relationship[t.Any]] = {}
235
+ base_relationships = self.model.get_relationships()
236
+ db = self._get_db()
237
+
238
+ for field in fields:
239
+ relation_name = str(field)
240
+ parts = relation_name.split(".")
241
+ base_name = parts[0]
242
+
243
+ # Create base relationship if it doesn't exist
244
+ if base_name not in relationships:
245
+ relationships[base_name] = base_relationships[base_name].clone(join=method, **update)
246
+
247
+ # If this is a nested relationship, traverse and add it
248
+ if len(parts) > 1:
249
+ current = relationships[base_name]
250
+
251
+ for level in parts[1:]:
252
+ # Check if this nested relationship already exists
253
+ if level not in current.nested:
254
+ # Create new nested relationship
255
+ subrelationship = current.get_table(db).get_relationships()[level].clone(join=method)
256
+ current.nested[level] = subrelationship
257
+
258
+ current = current.nested[level]
259
+
260
+ return relationships
261
+
262
def join(
    self,
    *fields: str | t.Type[TypedTable],
    method: JOIN_OPTIONS = None,
    on: OnQuery | list[Expression] | Expression = None,
    condition: Condition = None,
    condition_and: Condition = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Include relationship fields in the result.

    Args:
        fields: names of Relationships on the current model (dotted names are parsed as nested
            relationships). If no fields are passed, all relationships of the model are used.
        method: overrides the join method defined in the relationship (left or inner).
        on: custom join expression(s); only allowed with exactly one field.
        condition: custom join condition; only allowed with exactly one field.
        condition_and: extra condition handed to the relationship(s), e.g. for an inner join.

    Raises:
        ValueError: if `condition` and `on` are combined, or if either is used with
            anything other than exactly one field.
    """
    # todo: allow limiting amount of related rows returned for join?
    # todo: it would be nice if 'fields' could be an actual relationship
    #   (Article.tags = list[Tag]) and you could change the .condition and .on
    #   this could deprecate condition_and
    available = self.model.get_relationships()

    if condition and on:
        raise ValueError("condition and on can not be used together!")

    if condition:
        if len(fields) != 1:
            raise ValueError("join(field, condition=...) can only be used with exactly one field!")

        # A ready-made query is wrapped so it can be used like a condition callable.
        if isinstance(condition, pydal.objects.Query):
            condition = as_lambda(condition)

        target = t.cast(t.Type[TypedTable], fields[0])
        custom = Relationship(target, condition=condition, join=method, condition_and=condition_and)
        return self._extend(relationships={str(target): custom})

    if on:
        if len(fields) != 1:
            raise ValueError("join(field, on=...) can only be used with exactly one field!")

        # Normalize: single expression -> list -> callable.
        if isinstance(on, pydal.objects.Expression):
            on = [on]

        if isinstance(on, list):
            on = as_lambda(on)

        target = t.cast(t.Type[TypedTable], fields[0])
        custom = Relationship(target, on=on, join=method, condition_and=condition_and)
        return self._extend(relationships={str(target): custom})

    # Neither `condition` nor `on`: use the relationships defined on the model.
    #   simple: 'relationship'
    #     -> {'relationship': Relationship('relationship')}
    #   complex with one: relationship.with_nested
    #     -> {'relationship': Relationship('relationship', nested=[Relationship('with_nested')])
    #   complex with two: relationship.with_nested, relationship.no2
    #     -> {'relationship': Relationship('relationship',
    #           nested=[Relationship('with_nested'), Relationship('no2')])
    selected = available
    if fields:
        selected = self._parse_relationships(fields, method=method, condition_and=condition_and)

    if method:
        selected = {
            str(name): relation.clone(join=method, condition_and=condition_and)
            for name, relation in selected.items()
        }

    return self._extend(relationships=selected)
332
+
333
def cache(
    self,
    *deps: t.Any,
    expires_at: t.Optional[dt.datetime] = None,
    ttl: t.Optional[int | dt.timedelta] = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Enable caching for this query to load repeated calls from a dill row \
        instead of executing the sql and collecting matching rows again.

    Args:
        deps: dependencies; their string form is appended to the existing "depends_on" list.
        expires_at: passed to `get_expire` to determine the expiry.
        ttl: passed to `get_expire` to determine the expiry.

    Returns:
        A new builder whose metadata has an enabled "cache" entry.
    """
    previous = self.metadata.get("cache", {})

    # Existing cache settings are kept; these three keys are (re)set on every call.
    overrides = {
        "enabled": True,
        "depends_on": previous.get("depends_on", []) + [str(_) for _ in deps],
        "expires_at": get_expire(expires_at=expires_at, ttl=ttl),
    }

    metadata: Metadata = {}
    metadata["cache"] = t.cast(CacheMetadata, previous | overrides)
    return self._extend(metadata=metadata)
359
+
360
+ def _get_db(self) -> TypeDAL:
361
+ return self.model._db or throw(EnvironmentError("@define or db.define is not called on this class yet!"))
362
+
363
def _select_arg_convert(self, arg: t.Any) -> t.Any:
    """
    Unwrap a TypedField into its underlying `_field`; anything else is returned unchanged.
    """
    # typedfield are not really used at runtime anymore, but leave it in for safety:
    if not isinstance(arg, TypedField):
        return arg

    return arg._field  # pragma: no cover
369
+
370
def delete(self) -> list[int]:
    """
    Delete the rows matching the current query.

    Returns:
        The ids that matched right before deleting, or an empty list if the delete call reports a falsy result.
    """
    db = self._get_db()
    # The ids have to be collected first: after the delete they can no longer be selected.
    matched_ids = [row.id for row in db(self.query).select("id")]
    deleted = db(self.query).delete()
    return matched_ids if deleted else []
381
+
382
+ def _delete(self) -> str:
383
+ db = self._get_db()
384
+ return str(db(self.query)._delete())
385
+
386
def update(self, **fields: t.Any) -> list[int]:
    """
    Update `fields` on the rows matching the current query.

    Returns:
        The ids that matched right before updating, or an empty list if the update call reports a falsy result.
    """
    # todo: limit?
    db = self._get_db()
    matched_ids = db(self.query).select("id").column("id")
    changed = db(self.query).update(**fields)
    return matched_ids if changed else []
398
+
399
+ def _update(self, **fields: t.Any) -> str:
400
+ db = self._get_db()
401
+ return str(db(self.query)._update(**fields))
402
+
403
+ def _before_query(self, mut_metadata: Metadata, add_id: bool = True) -> tuple[Query, list[t.Any], SelectKwargs]:
404
+ select_args = [self._select_arg_convert(_) for _ in self.select_args] or [self.model.ALL]
405
+ select_kwargs = self.select_kwargs.copy()
406
+ query = self.query
407
+ model = self.model
408
+ mut_metadata["query"] = query
409
+ # require at least id of main table:
410
+ select_fields = ", ".join([str(_) for _ in select_args])
411
+ tablename = str(model)
412
+
413
+ if add_id and f"{tablename}.id" not in select_fields:
414
+ # fields of other selected, but required ID is missing.
415
+ select_args.append(model.id)
416
+
417
+ if self.relationships:
418
+ query, select_args = self._handle_relationships_pre_select(query, select_args, select_kwargs, mut_metadata)
419
+
420
+ return query, select_args, select_kwargs
421
+
422
def to_sql(self, add_id: bool = False) -> str:
    """
    Build the SQL of the current query without executing it.

    Args:
        add_id: passed to `_before_query`; whether the id of the main table is added to the selection.
    """
    prepared_query, prepared_args, prepared_kwargs = self._before_query({}, add_id=add_id)
    statement = self._get_db()(prepared_query)._select(*prepared_args, **prepared_kwargs)
    return str(statement)
431
+
432
+ def _collect(self) -> str:
433
+ """
434
+ Alias for to_sql, pydal-like syntax.
435
+ """
436
+ return self.to_sql()
437
+
438
def _collect_cached(self, metadata: Metadata) -> "TypedRows[T_MetaInstance] | None":
    """
    Try to load the result of this query from the cache.

    The "cache" entry of `metadata` is updated in place: volatile keys are blanked while the
    cache key is computed, afterwards "expires_at" is restored and "key" is stored.

    Returns:
        Whatever `load_from_cache` yields for the computed key.
    """
    cache_settings = metadata["cache"]
    expires_at = cache_settings.get("expires_at")

    # key is partly dependant on cache metadata but not these:
    cache_settings.update(dict.fromkeys(("key", "status", "cached_at", "expires_at")))

    _, key = create_and_hash_cache_key(
        self.model,
        metadata,
        self.query,
        self.select_args,
        self.select_kwargs,
        self.relationships.keys(),
    )

    # re-set after creating key:
    cache_settings["expires_at"] = expires_at
    cache_settings["key"] = key

    return load_from_cache(key, self._get_db())
462
+
463
+ def execute(self, add_id: bool = False) -> Rows:
464
+ """
465
+ Raw version of .collect which only executes the SQL, without performing t.Any magic afterwards.
466
+ """
467
+ db = self._get_db()
468
+ metadata = t.cast(Metadata, self.metadata.copy())
469
+
470
+ query, select_args, select_kwargs = self._before_query(metadata, add_id=add_id)
471
+
472
+ return db(query).select(*select_args, **select_kwargs)
473
+
474
def collect(
    self,
    verbose: bool = False,
    _to: t.Type["TypedRows[t.Any]"] = None,
    add_id: bool = True,
) -> "TypedRows[T_MetaInstance]":
    """
    Execute the built query and turn it into model instances, while handling relationships.

    Args:
        verbose: print the generated SQL and the raw rows.
        _to: rows class used to build the result (defaults to TypedRows).
        add_id: passed to `_before_query`; whether the id of the main table is added to the selection.

    Returns:
        The typed rows: the cached result if caching is enabled and yields something,
        otherwise the freshly built result as passed through `save_to_cache`.
    """
    rows_class = TypedRows if _to is None else _to

    db = self._get_db()
    metadata = t.cast(Metadata, self.metadata.copy())

    if metadata.get("cache", {}).get("enabled"):
        cached = self._collect_cached(metadata)
        if cached:
            return cached

    query, select_args, select_kwargs = self._before_query(metadata, add_id=add_id)
    dataset = db(query)

    metadata["sql"] = dataset._select(*select_args, **select_kwargs)

    if verbose:  # pragma: no cover
        print(metadata["sql"])

    rows: Rows = dataset.select(*select_args, **select_kwargs)

    metadata["final_query"] = str(query)
    metadata["final_args"] = [str(_) for _ in select_args]
    metadata["final_kwargs"] = select_kwargs

    if verbose:  # pragma: no cover
        print(rows)

    if self.relationships:
        # harder: try to match rows to the belonging objects
        # assume structure of {'table': <data>} per row.
        # if that's not the case, return default behavior again
        typed_rows = self._collect_with_relationships(rows, metadata=metadata, _to=rows_class)
    else:
        # easy
        typed_rows = rows_class.from_rows(rows, self.model, metadata=metadata)

    # only saves if requested in metadata:
    return save_to_cache(typed_rows, rows)
520
+
521
@t.overload
def column(self, field: TypedField[T], **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    With a typed field, the element type of the result is known.
    """

@t.overload
def column(self, field: T, **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    For anything else the element type is only loosely determined (assumes `field: type` or Any).
    """

def column(self, field: TypedField[T] | T, **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    Return the values of a single column.

    Shortcut for `.select(field, **options).execute().column(field)`.
    """
    raw_rows = self.select(field, **options).execute()
    return raw_rows.column(field)
540
+
541
+ def _handle_relationships_pre_select(
542
+ self,
543
+ query: Query,
544
+ select_args: list[t.Any],
545
+ select_kwargs: SelectKwargs,
546
+ metadata: Metadata,
547
+ ) -> tuple[Query, list[t.Any]]:
548
+ """Handle relationship joins and field selection for database query."""
549
+ # Collect all relationship keys including nested ones
550
+ metadata["relationships"] = self._collect_all_relationship_keys()
551
+
552
+ # Build joins and apply limitby optimization if needed
553
+ inner_joins = self._build_inner_joins()
554
+ query = self._apply_limitby_optimization(query, select_kwargs, inner_joins, metadata)
555
+
556
+ if inner_joins:
557
+ select_kwargs["join"] = inner_joins
558
+
559
+ # Build left joins and handle field selection
560
+ left_joins: list[Expression] = []
561
+ select_args = self._build_left_joins_and_fields(select_args, left_joins)
562
+
563
+ select_kwargs["left"] = left_joins
564
+ return query, select_args
565
+
566
+ def _collect_all_relationship_keys(self) -> set[str]:
567
+ """Collect all relationship keys including nested ones."""
568
+ keys = set(self.relationships.keys())
569
+
570
+ for relation in self.relationships.values():
571
+ keys.update(self._collect_nested_keys(relation))
572
+
573
+ return keys
574
+
575
+ def _collect_nested_keys(self, relation: Relationship[t.Any], prefix: str = "") -> set[str]:
576
+ """Recursively collect nested relationship keys."""
577
+ keys = set()
578
+
579
+ for name, nested in relation.nested.items():
580
+ nested_key = f"{prefix}.{name}" if prefix else name
581
+ keys.add(nested_key)
582
+ keys.update(self._collect_nested_keys(nested, nested_key))
583
+
584
+ return keys
585
+
586
+ def _build_inner_joins(self) -> list[t.Any]:
587
+ """Build inner joins for relationships with conditions."""
588
+ joins = []
589
+
590
+ for key, relation in self.relationships.items():
591
+ joins.extend(self._build_inner_joins_recursive(relation, self.model, key))
592
+
593
+ return joins
594
+
595
+ def _build_inner_joins_recursive(
596
+ self, relation: Relationship[t.Any], parent_table: t.Type[TypedTable], key: str, parent_key: str = ""
597
+ ) -> list[t.Any]:
598
+ """Recursively build inner joins for a relationship and its nested relationships."""
599
+ db = self._get_db()
600
+ joins = []
601
+
602
+ # Handle current level
603
+ if relation.condition and relation.join == "inner":
604
+ other = relation.get_table(db)
605
+ other = other.with_alias(f"{key}_{hash(relation)}")
606
+ condition = relation.condition(parent_table, other)
607
+
608
+ if callable(relation.condition_and):
609
+ condition &= relation.condition_and(parent_table, other)
610
+
611
+ joins.append(other.on(condition))
612
+
613
+ # Process nested relationships
614
+ for nested_name, nested in relation.nested.items():
615
+ # todo: add additional test, deduplicate
616
+ nested_key = f"{parent_key}.{nested_name}" if parent_key else f"{key}.{nested_name}"
617
+ joins.extend(self._build_inner_joins_recursive(nested, other, nested_name, nested_key))
618
+
619
+ return joins
620
+
621
+ def _apply_limitby_optimization(
622
+ self,
623
+ query: Query,
624
+ select_kwargs: SelectKwargs,
625
+ joins: list[t.Any],
626
+ metadata: Metadata,
627
+ ) -> Query:
628
+ """Apply limitby optimization when relationships are present."""
629
+ if not (limitby := select_kwargs.pop("limitby", ())):
630
+ return query
631
+
632
+ db = self._get_db()
633
+ model = self.model
634
+
635
+ kwargs: SelectKwargs = select_kwargs.copy()
636
+ kwargs["limitby"] = limitby
637
+
638
+ if joins:
639
+ kwargs["join"] = joins
640
+
641
+ ids = db(query)._select(model.id, **kwargs)
642
+ query = model.id.belongs(ids)
643
+ metadata["ids"] = ids
644
+
645
+ return query
646
+
647
+ def _build_left_joins_and_fields(self, select_args: list[t.Any], left_joins: list[Expression]) -> list[t.Any]:
648
+ """
649
+ Build left joins and ensure required fields are selected.
650
+ """
651
+ for key, relation in self.relationships.items():
652
+ select_args = self._process_relationship_for_left_join(relation, key, select_args, left_joins, self.model)
653
+
654
+ return select_args
655
+
656
def _process_relationship_for_left_join(
    self,
    relation: Relationship[t.Any],
    key: str,
    select_args: list[t.Any],
    left_joins: list[Expression],
    parent_table: t.Type[TypedTable],
    parent_key: str = "",
) -> list[t.Any]:
    """
    Process a single relationship for left join and field selection.

    Args:
        relation: the relationship to process.
        key: name of the relationship; used (with the relation's hash) for the table alias.
        select_args: current select arguments; may be extended with fields of the related table.
        left_joins: list that collects the left join expressions (appended to in place).
        parent_table: table the relationship's condition / on is evaluated against.
        parent_key: dotted path of the parent levels, if any.

    Returns:
        The updated select arguments (after aliasing, these may be plain strings).
    """
    db = self._get_db()
    other = relation.get_table(db)
    method: JOIN_OPTIONS = relation.join or DEFAULT_JOIN_OPTION

    select_fields = ", ".join([str(_) for _ in select_args])
    # Name of the related table before any alias is applied; needed to rewrite select_args below.
    pre_alias = str(other)

    # Ensure required fields are selected
    select_args = self._ensure_relationship_fields(select_args, other, select_fields)

    # Build join condition
    if relation.on:
        # Custom .on condition - always left join
        on = relation.on(parent_table, other)
        if not isinstance(on, list):
            on = [on]

        # Anything that is not a pydal Expression is dropped.
        on = [_ for _ in on if isinstance(_, pydal.objects.Expression)]
        left_joins.extend(on)
    elif method == "left":
        # Generate left join condition
        other = other.with_alias(f"{key}_{hash(relation)}")
        condition = t.cast(Query, relation.condition(parent_table, other))

        if callable(relation.condition_and):
            condition &= relation.condition_and(parent_table, other)

        left_joins.append(other.on(condition))
    else:
        # Inner join (handled in _build_inner_joins)
        # Only the alias is applied here, so the selected fields refer to the same alias.
        other = other.with_alias(f"{key}_{hash(relation)}")

    # Handle aliasing in select_args
    select_args = self._update_select_args_with_alias(select_args, pre_alias, other)

    # Process nested relationships
    # The (possibly aliased) table of this level is the parent table of the nested level.
    for nested_name, nested in relation.nested.items():
        # todo: add additional test, deduplicate
        nested_key = f"{parent_key}.{nested_name}" if parent_key else f"{key}.{nested_name}"
        select_args = self._process_relationship_for_left_join(
            nested, nested_name, select_args, left_joins, other, nested_key
        )

    return select_args
710
+
711
+ def _ensure_relationship_fields(
712
+ self, select_args: list[t.Any], other: t.Type[TypedTable], select_fields: str
713
+ ) -> list[t.Any]:
714
+ """Ensure required fields from relationship table are selected."""
715
+ if f"{other}." not in select_fields:
716
+ # No fields of other selected, add .ALL
717
+ select_args.append(other.ALL)
718
+ elif f"{other}.id" not in select_fields:
719
+ # Fields of other selected, but required ID is missing
720
+ select_args.append(other.id)
721
+
722
+ return select_args
723
+
724
+ def _update_select_args_with_alias(
725
+ self, select_args: list[t.Any], pre_alias: str, other: t.Type[TypedTable]
726
+ ) -> list[t.Any]:
727
+ """Update select_args to use aliased table names."""
728
+ post_alias = str(other).split(" AS ")[-1]
729
+
730
+ if pre_alias != post_alias:
731
+ select_fields = ", ".join([str(_) for _ in select_args])
732
+ select_fields = select_fields.replace(f"{pre_alias}.", f"{post_alias}.")
733
+ select_args = select_fields.split(", ")
734
+
735
+ return select_args
736
+
737
def _collect_with_relationships(
    self,
    rows: Rows,
    metadata: Metadata,
    _to: t.Type["TypedRows[T_MetaInstance]"],
) -> "TypedRows[T_MetaInstance]":
    """
    Turn joined rows into model instances that carry their related records.

    A joined result can contain several rows for the same main record, so rows are grouped
    by the id of the main table and the relationship data of each row is attached to the
    record of that id.

    Args:
        rows: the raw result of the select.
        metadata: metadata passed on to the result.
        _to: rows class used to build the result.
    """
    db = self._get_db()
    main_table = self.model._ensure_table_defined()

    # id: Model
    records: dict[t.Any, T_MetaInstance] = {}

    # id: [Row]
    raw_per_id: dict[t.Any, list[t.Any]] = defaultdict(list)

    # Track what we've seen: main_id -> "column-relation_id"
    seen_relations: dict[str, set[str]] = defaultdict(set)

    for row in rows:
        main = row[main_table]
        main_id = main.id

        raw_per_id[main_id].append(normalize_table_keys(row))

        if main_id not in records:
            # First row for this id: create the instance and set up relationship defaults (once).
            fresh = self.model(main)
            records[main_id] = fresh
            fresh._with = list(self.relationships)

            for name, relationship in self.relationships.items():
                fresh[name] = [] if relationship.multiple else None

        record = records[main_id]

        # Process each top-level relationship
        for name, relationship in self.relationships.items():
            self._process_relationship_data(
                row=row,
                column=name,
                relation=relationship,
                parent_record=record,
                parent_id=main_id,
                seen_relations=seen_relations,
                db=db,
            )

    return _to(rows, self.model, records, metadata=metadata, raw=raw_per_id)
785
+
786
def _process_relationship_data(
    self,
    row: t.Any,
    column: str,
    relation: Relationship[t.Any],
    parent_record: t.Any,
    parent_id: t.Any,
    seen_relations: dict[str, set[str]],
    db: t.Any,
    path: str = "",
) -> t.Any | None:
    """
    Attach the data of one relationship in `row` to its parent record.

    Args:
        row: The database row containing relationship data
        column: The relationship column name
        relation: The Relationship object
        parent_record: The parent model instance to attach data to
        parent_id: ID of the parent for tracking
        seen_relations: Dict tracking which relationships we've already processed
        db: Database instance
        path: Current relationship path (e.g., "users.bestie")

    Returns:
        The created relationship instance (for nested processing), or None if the row holds
        no data for this relationship or the related record was already handled for this parent.
    """
    # Full path for tracking (e.g., "users", "users.bestie", "users.bestie.articles")
    current_path = column if not path else f"{path}.{column}"

    # The data is stored under the alias (column + hash) if present, else under the table name.
    alias = f"{column}_{hash(relation)}"
    if alias in row:
        relation_data = row[alias]
    else:
        relation_data = row.get(relation.get_table_name())

    # Skip if no data or NULL id
    if not relation_data or relation_data.id is None:
        return None

    # Every related record is attached only once per parent and path.
    seen_for_parent = seen_relations[parent_id]
    seen_key = f"{current_path}-{relation_data.id}"
    if seen_key in seen_for_parent:
        return None  # Already processed

    seen_for_parent.add(seen_key)

    # Create the relationship instance
    relation_table = relation.get_table(db)
    if looks_like(relation_table, TypedTable):
        instance = relation_table(relation_data)
    else:
        instance = relation_data

    # Process nested relationships on this instance
    if relation.nested:
        self._process_nested_relationships(
            row=row,
            relation=relation,
            instance=instance,
            parent_id=parent_id,
            seen_relations=seen_relations,
            db=db,
            path=current_path,
        )

    # Attach to parent
    if relation.multiple:
        parent_record[column].append(instance)
    else:
        parent_record[column] = instance

    return instance
861
+
862
+ def _process_nested_relationships(
863
+ self,
864
+ row: t.Any,
865
+ relation: Relationship[t.Any],
866
+ instance: t.Any,
867
+ parent_id: t.Any,
868
+ seen_relations: dict[str, set[str]],
869
+ db: t.Any,
870
+ path: str,
871
+ ) -> None:
872
+ """
873
+ Process all nested relationships for a given instance.
874
+
875
+ Args:
876
+ row: The database row containing relationship data
877
+ relation: The parent Relationship object containing nested relationships
878
+ instance: The instance to attach nested data to
879
+ parent_id: ID of the root parent for tracking
880
+ seen_relations: Dict tracking which relationships we've already processed
881
+ db: Database instance
882
+ path: Current relationship path
883
+ """
884
+ # Initialize nested relationship defaults on the instance
885
+ # Use __dict__ to avoid triggering __get__ descriptors
886
+ for nested_col, nested_relation in relation.nested.items():
887
+ if nested_col not in instance.__dict__:
888
+ instance.__dict__[nested_col] = [] if nested_relation.multiple else None
889
+
890
+ # Process each nested relationship
891
+ for nested_col, nested_relation in relation.nested.items():
892
+ self._process_relationship_data(
893
+ row=row,
894
+ column=nested_col,
895
+ relation=nested_relation,
896
+ parent_record=instance,
897
+ parent_id=parent_id,
898
+ seen_relations=seen_relations,
899
+ db=db,
900
+ path=path,
901
+ )
902
+
903
def collect_or_fail(self, exception: t.Optional[Exception] = None) -> "TypedRows[T_MetaInstance]":
    """
    Run .collect() and raise when the result is empty.

    The given `exception` is passed to `throw`; without one, a ValueError("Nothing found!") is used.
    """
    rows = self.collect()
    if rows:
        return rows

    return throw(exception or ValueError("Nothing found!"))
910
+
911
+ def __iter__(self) -> t.Generator[T_MetaInstance, None, None]:
912
+ """
913
+ You can start iterating a Query Builder object before calling collect, for ease of use.
914
+ """
915
+ yield from self.collect()
916
+
917
+ def __count(self, db: TypeDAL, distinct: t.Optional[bool] = None) -> Query:
918
+ # internal, shared logic between .count and ._count
919
+ model = self.model
920
+ query = self.query
921
+ for key, relation in self.relationships.items():
922
+ if (not relation.condition or relation.join != "inner") and not distinct:
923
+ continue
924
+
925
+ other = relation.get_table(db)
926
+ if not distinct:
927
+ # todo: can this lead to other issues?
928
+ other = other.with_alias(f"{key}_{hash(relation)}")
929
+ query &= relation.condition(model, other)
930
+
931
+ return query
932
+
933
def count(self, distinct: t.Optional[bool] = None) -> int:
    """
    Count the rows matching the current query.

    `distinct` is used both while building the count query and as the argument to pydal's count.
    """
    database = self._get_db()
    count_query = self.__count(database, distinct=distinct)
    matching = database(count_query)

    return matching.count(distinct)
941
+
942
+ def _count(self, distinct: t.Optional[bool] = None) -> str:
943
+ """
944
+ Return the SQL for .count().
945
+ """
946
+ db = self._get_db()
947
+ query = self.__count(db, distinct=distinct)
948
+
949
+ return t.cast(str, db(query)._count(distinct))
950
+
951
def exists(self) -> bool:
    """
    Determine whether any records match the current query.

    Returns:
        bool: True if .count() reports one or more records; otherwise, False.
    """
    return True if self.count() else False
961
+
962
+ def __paginate(
963
+ self,
964
+ limit: int,
965
+ page: int = 1,
966
+ ) -> "QueryBuilder[T_MetaInstance]":
967
+ available = self.count()
968
+
969
+ _from = limit * (page - 1)
970
+ _to = (limit * page) if limit else available
971
+
972
+ metadata: Metadata = {}
973
+
974
+ metadata["pagination"] = {
975
+ "limit": limit,
976
+ "current_page": page,
977
+ "max_page": math.ceil(available / limit) if limit else 1,
978
+ "rows": available,
979
+ "min_max": (_from, _to),
980
+ }
981
+
982
+ return self._extend(select_kwargs={"limitby": (_from, _to)}, metadata=metadata)
983
+
984
def paginate(self, limit: int, page: int = 1, verbose: bool = False) -> "PaginatedRows[T_MetaInstance]":
    """
    Collect a single page of results, expressed as the readable `page` and `limit`
    instead of pydal's internal limit and offset.

    Note: when using relationships, this limit is only applied to the 'main' table and any number of extra rows \
        can be loaded with relationship data!
    """
    paged_builder = self.__paginate(limit, page)
    collected = paged_builder.collect(verbose=verbose, _to=PaginatedRows)

    rows = t.cast(PaginatedRows[T_MetaInstance], collected)
    # keep a reference to the builder that produced this page on the result
    rows._query_builder = paged_builder
    return rows
997
+
998
+ def _paginate(
999
+ self,
1000
+ limit: int,
1001
+ page: int = 1,
1002
+ ) -> str:
1003
+ builder = self.__paginate(limit, page)
1004
+ return builder._collect()
1005
+
1006
def chunk(self, chunk_size: int) -> t.Generator["TypedRows[T_MetaInstance]", t.Any, None]:
    """
    Generator that yields rows from a paginated source in chunks.

    This function retrieves rows from a paginated data source in chunks of the
    specified `chunk_size` and yields them as TypedRows. Iteration stops at the
    first page that comes back empty.

    Args:
        chunk_size: number of rows per chunk; must be at least 1.

    Raises:
        ValueError: if `chunk_size` is smaller than 1 (raised when iteration starts).

    Example:
        ```
        for chunk_of_rows in Table.where(SomeTable.id > 5).chunk(100):
            for row in chunk_of_rows:
                # Process each row within the chunk.
                pass
        ```
    """
    if chunk_size < 1:
        # A limit of 0 means "no limit" to the pagination logic, so every page would
        # return the same full result and this loop would never terminate; negative
        # sizes would produce negative limitby bounds.
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size!r}")

    page = 1

    while rows := self.__paginate(chunk_size, page).collect():
        yield rows
        page += 1
1026
+
1027
def first(self, verbose: bool = False) -> T_MetaInstance | None:
    """
    Return the first row matching the currently built query, or None when there is none.

    Goes through paginate with a limit of 1, since it would be a waste to select more rows than needed.
    """
    single_page = self.paginate(page=1, limit=1, verbose=verbose)
    row = single_page.first()
    if not row:
        return None

    return self.model.from_row(row)
1037
+
1038
+ def _first(self) -> str:
1039
+ return self._paginate(page=1, limit=1)
1040
+
1041
def first_or_fail(self, exception: t.Optional[BaseException] = None, verbose: bool = False) -> T_MetaInstance:
    """
    Run .first() and raise when it finds nothing.

    The given `exception` is passed to `throw`; without one, a ValueError("Nothing found!") is used.
    """
    found = self.first(verbose=verbose)
    if found:
        return found

    return throw(exception or ValueError("Nothing found!"))
1048
+
1049
+
1050
+ # note: these imports exist at the bottom of this file to prevent circular import issues:
1051
+
1052
+ from .caching import ( # noqa: E402
1053
+ create_and_hash_cache_key,
1054
+ get_expire,
1055
+ load_from_cache,
1056
+ save_to_cache,
1057
+ )
1058
+ from .relationships import Relationship # noqa: E402
1059
+ from .rows import PaginatedRows, TypedRows # noqa: E402