TypeDAL 3.12.1__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1119 @@
1
+ """
2
+ Contains base functionality related to the Query Builder.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime as dt
8
+ import math
9
+ import typing as t
10
+ from collections import defaultdict
11
+
12
+ import pydal.objects
13
+
14
+ from .constants import DEFAULT_JOIN_OPTION, JOIN_OPTIONS
15
+ from .core import TypeDAL
16
+ from .fields import TypedField, is_typed_field
17
+ from .helpers import (
18
+ DummyQuery,
19
+ as_lambda,
20
+ filter_out,
21
+ looks_like,
22
+ normalize_table_keys,
23
+ throw,
24
+ )
25
+ from .tables import TableMeta, TypedTable
26
+ from .types import (
27
+ CacheMetadata,
28
+ Condition,
29
+ Expression,
30
+ Field,
31
+ Metadata,
32
+ OnQuery,
33
+ OrderBy,
34
+ Query,
35
+ Rows,
36
+ SelectKwargs,
37
+ T,
38
+ T_MetaInstance,
39
+ )
40
+
41
+
42
+ class QueryBuilder(t.Generic[T_MetaInstance]):
43
+ """
44
+ Abstraction on top of pydal's query system.
45
+ """
46
+
47
+ model: t.Type[T_MetaInstance]
48
+ query: Query
49
+ select_args: list[t.Any]
50
+ select_kwargs: SelectKwargs
51
+ relationships: dict[str, Relationship[t.Any]]
52
+ metadata: Metadata
53
+
54
+ def __init__(
55
+ self,
56
+ model: t.Type[T_MetaInstance],
57
+ add_query: t.Optional[Query] = None,
58
+ select_args: t.Optional[list[t.Any]] = None,
59
+ select_kwargs: t.Optional[SelectKwargs] = None,
60
+ relationships: dict[str, Relationship[t.Any]] = None,
61
+ metadata: Metadata = None,
62
+ ):
63
+ """
64
+ Normally, you wouldn't manually initialize a QueryBuilder but start using a method on a TypedTable.
65
+
66
+ Example:
67
+ MyTable.where(...) -> QueryBuilder[MyTable]
68
+ """
69
+ self.model = model
70
+ table = self._ensure_table_defined()
71
+
72
+ default_query = table.id > 0
73
+ self.query = add_query or default_query
74
+ self.select_args = select_args or []
75
+ self.select_kwargs = select_kwargs or {}
76
+ self.relationships = relationships or {}
77
+ self.metadata = metadata or {}
78
+
79
+ def _ensure_table_defined(self):
80
+ model = self.model
81
+ if hasattr(model, "_ensure_table_defined"):
82
+ return model._ensure_table_defined()
83
+ else:
84
+ # already a pydal table
85
+ return model
86
+
87
+ def __str__(self) -> str:
88
+ """
89
+ Simple string representation for the query builder.
90
+ """
91
+ return f"QueryBuilder for {self.model}"
92
+
93
+ def __repr__(self) -> str:
94
+ """
95
+ Advanced string representation for the query builder.
96
+ """
97
+ return (
98
+ f"<QueryBuilder for {self.model} with "
99
+ f"{len(self.select_args)} select args; "
100
+ f"{len(self.select_kwargs)} select kwargs; "
101
+ f"{len(self.relationships)} relationships; "
102
+ f"query: {bool(self.query)}; "
103
+ f"metadata: {self.metadata}; "
104
+ f">"
105
+ )
106
+
107
+ def __bool__(self) -> bool:
108
+ """
109
+ Querybuilder is truthy if it has t.Any conditions.
110
+ """
111
+ table = self._ensure_table_defined()
112
+ default_query = table.id > 0
113
+ return any(
114
+ [
115
+ self.query != default_query,
116
+ self.select_args,
117
+ self.select_kwargs,
118
+ self.relationships,
119
+ self.metadata,
120
+ ],
121
+ )
122
+
123
def _extend(
    self,
    add_query: t.Optional[Query] = None,
    overwrite_query: t.Optional[Query] = None,
    select_args: t.Optional[list[t.Any]] = None,
    select_kwargs: t.Optional[SelectKwargs] = None,
    relationships: dict[str, Relationship[t.Any]] = None,
    metadata: Metadata = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Create a new QueryBuilder that combines the current state with the given additions.

    - `add_query` is ANDed with the current query; if it is absent, `overwrite_query`
      replaces the current query (or the current query is kept when that is absent too).
    - `select_args` are appended; `select_kwargs`, `relationships` and `metadata`
      are merged over the current values (new keys win).
    """
    if add_query:
        next_query = add_query & self.query
    else:
        next_query = overwrite_query or self.query

    next_args = self.select_args
    if select_args:
        next_args = self.select_args + select_args

    next_kwargs = self.select_kwargs
    if select_kwargs:
        next_kwargs = self.select_kwargs | select_kwargs

    next_relationships = self.relationships
    if relationships:
        next_relationships = self.relationships | relationships

    next_metadata = self.metadata
    if metadata:
        next_metadata = self.metadata | metadata

    return QueryBuilder(
        self.model,
        next_query,
        next_args,
        next_kwargs,
        next_relationships,
        next_metadata,
    )
140
+
141
def select(self, *fields: t.Any, **options: t.Unpack[SelectKwargs]) -> "QueryBuilder[T_MetaInstance]":
    """
    Add fields and select options to the builder.

    Fields: database columns by name ('id'), by field reference (table.id) or other (e.g. table.ALL).

    Options:
        paraphrased from the web2py pydal docs,
        For more info, see http://www.web2py.com/books/default/chapter/29/06/the-database-abstraction-layer#orderby-groupby-limitby-distinct-having-orderby_on_limitby-join-left-cache

        orderby: field(s) to order by. Supported:
            table.name - sort by name, ascending
            ~table.name - sort by name, descending
            <random> - sort randomly
            table.name|table.id - sort by two fields (first name, then id)

        groupby, having: together with orderby:
            groupby can be a field (e.g. table.name) to group records by
            having can be a query, only those `having` the condition are grouped

        limitby: tuple of min and max. When using the query builder, .paginate(limit, page) is recommended.
        distinct: bool/field. Only select rows that differ
        orderby_on_limitby (bool, default: True): by default, an implicit orderby is added when doing limitby.
        join: othertable.on(query) - do an INNER JOIN. Using TypeDAL relationships with .join() is recommended!
        left: othertable.on(query) - do a LEFT JOIN. Using TypeDAL relationships with .join() is recommended!
        cache: cache the query result to speed up repeated queries; e.g. (cache=(cache.ram, 3600), cacheable=True)

    Returns:
        A new QueryBuilder; the current one is left untouched.
    """
    extra_args = list(fields)
    return self._extend(select_args=extra_args, select_kwargs=options)
167
+
168
def orderby(self, *fields: OrderBy) -> "QueryBuilder[T_MetaInstance]":
    """
    Order the query results by the given fields.

    Args:
        fields: field(s) to order by. Supported:
            table.name - sort by name, ascending
            ~table.name - sort by name, descending
            <random> - sort randomly
            table.name|table.id - sort by two fields (first name, then id)

    Returns:
        QueryBuilder: A new QueryBuilder instance with the ordering applied
        (the tuple of fields is passed on as the `orderby` select option).
    """
    ordering = fields
    return self.select(orderby=ordering)
183
+
184
def where(
    self,
    *queries_or_lambdas: Query | t.Callable[[t.Type[T_MetaInstance]], Query] | dict[str, t.Any],
    **filters: t.Any,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Extend the builder's query.

    Can be used in multiple ways:
    .where(Query) -> with a direct query such as `Table.id == 5`
    .where(lambda table: table.id == 5) -> with a query via a lambda
    .where(id=5) -> via keyword arguments
    .where(Table.field) -> a bare field is interpreted as `field != None`
    .where({"id": 5}) -> a dict; its items are ANDed together

    When using multiple where's, they will be ANDed:
        .where(lambda table: table.id == 5).where(lambda table: table.id == 6) == (table.id == 5) & (table.id == 6)
    When passing multiple queries to a single .where, they will be ORed:
        .where(lambda table: table.id == 5, lambda table: table.id == 6) == (table.id == 5) | (table.id == 6)

    Raises:
        ValueError: when an argument is of a type that can not be turned into a query.
    """
    combined = self.query
    table = self._ensure_table_defined()

    # The keyword filters are treated as one extra dict-style condition.
    conditions = (
        *queries_or_lambdas,
        filters,
    )

    # Everything passed to a single .where() call is ORed into `any_of`.
    any_of = t.cast(Query, DummyQuery())
    for part in conditions:
        if isinstance(part, (Field, pydal.objects.Field)) or is_typed_field(part):
            # `!= None` (not `is not None`) on purpose: the comparison operator builds the query.
            any_of |= t.cast(Query, part != None)
        elif isinstance(part, (pydal.objects.Query, Expression, pydal.objects.Expression)):
            any_of |= t.cast(Query, part)
        elif callable(part):
            produced = part(self.model)
            if produced:
                any_of |= produced
        elif isinstance(part, dict):
            # All items of one dict must match, so these are ANDed.
            all_of = DummyQuery()
            for name, expected in part.items():
                all_of &= table[name] == expected
            if all_of:
                any_of |= all_of
        else:
            raise ValueError(f"Unexpected query type ({type(part)}).")

    if any_of:
        combined &= any_of

    return self._extend(overwrite_query=combined)
232
+
233
def _parse_relationships(
    self, fields: t.Iterable[str | t.Type[TypedTable]], method: JOIN_OPTIONS = None, **update: t.Any
) -> dict[str, Relationship[t.Any]]:
    """
    Parse relationship fields into a dict of base relationships with nested relationships.

    Args:
        fields: Iterable of relationship field names
               (e.g., ['relationship', 'relationship.with_nested', 'relationship.no2'])
        method: join method handed to every cloned relationship (base and nested)
        **update: extra keyword arguments handed to the clone of each *base* relationship only

    Returns:
        Dict mapping base relationship names to Relationship objects with nested relationships
        Example: {'relationship': Relationship('relationship',
                    nested={'with_nested': Relationship(),
                            'no2': Relationship()})}
    """
    parsed: dict[str, Relationship[t.Any]] = {}
    available = self.model.get_relationships()
    db = self._get_db()

    for field in fields:
        # 'a.b.c' -> base 'a', nested levels ['b', 'c']
        base_name, *nested_names = str(field).split(".")

        # Clone the base relationship only the first time it is encountered.
        if base_name not in parsed:
            parsed[base_name] = available[base_name].clone(join=method, **update)

        # Walk down the dotted path, creating missing nested relationships on the way.
        cursor = parsed[base_name]
        for level in nested_names:
            if level not in cursor.nested:
                cursor.nested[level] = cursor.get_table(db).get_relationships()[level].clone(join=method)

            cursor = cursor.nested[level]

    return parsed
277
+
278
def join(
    self,
    *fields: str | t.Type[TypedTable] | Relationship[t.Any],
    method: JOIN_OPTIONS = None,
    on: OnQuery | list[Expression] | Expression = None,
    condition: Condition = None,
    condition_and: Condition = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Include relationship fields in the result.

    Supports:
        - join("example")
        - join(Table.example, "second", method="left")
        - join(Table.example, on=...)
        - join(Table.example, condition=...)

    `fields` can be names of Relationships on the current model, or Relationship instances.
    If no fields are passed, all relationships of the model that are not marked `explicit` are joined.

    By default, the `method` defined in the relationship is used.
    This can be overwritten with the `method` keyword argument (left or inner)

    `condition_and` can be used to add extra conditions to an inner join.

    Raises:
        ValueError: when `condition` or `on` is combined with anything other than exactly one field.
        Whatever `Relationship._error_duplicate_condition` produces when both `condition` and `on` are given.

    Returns:
        A new QueryBuilder with the selected relationships added.
    """
    # todo: allow limiting amount of related rows returned for join?
    # todo: it would be nice if 'fields' could be an actual relationship
    #   (Article.tags = list[Tag]) and you could change the .condition and .on
    #   this could deprecate condition_and

    # `condition` and `on` are mutually exclusive ways to describe the join.
    if condition and on:
        raise Relationship._error_duplicate_condition(condition, on)  # type: ignore

    relationships: dict[str, Relationship[t.Any]]

    if condition:
        # Custom condition: only meaningful for a single target.
        if len(fields) != 1:
            raise ValueError("join(field, condition=...) can only be used with exactly one field!")

        # A ready-made query is wrapped so it can be called like the callable form.
        if isinstance(condition, pydal.objects.Query):
            condition = as_lambda(condition)

        field = fields[0]
        if isinstance(field, Relationship) and field.name:
            # Existing, named relationship: clone it with the new condition (and drop any `on`).
            relationships = {
                field.name: field.clone(condition=condition, on=None, join=method, condition_and=condition_and)
            }
        else:
            # Otherwise the field is treated as the target table of a brand new relationship.
            to_field = t.cast(t.Type[TypedTable], field)
            relationships = {
                str(to_field): Relationship(to_field, condition=condition, join=method, condition_and=condition_and)
            }
    elif on:
        # Custom `on`: only meaningful for a single target.
        if len(fields) != 1:
            raise ValueError("join(field, on=...) can only be used with exactly one field!")

        # Normalize: single expression -> list -> callable.
        if isinstance(on, pydal.objects.Expression):
            on = [on]

        if isinstance(on, list):
            on = as_lambda(on)

        field = fields[0]
        if isinstance(field, Relationship) and field.name:
            # Existing, named relationship: clone it with the new `on` (and drop any condition).
            relationships = {
                field.name: field.clone(on=on, join=method, condition=None, condition_and=condition_and)
            }
        else:
            to_field = t.cast(t.Type[TypedTable], field)
            relationships = {str(to_field): Relationship(to_field, on=on, join=method, condition_and=condition_and)}
    elif fields:
        # join on every relationship
        # simple: 'relationship'
        #   -> {'relationship': Relationship('relationship')}
        # complex with one: relationship.with_nested
        #   -> {'relationship': Relationship('relationship', nested=[Relationship('with_nested')])
        # complex with two: relationship.with_nested, relationship.no2
        #   -> {'relationship': Relationship('relationship',
        #           nested=[Relationship('with_nested'), Relationship('no2')])

        # fields is a tuple so that's not mutable, filter_out requires a mutable dict:
        other_fields = {idx: field for idx, field in enumerate(fields)}
        # NOTE(review): presumably filter_out removes the Relationship instances from
        # `other_fields` and returns them - confirm against helpers.filter_out.
        relationship_instances = filter_out(other_fields, Relationship)

        relationships = {}

        # Clone direct Relationship instances (preserving their settings)
        for relationship in relationship_instances.values():
            if relationship.name:
                relationships[relationship.name] = relationship.clone(
                    join=method,
                    condition_and=condition_and,
                )

        # Parse and merge string/table fields
        if other_fields:
            parsed_relationships = self._parse_relationships(
                other_fields.values(),  # type: ignore
                method=method,
                condition_and=condition_and,
            )
            # Explicit Relationship instances take precedence
            # (the right-hand operand of `|` wins on duplicate keys)
            relationships = parsed_relationships | relationships

    else:
        # No fields given: use every relationship of the model that is not flagged `explicit`.
        relationships = {k: v for k, v in self.model.get_relationships().items() if not v.explicit}

    return self._extend(relationships=relationships)
389
+
390
def cache(
    self,
    *deps: t.Any,
    expires_at: t.Optional[dt.datetime] = None,
    ttl: t.Optional[int | dt.timedelta] = None,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Enable caching for this query, so repeated calls can be loaded from a stored result \
        instead of executing the sql and collecting matching rows again.

    Args:
        deps: extra dependencies; their string forms are appended to the existing `depends_on` list
        expires_at, ttl: passed to `get_expire` to determine the stored `expires_at` value

    Returns:
        A new QueryBuilder whose metadata has caching enabled.
    """
    existing = self.metadata.get("cache", {})
    depends_on = existing.get("depends_on", []) + [str(dep) for dep in deps]

    cache_meta = t.cast(
        CacheMetadata,
        existing
        | {
            "enabled": True,
            "depends_on": depends_on,
            "expires_at": get_expire(expires_at=expires_at, ttl=ttl),
        },
    )

    new_metadata: Metadata = {}
    new_metadata["cache"] = cache_meta
    return self._extend(metadata=new_metadata)
416
+
417
+ def _get_db(self) -> TypeDAL:
418
+ return self.model._db or throw(EnvironmentError("@define or db.define is not called on this class yet!"))
419
+
420
def _select_arg_convert(self, arg: t.Any) -> t.Any:
    """
    Normalize a single select argument: a TypedField is replaced by its `_field`, anything else is returned as is.
    """
    # typedfield are not really used at runtime anymore, but leave it in for safety:
    if not isinstance(arg, TypedField):
        return arg

    return arg._field  # pragma: no cover
426
+
427
def delete(self) -> list[int]:
    """
    Delete every row matching the current query.

    The ids are collected *before* deleting; they are returned when the delete
    reports a truthy result, otherwise an empty list is returned.
    """
    db = self._get_db()
    candidate_ids = [row.id for row in db(self.query).select("id")]

    deleted = db(self.query).delete()
    return candidate_ids if deleted else []
438
+
439
+ def _delete(self) -> str:
440
+ db = self._get_db()
441
+ return str(db(self.query)._delete())
442
+
443
def update(self, **fields: t.Any) -> list[int]:
    """
    Update `fields` on every row matching the current query.

    The ids are collected *before* updating; they are returned when the update
    reports a truthy result, otherwise an empty list is returned.
    """
    # todo: limit?
    db = self._get_db()
    candidate_ids = db(self.query).select("id").column("id")

    changed = db(self.query).update(**fields)
    return candidate_ids if changed else []
455
+
456
+ def _update(self, **fields: t.Any) -> str:
457
+ db = self._get_db()
458
+ return str(db(self.query)._update(**fields))
459
+
460
+ def _before_query(self, mut_metadata: Metadata, add_id: bool = True) -> tuple[Query, list[t.Any], SelectKwargs]:
461
+ select_args = [self._select_arg_convert(_) for _ in self.select_args] or [self.model.ALL]
462
+ select_kwargs = self.select_kwargs.copy()
463
+ query = self.query
464
+ model = self.model
465
+ mut_metadata["query"] = query
466
+ # require at least id of main table:
467
+ select_fields = ", ".join([str(_) for _ in select_args])
468
+ tablename = str(model)
469
+
470
+ if add_id and f"{tablename}.id" not in select_fields:
471
+ # fields of other selected, but required ID is missing.
472
+ select_args.append(model.id)
473
+
474
+ if self.relationships:
475
+ query, select_args = self._handle_relationships_pre_select(query, select_args, select_kwargs, mut_metadata)
476
+
477
+ return query, select_args, select_kwargs
478
+
479
def to_sql(self, add_id: bool = False) -> str:
    """
    Generate the SQL for the built query (via `db(query)._select(...)`), as a string.

    Args:
        add_id: forwarded to `_before_query`
    """
    db = self._get_db()
    scratch_metadata: Metadata = {}

    query, select_args, select_kwargs = self._before_query(scratch_metadata, add_id=add_id)
    statement = db(query)._select(*select_args, **select_kwargs)

    return str(statement)
488
+
489
+ def _collect(self) -> str:
490
+ """
491
+ Alias for to_sql, pydal-like syntax.
492
+ """
493
+ return self.to_sql()
494
+
495
def _collect_cached(self, metadata: Metadata) -> "TypedRows[T_MetaInstance] | None":
    """
    Try to load the result of this query from the cache.

    The cache key is computed from the model, the metadata, the query, the select
    arguments/options and the relationship names. Volatile cache fields
    (`key`, `status`, `cached_at`, `expires_at`) are blanked before hashing so they
    do not influence the key; `expires_at` and the computed `key` are stored afterwards.

    Args:
        metadata: metadata dict of the current run; must contain a `cache` entry.
            Its `cache` entry is replaced by a new dict (see below).

    Returns:
        Whatever `load_from_cache` returns for the computed key.
    """
    expires_at = metadata["cache"].get("expires_at")

    # Build a NEW dict instead of using `|=`: callers hand in a shallow copy of the
    # builder's metadata, so an in-place update would also modify the nested `cache`
    # dict of the builder itself (and of every builder sharing it).
    metadata["cache"] = metadata["cache"] | {
        # key is partly dependant on cache metadata but not these:
        "key": None,
        "status": None,
        "cached_at": None,
        "expires_at": None,
    }

    _, key = create_and_hash_cache_key(
        self.model,
        metadata,
        self.query,
        self.select_args,
        self.select_kwargs,
        self.relationships.keys(),
    )

    # re-set after creating key:
    metadata["cache"]["expires_at"] = expires_at
    metadata["cache"]["key"] = key

    return load_from_cache(key, self._get_db())
519
+
520
def execute(self, add_id: bool = False) -> Rows:
    """
    Raw version of .collect which only executes the SQL, without performing any magic afterwards.

    Args:
        add_id: forwarded to `_before_query`

    Returns:
        The result of `db(query).select(...)`.
    """
    db = self._get_db()
    run_metadata = self.metadata.copy()

    query, select_args, select_kwargs = self._before_query(run_metadata, add_id=add_id)
    result = db(query).select(*select_args, **select_kwargs)

    return result
530
+
531
def collect(
    self,
    verbose: bool = False,
    _to: t.Type["TypedRows[t.Any]"] = None,
    add_id: bool = True,
) -> "TypedRows[T_MetaInstance]":
    """
    Execute the built query and turn it into model instances, while handling relationships.

    Args:
        verbose: print the generated SQL and the raw rows
        _to: class used to build the result (defaults to TypedRows)
        add_id: forwarded to `_before_query` (and to `execute` in the fallback case)

    Returns:
        The typed rows, passed through `save_to_cache`; a cached result when caching is
        enabled and one was found; or the result of `execute` for non-TypeDAL tables.
    """
    if _to is None:
        _to = TypedRows

    if not isinstance(self.model, TableMeta):
        # tried to use querybuilder with a non-typedal table,
        # fallback to execute:
        return self.execute(add_id=add_id)

    db = self._get_db()
    # shallow copy: top-level keys added below do not end up on the builder itself
    metadata = self.metadata.copy()

    # Short-circuit when caching is enabled and a (truthy) cached result exists.
    if metadata.get("cache", {}).get("enabled") and (result := self._collect_cached(metadata)):
        return result

    query, select_args, select_kwargs = self._before_query(metadata, add_id=add_id)

    # Keep the generated SQL around for inspection.
    metadata["sql"] = db(query)._select(*select_args, **select_kwargs)

    if verbose:  # pragma: no cover
        print(metadata["sql"])

    rows: Rows = db(query).select(*select_args, **select_kwargs)

    metadata["final_query"] = str(query)
    metadata["final_args"] = [str(_) for _ in select_args]
    metadata["final_kwargs"] = select_kwargs

    if verbose:  # pragma: no cover
        print(rows)

    if not self.relationships:
        # easy
        typed_rows = _to.from_rows(rows, self.model, metadata=metadata)

    else:
        # harder: try to match rows to the belonging objects
        # assume structure of {'table': <data>} per row.
        # if that's not the case, return default behavior again
        typed_rows = self._collect_with_relationships(rows, metadata=metadata, _to=_to)

    # only saves if requested in metadata:
    return save_to_cache(typed_rows, rows)
582
+
583
@t.overload
def column(self, field: TypedField[T], **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    With a typed field, the element type of the output is known exactly.
    """

@t.overload
def column(self, field: T, **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    For anything else, the output type is only loosely determined (assumes `field: type` or any).
    """

def column(self, field: TypedField[T] | T, **options: t.Unpack[SelectKwargs]) -> list[T]:
    """
    Get all values in a specific column.

    Shortcut for `.select(field).execute().column(field)`; `options` are passed to `.select`.
    """
    narrowed = self.select(field, **options)
    rows = narrowed.execute()
    return rows.column(field)
602
+
603
+ def _handle_relationships_pre_select(
604
+ self,
605
+ query: Query,
606
+ select_args: list[t.Any],
607
+ select_kwargs: SelectKwargs,
608
+ metadata: Metadata,
609
+ ) -> tuple[Query, list[t.Any]]:
610
+ """Handle relationship joins and field selection for database query."""
611
+ # Collect all relationship keys including nested ones
612
+ metadata["relationships"] = self._collect_all_relationship_keys()
613
+
614
+ # Build joins and apply limitby optimization if needed
615
+ inner_joins = self._build_inner_joins()
616
+ query = self._apply_limitby_optimization(query, select_kwargs, inner_joins, metadata)
617
+
618
+ if inner_joins:
619
+ select_kwargs["join"] = inner_joins
620
+
621
+ # Build left joins and handle field selection
622
+ left_joins: list[Expression] = []
623
+ select_args = self._build_left_joins_and_fields(select_args, left_joins)
624
+
625
+ select_kwargs["left"] = left_joins
626
+ return query, select_args
627
+
628
+ def _collect_all_relationship_keys(self) -> set[str]:
629
+ """Collect all relationship keys including nested ones."""
630
+ keys = set(self.relationships.keys())
631
+
632
+ for relation in self.relationships.values():
633
+ keys.update(self._collect_nested_keys(relation))
634
+
635
+ return keys
636
+
637
+ def _collect_nested_keys(self, relation: Relationship[t.Any], prefix: str = "") -> set[str]:
638
+ """Recursively collect nested relationship keys."""
639
+ keys = set()
640
+
641
+ for name, nested in relation.nested.items():
642
+ nested_key = f"{prefix}.{name}" if prefix else name
643
+ keys.add(nested_key)
644
+ keys.update(self._collect_nested_keys(nested, nested_key))
645
+
646
+ return keys
647
+
648
+ def _build_inner_joins(self) -> list[t.Any]:
649
+ """Build inner joins for relationships with conditions."""
650
+ joins = []
651
+
652
+ for key, relation in self.relationships.items():
653
+ joins.extend(self._build_inner_joins_recursive(relation, self.model, key))
654
+
655
+ return joins
656
+
657
def _build_inner_joins_recursive(
    self, relation: Relationship[t.Any], parent_table: t.Type[TypedTable], key: str, parent_key: str = ""
) -> list[t.Any]:
    """
    Recursively build inner joins for a relationship and its nested relationships.

    Args:
        relation: relationship to build the join for
        parent_table: table the relationship's condition is evaluated against
        key: name of the relationship; used (together with the relationship's hash) as table alias
        parent_key: dotted path of the parent, used to build the key handed to nested calls

    Returns:
        The `.on(...)` join expressions for this relationship and the relationships nested below it.
    """
    db = self._get_db()
    joins = []

    # Handle current level
    if relation.condition and relation.join == "inner":
        other = relation.get_table(db)
        # alias must match the one used when the rows are read back (`{key}_{hash(relation)}`)
        other = other.with_alias(f"{key}_{hash(relation)}")
        condition = relation.condition(parent_table, other)

        if callable(relation.condition_and):
            condition &= relation.condition_and(parent_table, other)

        joins.append(other.on(condition))

        # Process nested relationships
        # NOTE(review): kept inside the inner-join branch because `other` (the parent table
        # of the nested joins) is only bound here - confirm against the original indentation.
        for nested_name, nested in relation.nested.items():
            # todo: add additional test, deduplicate
            nested_key = f"{parent_key}.{nested_name}" if parent_key else f"{key}.{nested_name}"
            joins.extend(self._build_inner_joins_recursive(nested, other, nested_name, nested_key))

    return joins
682
+
683
+ def _apply_limitby_optimization(
684
+ self,
685
+ query: Query,
686
+ select_kwargs: SelectKwargs,
687
+ joins: list[t.Any],
688
+ metadata: Metadata,
689
+ ) -> Query:
690
+ """Apply limitby optimization when relationships are present."""
691
+ if not (limitby := select_kwargs.pop("limitby", ())):
692
+ return query
693
+
694
+ db = self._get_db()
695
+ model = self.model
696
+
697
+ kwargs: SelectKwargs = select_kwargs.copy()
698
+ kwargs["limitby"] = limitby
699
+
700
+ if joins:
701
+ kwargs["join"] = joins
702
+
703
+ ids = db(query)._select(model.id, **kwargs)
704
+ query = model.id.belongs(ids)
705
+ metadata["ids"] = ids
706
+
707
+ return query
708
+
709
+ def _build_left_joins_and_fields(self, select_args: list[t.Any], left_joins: list[Expression]) -> list[t.Any]:
710
+ """
711
+ Build left joins and ensure required fields are selected.
712
+ """
713
+ for key, relation in self.relationships.items():
714
+ select_args = self._process_relationship_for_left_join(relation, key, select_args, left_joins, self.model)
715
+
716
+ return select_args
717
+
718
def _process_relationship_for_left_join(
    self,
    relation: Relationship[t.Any],
    key: str,
    select_args: list[t.Any],
    left_joins: list[Expression],
    parent_table: t.Type[TypedTable],
    parent_key: str = "",
) -> list[t.Any]:
    """
    Process a single relationship for left join and field selection.

    Args:
        relation: relationship to process
        key: name of the relationship; used (together with the relationship's hash) as table alias
        select_args: current select arguments
        left_joins: list that is extended in place with the join expressions
        parent_table: table the relationship's `on`/`condition` is evaluated against
        parent_key: dotted path of the parent, used to build the key handed to nested calls

    Returns:
        The updated select arguments.
    """
    db = self._get_db()
    other = relation.get_table(db)
    method: JOIN_OPTIONS = relation.join or DEFAULT_JOIN_OPTION

    # String form of the selection, taken *before* any fields are added below.
    select_fields = ", ".join([str(_) for _ in select_args])
    # Table name before aliasing; needed later to rewrite the selected fields.
    pre_alias = str(other)

    # Ensure required fields are selected
    select_args = self._ensure_relationship_fields(select_args, other, select_fields)

    # Build join condition
    if relation.on:
        # Custom .on condition - always left join
        # (note: `other` is not aliased in this branch)
        on = relation.on(parent_table, other)
        if not isinstance(on, list):
            on = [on]

        # anything that is not a pydal Expression is dropped
        on = [_ for _ in on if isinstance(_, pydal.objects.Expression)]
        left_joins.extend(on)
    elif method == "left":
        # Generate left join condition
        other = other.with_alias(f"{key}_{hash(relation)}")
        condition = t.cast(Query, relation.condition(parent_table, other))

        if callable(relation.condition_and):
            condition &= relation.condition_and(parent_table, other)

        left_joins.append(other.on(condition))
    else:
        # Inner join (handled in _build_inner_joins)
        # only the alias is needed here, so the selected fields refer to the joined table
        other = other.with_alias(f"{key}_{hash(relation)}")

    # Handle aliasing in select_args
    select_args = self._update_select_args_with_alias(select_args, pre_alias, other)

    # Process nested relationships
    # (the possibly aliased `other` acts as parent table for them)
    for nested_name, nested in relation.nested.items():
        # todo: add additional test, deduplicate
        nested_key = f"{parent_key}.{nested_name}" if parent_key else f"{key}.{nested_name}"
        select_args = self._process_relationship_for_left_join(
            nested, nested_name, select_args, left_joins, other, nested_key
        )

    return select_args
772
+
773
+ def _ensure_relationship_fields(
774
+ self, select_args: list[t.Any], other: t.Type[TypedTable], select_fields: str
775
+ ) -> list[t.Any]:
776
+ """Ensure required fields from relationship table are selected."""
777
+ if f"{other}." not in select_fields:
778
+ # No fields of other selected, add .ALL
779
+ select_args.append(other.ALL)
780
+ elif f"{other}.id" not in select_fields:
781
+ # Fields of other selected, but required ID is missing
782
+ select_args.append(other.id)
783
+
784
+ return select_args
785
+
786
+ def _update_select_args_with_alias(
787
+ self, select_args: list[t.Any], pre_alias: str, other: t.Type[TypedTable]
788
+ ) -> list[t.Any]:
789
+ """Update select_args to use aliased table names."""
790
+ post_alias = str(other).split(" AS ")[-1]
791
+
792
+ if pre_alias != post_alias:
793
+ select_fields = ", ".join([str(_) for _ in select_args])
794
+ select_fields = select_fields.replace(f"{pre_alias}.", f"{post_alias}.")
795
+ select_args = select_fields.split(", ")
796
+
797
+ return select_args
798
+
799
def _collect_with_relationships(
    self,
    rows: Rows,
    metadata: Metadata,
    _to: t.Type["TypedRows[T_MetaInstance]"],
) -> "TypedRows[T_MetaInstance]":
    """
    Transform the raw rows into Typed Table model instances with nested relationships.

    Args:
        rows: raw result rows; each row is indexed by table to get the main record
        metadata: metadata handed to the resulting `_to` instance
        _to: class used to build the result

    Returns:
        `_to(rows, model, records, metadata=..., raw=...)` where `records` maps the id of
        the main record to its model instance and `raw` maps it to the normalized raw rows.
    """
    db = self._get_db()
    main_table = self._ensure_table_defined()

    # id: Model
    records: dict[t.Any, T_MetaInstance] = {}

    # id: [Row]
    raw_per_id: dict[t.Any, list[t.Any]] = defaultdict(list)

    # Track what we've seen: main_id -> "column-relation_id"
    seen_relations: dict[str, set[str]] = defaultdict(set)

    for row in rows:
        main = row[main_table]
        main_id = main.id

        # every raw row is kept, also when the main record was seen before
        raw_per_id[main_id].append(normalize_table_keys(row))

        if main_id not in records:
            # first time this main record shows up: create the instance
            records[main_id] = self.model(main)
            records[main_id]._with = list(self.relationships.keys())

            # Setup all relationship defaults (once)
            for col, relationship in self.relationships.items():
                records[main_id][col] = [] if relationship.multiple else None

        # Process each top-level relationship
        for column, relation in self.relationships.items():
            self._process_relationship_data(
                row=row,
                column=column,
                relation=relation,
                parent_record=records[main_id],
                parent_id=main_id,
                seen_relations=seen_relations,
                db=db,
            )

    return _to(rows, self.model, records, metadata=metadata, raw=raw_per_id)
847
+
848
def _process_relationship_data(
    self,
    row: t.Any,
    column: str,
    relation: Relationship[t.Any],
    parent_record: t.Any,
    parent_id: t.Any,
    seen_relations: dict[str, set[str]],
    db: t.Any,
    path: str = "",
) -> t.Any | None:
    """
    Process relationship data from a row and attach it to the parent record.

    Returns the created instance (for nested processing).

    Args:
        row: The database row containing relationship data
        column: The relationship column name
        relation: The Relationship object
        parent_record: The parent model instance to attach data to
        parent_id: ID of the parent for tracking
        seen_relations: Dict tracking which relationships we've already processed
        db: Database instance
        path: Current relationship path (e.g., "users.bestie")

    Returns:
        The created relationship instance, or None if skipped
        (no data, NULL id, or already processed for this parent)
    """
    # Build the full path for tracking (e.g., "users", "users.bestie", "users.bestie.articles")
    current_path = f"{path}.{column}" if path else column

    # Get the relationship column name (with hash for alias)
    relationship_column = f"{column}_{hash(relation)}"

    # Get relation data from row
    # (falls back to the relationship's table name when the aliased column is absent)
    relation_data = row[relationship_column] if relationship_column in row else row.get(relation.get_table_name())

    # Skip if no data or NULL id
    if not relation_data or relation_data.id is None:
        return None

    # Check if we've already seen this relationship instance
    seen_key = f"{current_path}-{relation_data.id}"
    if seen_key in seen_relations[parent_id]:
        return None  # Already processed

    seen_relations[parent_id].add(seen_key)

    # Create the relationship instance
    # (raw data is used as is when the related table does not look like a TypedTable)
    relation_table = relation.get_table(db)
    instance = relation_table(relation_data) if looks_like(relation_table, TypedTable) else relation_data

    # Process nested relationships on this instance
    if relation.nested:
        self._process_nested_relationships(
            row=row,
            relation=relation,
            instance=instance,
            # parent_id=parent_id,
            seen_relations=seen_relations,
            db=db,
            path=current_path,
        )

    # Attach to parent
    if relation.multiple:
        # current_value = parent_record.get(column)
        # if not isinstance(current_value, list):
        #     setattr(parent_record, column, [])
        parent_record[column].append(instance)
    else:
        parent_record[column] = instance

    return instance
923
+
924
def _process_nested_relationships(
    self,
    row: t.Any,
    relation: Relationship[t.Any],
    instance: t.Any,
    seen_relations: dict[str, set[str]],
    db: t.Any,
    path: str,
) -> None:
    """
    Attach every relationship nested under `relation` to `instance`.

    Args:
        row: The database row that holds the relationship data
        relation: The Relationship whose `nested` mapping is walked
        instance: The record that receives the nested data
        seen_relations: Per-parent sets of relation keys that were already handled
        db: Database instance
        path: Relationship path of `instance` itself
    """
    nested = relation.nested

    # First pass: give each nested column a default ([] or None) if it is unset.
    # Writing to the instance dict directly avoids triggering __get__ descriptors.
    attributes = vars(instance)
    for name, child_relation in nested.items():
        if name not in attributes:
            attributes[name] = [] if child_relation.multiple else None

    # Second pass: resolve the data, tracked under the id of this instance.
    for name, child_relation in nested.items():
        self._process_relationship_data(
            row=row,
            column=name,
            relation=child_relation,
            parent_record=instance,
            parent_id=instance.id,
            seen_relations=seen_relations,
            db=db,
            path=path,
        )
962
+
963
def collect_or_fail(self, exception: t.Optional[Exception] = None) -> "TypedRows[T_MetaInstance]":
    """
    Run .collect() and raise when the result is empty.

    Raises the given `exception`, or ValueError("Nothing found!") when none is passed.
    """
    rows = self.collect()
    if rows:
        return rows

    return throw(exception or ValueError("Nothing found!"))
970
+
971
def __iter__(self) -> t.Generator[T_MetaInstance, None, None]:
    """
    Iterate over the builder directly; the rows are collected on first use.
    """
    rows = self.collect()
    yield from rows
976
+
977
def __count(self, db: TypeDAL, distinct: t.Optional[bool] = None) -> Query:
    """
    Build the query that .count() and ._count() run (shared internal logic).

    Starts from the builder's own query and ANDs in relationship conditions:
    without `distinct` only inner joins are included, against an aliased
    table; with `distinct` every relationship that has a condition is
    included, against the unaliased table.

    Args:
        db: Database instance used to resolve the related tables
        distinct: The `distinct` argument that will be passed to pydal's count

    Returns:
        The combined query.
    """
    model = self.model
    query = self.query
    for key, relation in self.relationships.items():
        if not relation.condition:
            # Nothing callable to AND into the query. Without this guard a
            # truthy `distinct` would reach `relation.condition(...)` below
            # and call None (TypeError).
            continue

        if relation.join != "inner" and not distinct:
            continue

        other = relation.get_table(db)
        if not distinct:
            # todo: can this lead to other issues?
            other = other.with_alias(f"{key}_{hash(relation)}")
        query &= relation.condition(model, other)

    return query
992
+
993
def count(self, distinct: t.Optional[bool] = None) -> int:
    """
    Count the rows that match the current query.

    `distinct` is used both to build the count query and as the argument to pydal's count.
    """
    database = self._get_db()
    count_query = self.__count(database, distinct=distinct)
    matching = database(count_query)

    return matching.count(distinct)
1001
+
1002
def _count(self, distinct: t.Optional[bool] = None) -> str:
    """
    Return the SQL that .count() would run.
    """
    database = self._get_db()
    count_query = self.__count(database, distinct=distinct)
    sql = database(count_query)._count(distinct)

    return t.cast(str, sql)
1010
+
1011
def exists(self) -> bool:
    """
    Determine whether any records match the current query.

    Returns:
        bool: True if .count() reports one or more records; otherwise, False.
    """
    return True if self.count() else False
1021
+
1022
def __paginate(
    self,
    limit: int,
    page: int = 1,
) -> "QueryBuilder[T_MetaInstance]":
    """
    Return a copy of this builder limited to one page, with pagination metadata.

    A falsy `limit` means "no limit": the window then runs up to the total row
    count and `max_page` is 1. The total comes from a .count() call.
    """
    total = self.count()

    start = limit * (page - 1)
    stop = (limit * page) if limit else total
    last_page = math.ceil(total / limit) if limit else 1

    metadata: Metadata = {
        "pagination": {
            "limit": limit,
            "current_page": page,
            "max_page": last_page,
            "rows": total,
            "min_max": (start, stop),
        },
    }

    return self._extend(select_kwargs={"limitby": (start, stop)}, metadata=metadata)
1043
+
1044
def paginate(self, limit: int, page: int = 1, verbose: bool = False) -> "PaginatedRows[T_MetaInstance]":
    """
    Translate the readable `page` and `limit` into pydal's limitby and collect that page.

    Note: when using relationships, this limit is only applied to the 'main' table and any number of extra rows \
    can be loaded with relationship data!
    """
    page_builder = self.__paginate(limit, page)
    collected = page_builder.collect(verbose=verbose, _to=PaginatedRows)

    rows = t.cast(PaginatedRows[T_MetaInstance], collected)
    # The rows keep a reference to the builder that produced them.
    rows._query_builder = page_builder
    return rows
1057
+
1058
def _paginate(
    self,
    limit: int,
    page: int = 1,
) -> str:
    """
    Return the result of ._collect() for the paginated builder.

    NOTE(review): by analogy with ._count() this is presumably the SQL for .paginate() - confirm.
    """
    return self.__paginate(limit, page)._collect()
1065
+
1066
def chunk(self, chunk_size: int) -> t.Generator["TypedRows[T_MetaInstance]", t.Any, None]:
    """
    Generator that yields rows from a paginated source in chunks.

    This function retrieves rows from a paginated data source in chunks of the
    specified `chunk_size` and yields them as TypedRows. Iteration stops at the
    first empty page.

    A `chunk_size` of 0 follows the "no limit" convention of the pagination
    logic: everything is yielded as one single chunk.

    Example:
        ```
        for chunk_of_rows in Table.where(SomeTable.id > 5).chunk(100):
            for row in chunk_of_rows:
                # Process each row within the chunk.
                pass
        ```
    """
    page = 1

    while rows := self.__paginate(chunk_size, page).collect():
        yield rows

        if not chunk_size:
            # Without a limit every page spans the full result set, so asking
            # for page 2, 3, ... would yield the same rows forever.
            return

        page += 1
1086
+
1087
def first(self, verbose: bool = False) -> T_MetaInstance | None:
    """
    Return the first row matching the currently built query, or None.

    Goes through paginate (page 1, limit 1), since it would be a waste to select more rows than needed.
    """
    single_page = self.paginate(page=1, limit=1, verbose=verbose)
    row = single_page.first()
    if not row:
        return None

    return self.model.from_row(row)
1097
+
1098
def _first(self) -> str:
    """
    Return the result of ._paginate() for page 1 with limit 1 (the counterpart of .first()).
    """
    first_page = self._paginate(limit=1, page=1)
    return first_page
1100
+
1101
def first_or_fail(self, exception: t.Optional[BaseException] = None, verbose: bool = False) -> T_MetaInstance:
    """
    Run .first() and raise when nothing was found.

    Raises the given `exception`, or ValueError("Nothing found!") when none is passed.
    """
    found = self.first(verbose=verbose)
    if found:
        return found

    return throw(exception or ValueError("Nothing found!"))
1108
+
1109
+
1110
+ # note: these imports exist at the bottom of this file to prevent circular import issues:
1111
+
1112
+ from .caching import ( # noqa: E402
1113
+ create_and_hash_cache_key,
1114
+ get_expire,
1115
+ load_from_cache,
1116
+ save_to_cache,
1117
+ )
1118
+ from .relationships import Relationship # noqa: E402
1119
+ from .rows import PaginatedRows, TypedRows # noqa: E402