django-bulk-hooks 0.1.83__py3-none-any.whl → 0.2.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of django-bulk-hooks might be problematic. Click here for more details.

@@ -0,0 +1,928 @@
1
+ """
2
+ Bulk operation coordinator - Single entry point for all bulk operations.
3
+
4
+ This facade hides the complexity of wiring up multiple services and provides
5
+ a clean, simple API for the QuerySet to use.
6
+ """
7
+
8
+ import logging
9
+ from dataclasses import dataclass
10
+ from typing import Any
11
+ from typing import Callable
12
+ from typing import Dict
13
+ from typing import List
14
+ from typing import Optional
15
+ from typing import Set
16
+ from typing import Tuple
17
+
18
+ from django.core.exceptions import FieldDoesNotExist
19
+ from django.db import transaction
20
+ from django.db.models import Model
21
+ from django.db.models import QuerySet
22
+
23
+ from django_bulk_hooks.changeset import ChangeSet
24
+ from django_bulk_hooks.changeset import RecordChange
25
+ from django_bulk_hooks.context import get_bypass_hooks
26
+ from django_bulk_hooks.helpers import build_changeset_for_create
27
+ from django_bulk_hooks.helpers import build_changeset_for_delete
28
+ from django_bulk_hooks.helpers import build_changeset_for_update
29
+ from django_bulk_hooks.helpers import extract_pks
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
@dataclass
class InstanceSnapshot:
    """Captured field values of one model instance, used to detect hook-made changes."""

    # Mapping of field name -> value at snapshot time.
    field_values: Dict[str, Any]
39
+
40
+
41
class BulkOperationCoordinator:
    """
    Facade coordinating all bulk operations for a QuerySet.

    Wires together the analyzer, MTI handler, record classifier, executor,
    and dispatcher services, and drives the hook lifecycle for each
    operation type. Services are created lazily and cached per coordinator.
    """

    # Upserted rows whose created_at/updated_at differ by at most this many
    # seconds are treated as newly created (see _classify_by_timestamps).
    UPSERT_TIMESTAMP_THRESHOLD_SECONDS = 1.0

    def __init__(self, queryset: QuerySet):
        """
        Initialize the coordinator for one queryset.

        Args:
            queryset: Django QuerySet the coordinator operates on.
        """
        self.queryset = queryset
        self.model_cls = queryset.model

        # Service caches; filled in on first access by the properties below.
        self._analyzer = None
        self._mti_handler = None
        self._record_classifier = None
        self._executor = None
        self._dispatcher = None
71
+
72
+ # ==================== SERVICE PROPERTIES ====================
73
+
74
+ def _get_or_create_service(self, service_name: str, service_class: type, *args, **kwargs) -> Any:
75
+ """
76
+ Generic lazy service initialization with caching.
77
+
78
+ Args:
79
+ service_name: Name of the service attribute (e.g., 'analyzer')
80
+ service_class: The class to instantiate
81
+ *args, **kwargs: Arguments to pass to the service constructor
82
+
83
+ Returns:
84
+ The service instance
85
+ """
86
+ attr_name = f"_{service_name}"
87
+ service = getattr(self, attr_name)
88
+
89
+ if service is None:
90
+ service = service_class(*args, **kwargs)
91
+ setattr(self, attr_name, service)
92
+
93
+ return service
94
+
95
+ @property
96
+ def analyzer(self):
97
+ """Get or create ModelAnalyzer."""
98
+ from django_bulk_hooks.operations.analyzer import ModelAnalyzer
99
+
100
+ return self._get_or_create_service("analyzer", ModelAnalyzer, self.model_cls)
101
+
102
+ @property
103
+ def mti_handler(self):
104
+ """Get or create MTIHandler."""
105
+ from django_bulk_hooks.operations.mti_handler import MTIHandler
106
+
107
+ return self._get_or_create_service("mti_handler", MTIHandler, self.model_cls)
108
+
109
+ @property
110
+ def record_classifier(self):
111
+ """Get or create RecordClassifier."""
112
+ from django_bulk_hooks.operations.record_classifier import RecordClassifier
113
+
114
+ return self._get_or_create_service("record_classifier", RecordClassifier, self.model_cls)
115
+
116
+ @property
117
+ def executor(self):
118
+ """Get or create BulkExecutor."""
119
+ from django_bulk_hooks.operations.bulk_executor import BulkExecutor
120
+
121
+ return self._get_or_create_service(
122
+ "executor",
123
+ BulkExecutor,
124
+ queryset=self.queryset,
125
+ analyzer=self.analyzer,
126
+ mti_handler=self.mti_handler,
127
+ record_classifier=self.record_classifier,
128
+ )
129
+
130
+ @property
131
+ def dispatcher(self):
132
+ """Get or create Dispatcher."""
133
+ from django_bulk_hooks.dispatcher import get_dispatcher
134
+
135
+ return self._get_or_create_service("dispatcher", get_dispatcher)
136
+
137
+ @property
138
+ def inheritance_chain(self) -> List[type]:
139
+ """Single source of truth for MTI inheritance chain."""
140
+ return self.mti_handler.get_inheritance_chain()
141
+
142
+ # ==================== PUBLIC API ====================
143
+
144
+ @transaction.atomic
145
+ def create(
146
+ self,
147
+ objs: List[Model],
148
+ batch_size: Optional[int] = None,
149
+ ignore_conflicts: bool = False,
150
+ update_conflicts: bool = False,
151
+ update_fields: Optional[List[str]] = None,
152
+ unique_fields: Optional[List[str]] = None,
153
+ bypass_hooks: bool = False,
154
+ ) -> List[Model]:
155
+ """
156
+ Execute bulk create with hooks.
157
+
158
+ Args:
159
+ objs: List of model instances to create
160
+ batch_size: Number of objects per batch
161
+ ignore_conflicts: Ignore conflicts if True
162
+ update_conflicts: Update on conflict if True
163
+ update_fields: Fields to update on conflict
164
+ unique_fields: Fields to check for conflicts
165
+ bypass_hooks: Skip all hooks if True
166
+
167
+ Returns:
168
+ List of created objects
169
+ """
170
+ if not objs:
171
+ return objs
172
+
173
+ self.analyzer.validate_for_create(objs)
174
+
175
+ # Handle upsert classification upfront
176
+ existing_record_ids, existing_pks_map = self._classify_upsert_records(objs, update_conflicts, unique_fields)
177
+
178
+ changeset = build_changeset_for_create(
179
+ self.model_cls,
180
+ objs,
181
+ batch_size=batch_size,
182
+ ignore_conflicts=ignore_conflicts,
183
+ update_conflicts=update_conflicts,
184
+ update_fields=update_fields,
185
+ unique_fields=unique_fields,
186
+ )
187
+
188
+ def operation():
189
+ return self.executor.bulk_create(
190
+ objs,
191
+ batch_size=batch_size,
192
+ ignore_conflicts=ignore_conflicts,
193
+ update_conflicts=update_conflicts,
194
+ update_fields=update_fields,
195
+ unique_fields=unique_fields,
196
+ existing_record_ids=existing_record_ids,
197
+ existing_pks_map=existing_pks_map,
198
+ )
199
+
200
+ return self._execute_with_mti_hooks(
201
+ changeset=changeset,
202
+ operation=operation,
203
+ event_prefix="create",
204
+ bypass_hooks=bypass_hooks,
205
+ )
206
+
207
+ @transaction.atomic
208
+ def update(
209
+ self,
210
+ objs: List[Model],
211
+ fields: List[str],
212
+ batch_size: Optional[int] = None,
213
+ bypass_hooks: bool = False,
214
+ ) -> int:
215
+ """
216
+ Execute bulk update with hooks.
217
+
218
+ Args:
219
+ objs: List of model instances to update
220
+ fields: List of field names to update
221
+ batch_size: Number of objects per batch
222
+ bypass_hooks: Skip all hooks if True
223
+
224
+ Returns:
225
+ Number of objects updated
226
+ """
227
+ if not objs:
228
+ return 0
229
+
230
+ self.analyzer.validate_for_update(objs)
231
+
232
+ old_records_map = self.analyzer.fetch_old_records_map(objs)
233
+ changeset = self._build_update_changeset(objs, fields, old_records_map)
234
+
235
+ def operation():
236
+ return self.executor.bulk_update(objs, fields, batch_size=batch_size)
237
+
238
+ return self._execute_with_mti_hooks(
239
+ changeset=changeset,
240
+ operation=operation,
241
+ event_prefix="update",
242
+ bypass_hooks=bypass_hooks,
243
+ )
244
+
245
+ @transaction.atomic
246
+ def update_queryset(
247
+ self,
248
+ update_kwargs: Dict[str, Any],
249
+ bypass_hooks: bool = False,
250
+ ) -> int:
251
+ """
252
+ Execute queryset.update() with full hook support.
253
+
254
+ ARCHITECTURE & PERFORMANCE TRADE-OFFS
255
+ ======================================
256
+
257
+ To support hooks with queryset.update(), we must:
258
+ 1. Fetch old state (SELECT all matching rows)
259
+ 2. Execute database update (UPDATE in SQL)
260
+ 3. Fetch new state (SELECT all rows again)
261
+ 4. Run VALIDATE_UPDATE hooks (validation only)
262
+ 5. Run BEFORE_UPDATE hooks (CAN modify instances)
263
+ 6. Persist BEFORE_UPDATE modifications (bulk_update)
264
+ 7. Run AFTER_UPDATE hooks (read-only side effects)
265
+
266
+ Performance Cost:
267
+ - 2 SELECT queries (before/after)
268
+ - 1 UPDATE query (actual update)
269
+ - 1 bulk_update (if hooks modify data)
270
+
271
+ Trade-off: Hooks require loading data into Python. If you need
272
+ maximum performance and don't need hooks, use bypass_hooks=True.
273
+
274
+ Args:
275
+ update_kwargs: Dict of fields to update
276
+ bypass_hooks: Skip all hooks if True
277
+
278
+ Returns:
279
+ Number of rows updated
280
+ """
281
+ if bypass_hooks or get_bypass_hooks():
282
+ return QuerySet.update(self.queryset, **update_kwargs)
283
+
284
+ return self._execute_queryset_update_with_hooks(update_kwargs)
285
+
286
+ @transaction.atomic
287
+ def delete(self, bypass_hooks: bool = False) -> Tuple[int, Dict[str, int]]:
288
+ """
289
+ Execute delete with hooks.
290
+
291
+ Args:
292
+ bypass_hooks: Skip all hooks if True
293
+
294
+ Returns:
295
+ Tuple of (count, details dict)
296
+ """
297
+ objs = list(self.queryset)
298
+ if not objs:
299
+ return (0, {})
300
+
301
+ self.analyzer.validate_for_delete(objs)
302
+
303
+ changeset = build_changeset_for_delete(self.model_cls, objs)
304
+
305
+ def operation():
306
+ return QuerySet.delete(self.queryset)
307
+
308
+ return self._execute_with_mti_hooks(
309
+ changeset=changeset,
310
+ operation=operation,
311
+ event_prefix="delete",
312
+ bypass_hooks=bypass_hooks,
313
+ )
314
+
315
+ def clean(self, objs: List[Model], is_create: Optional[bool] = None) -> None:
316
+ """
317
+ Execute validation hooks only (no database operations).
318
+
319
+ This is used by Django's clean() method to hook VALIDATE_* events
320
+ without performing the actual operation.
321
+
322
+ Args:
323
+ objs: List of model instances to validate
324
+ is_create: True for create, False for update, None to auto-detect
325
+ """
326
+ if not objs:
327
+ return
328
+
329
+ # Auto-detect operation type
330
+ if is_create is None:
331
+ is_create = objs[0].pk is None
332
+
333
+ # Validate based on operation type
334
+ if is_create:
335
+ self.analyzer.validate_for_create(objs)
336
+ changeset = build_changeset_for_create(self.model_cls, objs)
337
+ event = "validate_create"
338
+ else:
339
+ self.analyzer.validate_for_update(objs)
340
+ changeset = build_changeset_for_update(self.model_cls, objs, {})
341
+ event = "validate_update"
342
+
343
+ # Dispatch validation event
344
+ models_in_chain = self.inheritance_chain
345
+ self._dispatch_hooks_for_models(models_in_chain, changeset, event)
346
+
347
+ # ==================== QUERYSET UPDATE IMPLEMENTATION ====================
348
+
349
+ def _execute_queryset_update_with_hooks(
350
+ self,
351
+ update_kwargs: Dict[str, Any],
352
+ ) -> int:
353
+ """
354
+ Execute queryset update with full hook lifecycle support.
355
+
356
+ Implements the fetch-update-fetch pattern required to support hooks
357
+ with queryset.update(). BEFORE_UPDATE hooks can modify instances
358
+ and modifications are auto-persisted.
359
+
360
+ Args:
361
+ update_kwargs: Dict of fields to update
362
+
363
+ Returns:
364
+ Number of rows updated
365
+ """
366
+ # Step 1: Fetch old state with relationships preloaded
367
+ hook_relationships = self._extract_hook_relationships()
368
+ old_instances = self._fetch_instances_with_relationships(self.queryset, hook_relationships)
369
+
370
+ if not old_instances:
371
+ return 0
372
+
373
+ old_records_map = {inst.pk: inst for inst in old_instances}
374
+
375
+ # Step 2: Execute native Django update
376
+ update_count = QuerySet.update(self.queryset, **update_kwargs)
377
+ if update_count == 0:
378
+ return 0
379
+
380
+ # Step 3: Fetch new state after update
381
+ pks = extract_pks(old_instances)
382
+ new_queryset = self.model_cls.objects.filter(pk__in=pks)
383
+ new_instances = self._fetch_instances_with_relationships(new_queryset, hook_relationships)
384
+
385
+ # Step 4: Build changeset and run hook lifecycle
386
+ changeset = build_changeset_for_update(
387
+ self.model_cls,
388
+ new_instances,
389
+ update_kwargs,
390
+ old_records_map=old_records_map,
391
+ )
392
+ changeset.operation_meta["is_queryset_update"] = True
393
+ changeset.operation_meta["allows_modifications"] = True
394
+
395
+ models_in_chain = self.inheritance_chain
396
+
397
+ # Step 5: VALIDATE phase
398
+ self._dispatch_hooks_for_models(models_in_chain, changeset, "validate_update", bypass_hooks=False)
399
+
400
+ # Step 6: BEFORE_UPDATE phase with modification tracking
401
+ modified_fields = self._run_before_update_hooks_with_tracking(new_instances, models_in_chain, changeset)
402
+
403
+ # Step 7: Auto-persist BEFORE_UPDATE modifications
404
+ if modified_fields:
405
+ self._persist_hook_modifications(new_instances, modified_fields)
406
+
407
+ # Step 8: AFTER_UPDATE phase (read-only)
408
+ pre_after_state = self._snapshot_instance_state(new_instances)
409
+ self._dispatch_hooks_for_models(models_in_chain, changeset, "after_update", bypass_hooks=False)
410
+
411
+ # Step 9: Auto-persist any AFTER_UPDATE modifications (should be rare)
412
+ after_modified_fields = self._detect_modifications(new_instances, pre_after_state)
413
+ if after_modified_fields:
414
+ logger.warning("AFTER_UPDATE hooks modified fields: %s. Consider moving modifications to BEFORE_UPDATE.", after_modified_fields)
415
+ self._persist_hook_modifications(new_instances, after_modified_fields)
416
+
417
+ return update_count
418
+
419
+ def _run_before_update_hooks_with_tracking(self, instances: List[Model], models_in_chain: List[type], changeset: ChangeSet) -> Set[str]:
420
+ """
421
+ Run BEFORE_UPDATE hooks and detect modifications.
422
+
423
+ Returns:
424
+ Set of field names that were modified by hooks
425
+ """
426
+ pre_hook_state = self._snapshot_instance_state(instances)
427
+ self._dispatch_hooks_for_models(models_in_chain, changeset, "before_update", bypass_hooks=False)
428
+ return self._detect_modifications(instances, pre_hook_state)
429
+
430
+ # ==================== MTI HOOK ORCHESTRATION ====================
431
+
432
+ def _execute_with_mti_hooks(
433
+ self,
434
+ changeset: ChangeSet,
435
+ operation: Callable,
436
+ event_prefix: str,
437
+ bypass_hooks: bool = False,
438
+ ) -> Any:
439
+ """
440
+ Execute operation with hooks for entire MTI inheritance chain.
441
+
442
+ This ensures parent model hooks fire when child instances are
443
+ created/updated/deleted in MTI scenarios.
444
+
445
+ Args:
446
+ changeset: ChangeSet for the child model
447
+ operation: Callable that performs the actual DB operation
448
+ event_prefix: 'create', 'update', or 'delete'
449
+ bypass_hooks: Skip all hooks if True
450
+
451
+ Returns:
452
+ Result of operation
453
+ """
454
+ if bypass_hooks:
455
+ return operation()
456
+
457
+ self.dispatcher._reset_executed_hooks()
458
+ logger.debug("Starting %s operation for %s", event_prefix, changeset.model_cls.__name__)
459
+
460
+ models_in_chain = self.inheritance_chain
461
+
462
+ # Preload relationships needed by hook conditions (prevents N+1)
463
+ self._preload_condition_relationships_for_operation(changeset, models_in_chain)
464
+
465
+ # VALIDATE phase
466
+ self._dispatch_hooks_for_models(models_in_chain, changeset, f"validate_{event_prefix}")
467
+
468
+ # BEFORE phase
469
+ self._dispatch_hooks_for_models(models_in_chain, changeset, f"before_{event_prefix}")
470
+
471
+ # Execute operation
472
+ result = operation()
473
+
474
+ # AFTER phase (handle upsert splitting for create operations)
475
+ if result and isinstance(result, list) and event_prefix == "create":
476
+ if self._is_upsert_operation(result):
477
+ self._dispatch_upsert_after_hooks(result, models_in_chain)
478
+ else:
479
+ after_changeset = build_changeset_for_create(changeset.model_cls, result)
480
+ self._dispatch_hooks_for_models(models_in_chain, after_changeset, f"after_{event_prefix}")
481
+ else:
482
+ self._dispatch_hooks_for_models(models_in_chain, changeset, f"after_{event_prefix}")
483
+
484
+ return result
485
+
486
+ def _dispatch_hooks_for_models(
487
+ self,
488
+ models_in_chain: List[type],
489
+ changeset: ChangeSet,
490
+ event_suffix: str,
491
+ bypass_hooks: bool = False,
492
+ ) -> None:
493
+ """
494
+ Dispatch hooks for all models in inheritance chain.
495
+
496
+ Args:
497
+ models_in_chain: List of model classes in MTI inheritance chain
498
+ changeset: The changeset to use as base
499
+ event_suffix: Event name suffix (e.g., 'before_create')
500
+ bypass_hooks: Whether to skip hook execution
501
+ """
502
+ logger.debug("Dispatching %s to %d models: %s", event_suffix, len(models_in_chain), [m.__name__ for m in models_in_chain])
503
+
504
+ for model_cls in models_in_chain:
505
+ model_changeset = self._build_changeset_for_model(changeset, model_cls)
506
+ self.dispatcher.dispatch(model_changeset, event_suffix, bypass_hooks=bypass_hooks)
507
+
508
+ def _build_changeset_for_model(self, original_changeset: ChangeSet, target_model_cls: type) -> ChangeSet:
509
+ """
510
+ Build a changeset for a specific model in the MTI inheritance chain.
511
+
512
+ This allows parent model hooks to receive the same instances but with
513
+ the correct model_cls for hook registration matching.
514
+
515
+ Args:
516
+ original_changeset: The original changeset (for child model)
517
+ target_model_cls: The model class to build changeset for
518
+
519
+ Returns:
520
+ ChangeSet for the target model
521
+ """
522
+ return ChangeSet(
523
+ model_cls=target_model_cls,
524
+ changes=original_changeset.changes,
525
+ operation_type=original_changeset.operation_type,
526
+ operation_meta=original_changeset.operation_meta,
527
+ )
528
+
529
+ # ==================== UPSERT HANDLING ====================
530
+
531
+ def _classify_upsert_records(
532
+ self,
533
+ objs: List[Model],
534
+ update_conflicts: bool,
535
+ unique_fields: Optional[List[str]],
536
+ ) -> Tuple[Set[Any], Dict[Any, Any]]:
537
+ """
538
+ Classify records for upsert operations.
539
+
540
+ Args:
541
+ objs: List of model instances
542
+ update_conflicts: Whether this is an upsert operation
543
+ unique_fields: Fields to check for conflicts
544
+
545
+ Returns:
546
+ Tuple of (existing_record_ids, existing_pks_map)
547
+ """
548
+ if not (update_conflicts and unique_fields):
549
+ return set(), {}
550
+
551
+ query_model = None
552
+ if self.mti_handler.is_mti_model():
553
+ query_model = self.mti_handler.find_model_with_unique_fields(unique_fields)
554
+ logger.info("MTI model detected: querying %s for unique fields %s", query_model.__name__, unique_fields)
555
+
556
+ existing_ids, existing_pks = self.record_classifier.classify_for_upsert(objs, unique_fields, query_model=query_model)
557
+
558
+ logger.info("Upsert classification: %d existing, %d new records", len(existing_ids), len(objs) - len(existing_ids))
559
+
560
+ return existing_ids, existing_pks
561
+
562
+ def _is_upsert_operation(self, result_objects: List[Model]) -> bool:
563
+ """Check if the operation was an upsert (with update_conflicts=True)."""
564
+ if not result_objects:
565
+ return False
566
+ return hasattr(result_objects[0], "_bulk_hooks_upsert_metadata")
567
+
568
+ def _dispatch_upsert_after_hooks(self, result_objects: List[Model], models_in_chain: List[type]) -> None:
569
+ """
570
+ Dispatch after hooks for upsert operations, splitting by create/update.
571
+
572
+ This matches Salesforce behavior where created records fire after_create
573
+ and updated records fire after_update hooks.
574
+
575
+ Args:
576
+ result_objects: List of objects returned from the operation
577
+ models_in_chain: List of model classes in the MTI inheritance chain
578
+ """
579
+ created, updated = self._classify_upsert_results(result_objects)
580
+
581
+ logger.info("Upsert after hooks: %d created, %d updated", len(created), len(updated))
582
+
583
+ if created:
584
+ create_changeset = build_changeset_for_create(self.model_cls, created)
585
+ create_changeset.operation_meta["relationships_preloaded"] = True
586
+ self._dispatch_hooks_for_models(models_in_chain, create_changeset, "after_create", bypass_hooks=False)
587
+
588
+ if updated:
589
+ old_records_map = self.analyzer.fetch_old_records_map(updated)
590
+ update_changeset = build_changeset_for_update(self.model_cls, updated, {}, old_records_map=old_records_map)
591
+ update_changeset.operation_meta["relationships_preloaded"] = True
592
+ self._dispatch_hooks_for_models(models_in_chain, update_changeset, "after_update", bypass_hooks=False)
593
+
594
+ self._cleanup_upsert_metadata(result_objects)
595
+
596
+ def _classify_upsert_results(self, result_objects: List[Model]) -> Tuple[List[Model], List[Model]]:
597
+ """
598
+ Classify upsert results into created and updated objects.
599
+
600
+ Returns:
601
+ Tuple of (created_objects, updated_objects)
602
+ """
603
+ created_objects = []
604
+ updated_objects = []
605
+ objects_needing_timestamp_check = []
606
+
607
+ # First pass: collect objects with metadata
608
+ for obj in result_objects:
609
+ if hasattr(obj, "_bulk_hooks_was_created"):
610
+ if obj._bulk_hooks_was_created:
611
+ created_objects.append(obj)
612
+ else:
613
+ updated_objects.append(obj)
614
+ else:
615
+ objects_needing_timestamp_check.append(obj)
616
+
617
+ # Second pass: bulk check timestamps for objects without metadata
618
+ if objects_needing_timestamp_check:
619
+ created, updated = self._classify_by_timestamps(objects_needing_timestamp_check)
620
+ created_objects.extend(created)
621
+ updated_objects.extend(updated)
622
+
623
+ return created_objects, updated_objects
624
+
625
+ def _classify_by_timestamps(self, objects: List[Model]) -> Tuple[List[Model], List[Model]]:
626
+ """
627
+ Classify objects as created or updated based on timestamp comparison.
628
+
629
+ Returns:
630
+ Tuple of (created_objects, updated_objects)
631
+ """
632
+ created = []
633
+ updated = []
634
+
635
+ # Group by model class to handle MTI scenarios
636
+ objects_by_model = {}
637
+ for obj in objects:
638
+ model_cls = obj.__class__
639
+ objects_by_model.setdefault(model_cls, []).append(obj)
640
+
641
+ # Process each model class
642
+ for model_cls, objs in objects_by_model.items():
643
+ if not (hasattr(model_cls, "created_at") and hasattr(model_cls, "updated_at")):
644
+ # No timestamp fields, default to created
645
+ created.extend(objs)
646
+ continue
647
+
648
+ # Bulk fetch timestamps
649
+ pks = extract_pks(objs)
650
+ if not pks:
651
+ created.extend(objs)
652
+ continue
653
+
654
+ timestamp_map = {
655
+ record["pk"]: (record["created_at"], record["updated_at"])
656
+ for record in model_cls.objects.filter(pk__in=pks).values("pk", "created_at", "updated_at")
657
+ }
658
+
659
+ # Classify based on timestamp difference
660
+ for obj in objs:
661
+ if obj.pk not in timestamp_map:
662
+ created.append(obj)
663
+ continue
664
+
665
+ created_at, updated_at = timestamp_map[obj.pk]
666
+ if not (created_at and updated_at):
667
+ created.append(obj)
668
+ continue
669
+
670
+ time_diff = abs((updated_at - created_at).total_seconds())
671
+ if time_diff <= self.UPSERT_TIMESTAMP_THRESHOLD_SECONDS:
672
+ created.append(obj)
673
+ else:
674
+ updated.append(obj)
675
+
676
+ return created, updated
677
+
678
+ def _cleanup_upsert_metadata(self, result_objects: List[Model]) -> None:
679
+ """Clean up temporary metadata added during upsert operations."""
680
+ for obj in result_objects:
681
+ for attr in ("_bulk_hooks_was_created", "_bulk_hooks_upsert_metadata"):
682
+ if hasattr(obj, attr):
683
+ delattr(obj, attr)
684
+
685
+ # ==================== INSTANCE STATE TRACKING ====================
686
+
687
+ def _snapshot_instance_state(self, instances: List[Model]) -> Dict[Any, Dict[str, Any]]:
688
+ """
689
+ Create a snapshot of current instance field values.
690
+
691
+ Args:
692
+ instances: List of model instances
693
+
694
+ Returns:
695
+ Dict mapping pk -> {field_name: value}
696
+ """
697
+ snapshot = {}
698
+
699
+ for instance in instances:
700
+ if instance.pk is None:
701
+ continue
702
+
703
+ field_values = {}
704
+ for field in self.model_cls._meta.get_fields():
705
+ # Skip non-concrete fields
706
+ if field.many_to_many or field.one_to_many:
707
+ continue
708
+
709
+ try:
710
+ field_values[field.name] = getattr(instance, field.name)
711
+ except (AttributeError, FieldDoesNotExist):
712
+ field_values[field.name] = None
713
+
714
+ snapshot[instance.pk] = field_values
715
+
716
+ return snapshot
717
+
718
+ def _detect_modifications(
719
+ self,
720
+ instances: List[Model],
721
+ pre_hook_state: Dict[Any, Dict[str, Any]],
722
+ ) -> Set[str]:
723
+ """
724
+ Detect which fields were modified by comparing to snapshot.
725
+
726
+ Args:
727
+ instances: List of model instances
728
+ pre_hook_state: Previous state snapshot
729
+
730
+ Returns:
731
+ Set of field names that were modified
732
+ """
733
+ modified_fields = set()
734
+
735
+ for instance in instances:
736
+ if instance.pk not in pre_hook_state:
737
+ continue
738
+
739
+ old_values = pre_hook_state[instance.pk]
740
+
741
+ for field_name, old_value in old_values.items():
742
+ try:
743
+ current_value = getattr(instance, field_name)
744
+ except (AttributeError, FieldDoesNotExist):
745
+ current_value = None
746
+
747
+ if current_value != old_value:
748
+ modified_fields.add(field_name)
749
+
750
+ return modified_fields
751
+
752
+ def _persist_hook_modifications(self, instances: List[Model], modified_fields: Set[str]) -> None:
753
+ """
754
+ Persist modifications made by hooks using bulk_update.
755
+
756
+ Args:
757
+ instances: List of modified instances
758
+ modified_fields: Set of field names that were modified
759
+ """
760
+ logger.info("Hooks modified %d field(s): %s", len(modified_fields), ", ".join(sorted(modified_fields)))
761
+ logger.info("Auto-persisting modifications via bulk_update")
762
+
763
+ # Use Django's bulk_update directly (not our hook version)
764
+ fresh_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
765
+ QuerySet.bulk_update(fresh_qs, instances, list(modified_fields))
766
+
767
+ # ==================== RELATIONSHIP PRELOADING ====================
768
+
769
+ def _fetch_instances_with_relationships(
770
+ self,
771
+ queryset: QuerySet,
772
+ relationships: Set[str],
773
+ ) -> List[Model]:
774
+ """
775
+ Fetch instances with relationships preloaded.
776
+
777
+ Args:
778
+ queryset: QuerySet to fetch from
779
+ relationships: Set of relationship names to preload
780
+
781
+ Returns:
782
+ List of model instances with relationships loaded
783
+ """
784
+ if relationships:
785
+ logger.info("Fetching instances with select_related(%s)", list(relationships))
786
+ queryset = queryset.select_related(*relationships)
787
+ else:
788
+ logger.info("Fetching instances without select_related")
789
+
790
+ return list(queryset)
791
+
792
+ def _preload_condition_relationships_for_operation(
793
+ self,
794
+ changeset: ChangeSet,
795
+ models_in_chain: List[type],
796
+ ) -> None:
797
+ """
798
+ Preload relationships needed by hook conditions for this operation.
799
+
800
+ This prevents N+1 queries by loading all necessary relationships upfront.
801
+
802
+ Args:
803
+ changeset: The changeset for this operation
804
+ models_in_chain: List of model classes in inheritance chain
805
+ """
806
+ relationships = self._extract_condition_relationships_for_operation(changeset, models_in_chain)
807
+
808
+ if relationships:
809
+ logger.info("Bulk preloading %d condition relationships for %s hooks", len(relationships), changeset.model_cls.__name__)
810
+ self.dispatcher._preload_condition_relationships(changeset, relationships)
811
+ changeset.operation_meta["relationships_preloaded"] = True
812
+ else:
813
+ logger.info("No condition relationships to preload for %s hooks", changeset.model_cls.__name__)
814
+
815
+ def _extract_condition_relationships_for_operation(
816
+ self,
817
+ changeset: ChangeSet,
818
+ models_in_chain: List[type],
819
+ ) -> Set[str]:
820
+ """
821
+ Extract relationships needed by hook conditions for this operation.
822
+
823
+ Args:
824
+ changeset: The changeset for this operation
825
+ models_in_chain: List of model classes in inheritance chain
826
+
827
+ Returns:
828
+ Set of relationship field names to preload
829
+ """
830
+ relationships = set()
831
+ event_prefix = changeset.operation_type
832
+ events_to_check = [f"validate_{event_prefix}", f"before_{event_prefix}", f"after_{event_prefix}"]
833
+
834
+ for model_cls in models_in_chain:
835
+ for event in events_to_check:
836
+ hooks = self.dispatcher.registry.get_hooks(model_cls, event)
837
+
838
+ for handler_cls, method_name, condition, priority in hooks:
839
+ if condition:
840
+ condition_rels = self.dispatcher._extract_condition_relationships(condition, model_cls)
841
+ relationships.update(condition_rels)
842
+
843
+ return relationships
844
+
845
+ def _extract_hook_relationships(self) -> Set[str]:
846
+ """
847
+ Extract all relationship paths that hooks might access.
848
+
849
+ This includes both condition relationships and @select_related decorators
850
+ for the model and its MTI parents. Prevents N+1 queries during bulk operations.
851
+
852
+ Returns:
853
+ Set of relationship field names to preload with select_related
854
+ """
855
+ relationships = set()
856
+ models_to_check = self.inheritance_chain
857
+ events_to_check = ["before_update", "after_update", "validate_update"]
858
+
859
+ for model_cls in models_to_check:
860
+ logger.info("Checking hooks for model %s", model_cls.__name__)
861
+
862
+ for event in events_to_check:
863
+ hooks = self.dispatcher.registry.get_hooks(model_cls, event)
864
+ logger.info("Found %d hooks for %s.%s", len(hooks), model_cls.__name__, event)
865
+
866
+ for handler_cls, method_name, condition, priority in hooks:
867
+ # Extract from conditions
868
+ if condition:
869
+ condition_rels = self.dispatcher._extract_condition_relationships(condition, model_cls)
870
+ if condition_rels:
871
+ logger.info("Condition relationships for %s.%s: %s", model_cls.__name__, method_name, condition_rels)
872
+ relationships.update(condition_rels)
873
+
874
+ # Extract from @select_related decorators
875
+ try:
876
+ method = getattr(handler_cls, method_name, None)
877
+ if method:
878
+ select_related_fields = getattr(method, "_select_related_fields", None)
879
+ if select_related_fields and hasattr(select_related_fields, "__iter__"):
880
+ logger.info(
881
+ "@select_related fields on %s.%s: %s", handler_cls.__name__, method_name, list(select_related_fields)
882
+ )
883
+ relationships.update(select_related_fields)
884
+ except Exception as e:
885
+ logger.warning("Failed to extract @select_related from %s.%s: %s", handler_cls.__name__, method_name, e)
886
+
887
+ # Also preload all forward FK relationships on the model (aggressive approach)
888
+ try:
889
+ for field in self.model_cls._meta.get_fields():
890
+ if field.is_relation and not field.many_to_many and not field.one_to_many:
891
+ relationships.add(field.name)
892
+ logger.info("AUTO: Adding FK relationship field %s", field.name)
893
+ except Exception as e:
894
+ logger.warning("Failed to extract all relationship fields: %s", e)
895
+
896
+ logger.info("Total extracted relationships for %s: %s", self.model_cls.__name__, list(relationships))
897
+
898
+ return relationships
899
+
900
+ # ==================== HELPER METHODS ====================
901
+
902
+ def _build_update_changeset(
903
+ self,
904
+ objs: List[Model],
905
+ fields: List[str],
906
+ old_records_map: Dict[Any, Model],
907
+ ) -> ChangeSet:
908
+ """
909
+ Build a changeset for bulk update operations.
910
+
911
+ Args:
912
+ objs: List of model instances to update
913
+ fields: List of field names to update
914
+ old_records_map: Map of pk -> old record
915
+
916
+ Returns:
917
+ ChangeSet for the update operation
918
+ """
919
+ changes = [
920
+ RecordChange(
921
+ new_record=obj,
922
+ old_record=old_records_map.get(obj.pk),
923
+ changed_fields=fields,
924
+ )
925
+ for obj in objs
926
+ ]
927
+
928
+ return ChangeSet(self.model_cls, changes, "update", {"fields": fields})