PyPI - django-bulk-hooks - Versions diffs - 0.2.14__tar.gz → 0.2.16__tar.gz - Mend

django-bulk-hooks 0.2.14.tar.gz → 0.2.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of django-bulk-hooks might be problematic. Click here for more details.

Files changed (26)

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: django-bulk-hooks
-Version: 0.2.14
+Version: 0.2.16
 Summary: Hook-style hooks for Django bulk operations like bulk_create and bulk_update.
 License: MIT
 Keywords: django,bulk,hooks

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/decorators.py RENAMED Viewed

@@ -290,7 +290,13 @@ def bulk_hook(model_cls, event, when=None, priority=None):
                     return self.func(changeset, new_records, old_records, **kwargs)
                 else:
                     # Old signature without changeset
-                    return self.func(new_records, old_records, **kwargs)
+                    # Only pass changeset in kwargs if the function accepts **kwargs
+                    if 'kwargs' in params or any(param.startswith('**') for param in sig.parameters):
+                        kwargs['changeset'] = changeset
+                        return self.func(new_records, old_records, **kwargs)
+                    else:
+                        # Function doesn't accept **kwargs, just call with positional args
+                        return self.func(new_records, old_records)
         # Register the hook using the registry
         register_hook(

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/dispatcher.py RENAMED Viewed

@@ -244,3 +244,13 @@ def get_dispatcher():
         # Create dispatcher with the registry instance
         _dispatcher = HookDispatcher(get_registry())
     return _dispatcher
+def reset_dispatcher():
+    """
+    Reset the global dispatcher instance.
+    Useful for testing to ensure clean state between tests.
+    """
+    global _dispatcher
+    _dispatcher = None

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/bulk_executor.py RENAMED Viewed

@@ -21,7 +21,7 @@ class BulkExecutor:
     Dependencies are explicitly injected via constructor.
     """
-    def __init__(self, queryset, analyzer, mti_handler):
+    def __init__(self, queryset, analyzer, mti_handler, record_classifier):
         """
         Initialize bulk executor with explicit dependencies.
@@ -29,10 +29,12 @@ class BulkExecutor:
             queryset: Django QuerySet instance
             analyzer: ModelAnalyzer instance (replaces validator + field_tracker)
             mti_handler: MTIHandler instance
+            record_classifier: RecordClassifier instance
         """
         self.queryset = queryset
         self.analyzer = analyzer
         self.mti_handler = mti_handler
+        self.record_classifier = record_classifier
         self.model_cls = queryset.model
     def bulk_create(
@@ -69,13 +71,24 @@ class BulkExecutor:
         # Check if this is an MTI model and route accordingly
         if self.mti_handler.is_mti_model():
             logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
-            # Build execution plan
+            # Classify records using the classifier service
+            existing_record_ids = set()
+            existing_pks_map = {}
+            if update_conflicts and unique_fields:
+                existing_record_ids, existing_pks_map = (
+                    self.record_classifier.classify_for_upsert(objs, unique_fields)
+                )
+            # Build execution plan with classification results
             plan = self.mti_handler.build_create_plan(
                 objs,
                 batch_size=batch_size,
                 update_conflicts=update_conflicts,
                 update_fields=update_fields,
                 unique_fields=unique_fields,
+                existing_record_ids=existing_record_ids,
+                existing_pks_map=existing_pks_map,
             )
             # Execute the plan
             return self._execute_mti_create_plan(plan)
@@ -161,12 +174,13 @@ class BulkExecutor:
         Execute an MTI create plan.
         This is where ALL database operations happen for MTI bulk_create.
+        Handles both new records (INSERT) and existing records (UPDATE) for upsert.
         Args:
             plan: MTICreatePlan object from MTIHandler
         Returns:
-            List of created objects with PKs assigned
+            List of created/updated objects with PKs assigned
         """
         from django.db import transaction
         from django.db.models import QuerySet as BaseQuerySet
@@ -175,31 +189,63 @@ class BulkExecutor:
             return []
         with transaction.atomic(using=self.queryset.db, savepoint=False):
-            # Step 1: Create all parent objects level by level
+            # Step 1: Create/Update all parent objects level by level
             parent_instances_map = {}  # Maps original obj id() -> {model: parent_instance}
             for parent_level in plan.parent_levels:
-                # Bulk create parents for this level
-                bulk_kwargs = {"batch_size": len(parent_level.objects)}
+                # Separate new and existing parent objects
+                new_parents = []
+                existing_parents = []
-                if parent_level.update_conflicts:
-                    bulk_kwargs["update_conflicts"] = True
-                    bulk_kwargs["unique_fields"] = parent_level.unique_fields
-                    bulk_kwargs["update_fields"] = parent_level.update_fields
+                for parent_obj in parent_level.objects:
+                    orig_obj_id = parent_level.original_object_map[id(parent_obj)]
+                    if orig_obj_id in plan.existing_record_ids:
+                        existing_parents.append(parent_obj)
+                    else:
+                        new_parents.append(parent_obj)
-                # Use base QuerySet to avoid recursion
-                base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
-                created_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
+                # Bulk create new parents
+                if new_parents:
+                    bulk_kwargs = {"batch_size": len(new_parents)}
+                    if parent_level.update_conflicts:
+                        bulk_kwargs["update_conflicts"] = True
+                        bulk_kwargs["unique_fields"] = parent_level.unique_fields
+                        bulk_kwargs["update_fields"] = parent_level.update_fields
+                    # Use base QuerySet to avoid recursion
+                    base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                    created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
+                    # Copy generated fields back to parent objects
+                    for created_parent, parent_obj in zip(created_parents, new_parents):
+                        for field in parent_level.model_class._meta.local_fields:
+                            created_value = getattr(created_parent, field.name, None)
+                            if created_value is not None:
+                                setattr(parent_obj, field.name, created_value)
+                        parent_obj._state.adding = False
+                        parent_obj._state.db = self.queryset.db
-                # Copy generated fields back to parent objects
-                for created_parent, parent_obj in zip(created_parents, parent_level.objects):
-                    for field in parent_level.model_class._meta.local_fields:
-                        created_value = getattr(created_parent, field.name, None)
-                        if created_value is not None:
-                            setattr(parent_obj, field.name, created_value)
+                # Update existing parents
+                if existing_parents and parent_level.update_fields:
+                    # Filter update fields to only those that exist in this parent model
+                    parent_model_fields = {
+                        field.name for field in parent_level.model_class._meta.local_fields
+                    }
+                    filtered_update_fields = [
+                        field for field in parent_level.update_fields
+                        if field in parent_model_fields
+                    ]
+                    if filtered_update_fields:
+                        base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                        base_qs.bulk_update(existing_parents, filtered_update_fields)
-                    parent_obj._state.adding = False
-                    parent_obj._state.db = self.queryset.db
+                    # Mark as not adding
+                    for parent_obj in existing_parents:
+                        parent_obj._state.adding = False
+                        parent_obj._state.db = self.queryset.db
                 # Map parents back to original objects
                 for parent_obj in parent_level.objects:
@@ -208,75 +254,112 @@ class BulkExecutor:
                         parent_instances_map[orig_obj_id] = {}
                     parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
-            # Step 2: Add parent links to child objects
+            # Step 2: Add parent links to child objects and separate new/existing
+            new_child_objects = []
+            existing_child_objects = []
             for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
                 parent_instances = parent_instances_map.get(id(orig_obj), {})
+                # Set parent links
                 for parent_model, parent_instance in parent_instances.items():
                     parent_link = plan.child_model._meta.get_ancestor_link(parent_model)
                     if parent_link:
                         setattr(child_obj, parent_link.attname, parent_instance.pk)
                         setattr(child_obj, parent_link.name, parent_instance)
-            # Step 3: Bulk create child objects using _batched_insert (to bypass MTI check)
-            base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
-            base_qs._prepare_for_bulk_create(plan.child_objects)
-            # Partition objects by PK status
-            objs_without_pk, objs_with_pk = [], []
-            for obj in plan.child_objects:
-                if obj._is_pk_set():
-                    objs_with_pk.append(obj)
+                # Classify as new or existing
+                if id(orig_obj) in plan.existing_record_ids:
+                    # For existing records, set the PK on child object
+                    pk_value = getattr(orig_obj, 'pk', None)
+                    if pk_value:
+                        setattr(child_obj, 'pk', pk_value)
+                        setattr(child_obj, 'id', pk_value)
+                    existing_child_objects.append(child_obj)
                 else:
-                    objs_without_pk.append(obj)
-            # Get fields for insert
-            opts = plan.child_model._meta
-            fields = [f for f in opts.local_fields if not f.generated]
+                    new_child_objects.append(child_obj)
-            # Execute bulk insert
-            if objs_with_pk:
-                returned_columns = base_qs._batched_insert(
-                    objs_with_pk,
-                    fields,
-                    batch_size=len(objs_with_pk),
-                )
-                if returned_columns:
-                    for obj, results in zip(objs_with_pk, returned_columns):
-                        if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
-                            for result, field in zip(results, opts.db_returning_fields):
-                                if field != opts.pk:
+            # Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
+            if new_child_objects:
+                base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+                base_qs._prepare_for_bulk_create(new_child_objects)
+                # Partition objects by PK status
+                objs_without_pk, objs_with_pk = [], []
+                for obj in new_child_objects:
+                    if obj._is_pk_set():
+                        objs_with_pk.append(obj)
+                    else:
+                        objs_without_pk.append(obj)
+                # Get fields for insert
+                opts = plan.child_model._meta
+                fields = [f for f in opts.local_fields if not f.generated]
+                # Execute bulk insert
+                if objs_with_pk:
+                    returned_columns = base_qs._batched_insert(
+                        objs_with_pk,
+                        fields,
+                        batch_size=len(objs_with_pk),
+                    )
+                    if returned_columns:
+                        for obj, results in zip(objs_with_pk, returned_columns):
+                            if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
+                                for result, field in zip(results, opts.db_returning_fields):
+                                    if field != opts.pk:
+                                        setattr(obj, field.attname, result)
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                    else:
+                        for obj in objs_with_pk:
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                if objs_without_pk:
+                    filtered_fields = [
+                        f for f in fields
+                        if not isinstance(f, AutoField) and not f.primary_key
+                    ]
+                    returned_columns = base_qs._batched_insert(
+                        objs_without_pk,
+                        filtered_fields,
+                        batch_size=len(objs_without_pk),
+                    )
+                    if returned_columns:
+                        for obj, results in zip(objs_without_pk, returned_columns):
+                            if hasattr(opts, "db_returning_fields"):
+                                for result, field in zip(results, opts.db_returning_fields):
                                     setattr(obj, field.attname, result)
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-                else:
-                    for obj in objs_with_pk:
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                    else:
+                        for obj in objs_without_pk:
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
-            if objs_without_pk:
-                filtered_fields = [
-                    f for f in fields
-                    if not isinstance(f, AutoField) and not f.primary_key
+            # Step 3.5: Update existing child objects
+            if existing_child_objects and plan.update_fields:
+                # Filter update fields to only those that exist in the child model
+                child_model_fields = {
+                    field.name for field in plan.child_model._meta.local_fields
+                }
+                filtered_child_update_fields = [
+                    field for field in plan.update_fields
+                    if field in child_model_fields
                 ]
-                returned_columns = base_qs._batched_insert(
-                    objs_without_pk,
-                    filtered_fields,
-                    batch_size=len(objs_without_pk),
-                )
-                if returned_columns:
-                    for obj, results in zip(objs_without_pk, returned_columns):
-                        if hasattr(opts, "db_returning_fields"):
-                            for result, field in zip(results, opts.db_returning_fields):
-                                setattr(obj, field.attname, result)
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-                else:
-                    for obj in objs_without_pk:
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
+                if filtered_child_update_fields:
+                    base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+                    base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
+                # Mark as not adding
+                for child_obj in existing_child_objects:
+                    child_obj._state.adding = False
+                    child_obj._state.db = self.queryset.db
-            created_children = plan.child_objects
+            # Combine all children for final processing
+            created_children = new_child_objects + existing_child_objects
             # Step 4: Copy PKs and auto-generated fields back to original objects
             pk_field_name = plan.child_model._meta.pk.name

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/coordinator.py RENAMED Viewed

@@ -7,7 +7,8 @@ a clean, simple API for the QuerySet to use.
 import logging
 from django.db import transaction
-from django.db.models import QuerySet as BaseQuerySet
+from django.db.models import QuerySet
+from django.core.exceptions import FieldDoesNotExist
 from django_bulk_hooks.helpers import (
     build_changeset_for_create,
@@ -29,6 +30,7 @@ class BulkOperationCoordinator:
     Services are created lazily and cached.
     """
     def __init__(self, queryset):
         """
         Initialize coordinator for a queryset.
@@ -42,6 +44,7 @@ class BulkOperationCoordinator:
         # Lazy initialization
         self._analyzer = None
         self._mti_handler = None
+        self._record_classifier = None
         self._executor = None
         self._dispatcher = None
@@ -63,6 +66,15 @@ class BulkOperationCoordinator:
             self._mti_handler = MTIHandler(self.model_cls)
         return self._mti_handler
+    @property
+    def record_classifier(self):
+        """Get or create RecordClassifier"""
+        if self._record_classifier is None:
+            from django_bulk_hooks.operations.record_classifier import RecordClassifier
+            self._record_classifier = RecordClassifier(self.model_cls)
+        return self._record_classifier
     @property
     def executor(self):
         """Get or create BulkExecutor"""
@@ -73,6 +85,7 @@ class BulkOperationCoordinator:
                 queryset=self.queryset,
                 analyzer=self.analyzer,
                 mti_handler=self.mti_handler,
+                record_classifier=self.record_classifier,
             )
         return self._executor
@@ -212,40 +225,52 @@ class BulkOperationCoordinator:
         self, update_kwargs, bypass_hooks=False, bypass_validation=False
     ):
         """
-        Execute queryset update with hooks - optimized for performance.
-        ARCHITECTURE: Database-Level Update with Hook Support
-        =======================================================
-        For queryset.update() operations:
-        1. Fetch old state (before DB update)
-        2. Execute native Django UPDATE (fast, direct SQL with Subquery/F() support)
-        3. Fetch new state (after DB update, with computed values)
-        4. Run BEFORE_UPDATE hooks with old/new state
-           - Hooks can see Subquery-computed values via new_records
-           - Hooks CAN modify instances (e.g., set derived fields)
-           - Modifications are auto-persisted with bulk_update
-        5. Run AFTER_UPDATE hooks (for read-only side effects)
-        Total DML: 1 (queryset.update) + 1 (bulk_update if hooks modify anything)
-        Note: BEFORE_UPDATE runs AFTER the primary database update.
-        This enables:
-        - HasChanged conditions to work with Subquery-computed values
-        - Cascading updates from hook modifications
-        - Optimal performance (Subquery stays in SQL)
-        For true BEFORE semantics (prevent/modify before DB write), use bulk_update().
+        Execute queryset.update() with full hook support.
+        ARCHITECTURE & PERFORMANCE TRADE-OFFS
+        ======================================
+        To support hooks with queryset.update(), we must:
+        1. Fetch old state (SELECT all matching rows)
+        2. Execute database update (UPDATE in SQL)
+        3. Fetch new state (SELECT all rows again)
+        4. Run VALIDATE_UPDATE hooks (validation only)
+        5. Run BEFORE_UPDATE hooks (CAN modify instances)
+        6. Persist BEFORE_UPDATE modifications (bulk_update)
+        7. Run AFTER_UPDATE hooks (read-only side effects)
+        Performance Cost:
+        - 2 SELECT queries (before/after)
+        - 1 UPDATE query (actual update)
+        - 1 bulk_update (if hooks modify data)
+        Trade-off: Hooks require loading data into Python. If you need
+        maximum performance and don't need hooks, use bypass_hooks=True.
+        Hook Semantics:
+        - BEFORE_UPDATE hooks run after the DB update and CAN modify instances
+        - Modifications are auto-persisted (framework handles complexity)
+        - AFTER_UPDATE hooks run after BEFORE_UPDATE and are read-only
+        - This enables cascade logic and computed fields based on DB values
+        - User expectation: BEFORE_UPDATE hooks can modify data
+        Why this approach works well:
+        - Allows hooks to see Subquery/F() computed values
+        - Enables HasChanged conditions on complex expressions
+        - Maintains SQL performance (Subquery stays in database)
+        - Meets user expectations: BEFORE_UPDATE can modify instances
+        - Clean separation: BEFORE for modifications, AFTER for side effects
+        For true "prevent write" semantics, intercept at a higher level
+        or use bulk_update() directly (which has true before semantics).
         """
-        # Check bypass early
         from django_bulk_hooks.context import get_bypass_hooks
-        should_bypass = bypass_hooks or get_bypass_hooks()
-        if should_bypass:
-            # No hooks - use original queryset.update() for max performance
-            return BaseQuerySet.update(self.queryset, **update_kwargs)
+        # Fast path: no hooks at all
+        if bypass_hooks or get_bypass_hooks():
+            return QuerySet.update(self.queryset, **update_kwargs)
-        # Delegate to specialized queryset update handler
+        # Full hook lifecycle path
         return self._execute_queryset_update_with_hooks(
             update_kwargs=update_kwargs,
             bypass_validation=bypass_validation,
@@ -255,34 +280,40 @@ class BulkOperationCoordinator:
         self, update_kwargs, bypass_validation=False
     ):
         """
-        Execute queryset update with hooks - fast path using native Django update.
+        Execute queryset update with full hook lifecycle support.
-        This method provides full hook lifecycle support for queryset.update()
-        including BEFORE_UPDATE hooks with automatic persistence of modifications.
+        This method implements the fetch-update-fetch pattern required
+        to support hooks with queryset.update(). BEFORE_UPDATE hooks can
+        modify instances and modifications are auto-persisted.
         Args:
             update_kwargs: Dict of fields to update
             bypass_validation: Skip validation hooks if True
         Returns:
-            Number of objects updated
+            Number of rows updated
         """
-        # 1. Fetch old state (before DB update)
+        # Step 1: Fetch old state (before database update)
         old_instances = list(self.queryset)
         if not old_instances:
             return 0
         old_records_map = {inst.pk: inst for inst in old_instances}
-        # 2. Execute native Django update (FAST)
-        result = BaseQuerySet.update(self.queryset, **update_kwargs)
+        # Step 2: Execute native Django update
+        # Use stored reference to parent class method - clean and simple
+        update_count = QuerySet.update(self.queryset, **update_kwargs)
-        if result == 0:
+        if update_count == 0:
             return 0
-        # 3. Fetch new state (after DB update)
-        new_instances = list(self.queryset)
+        # Step 3: Fetch new state (after database update)
+        # This captures any Subquery/F() computed values
+        # Use primary keys to fetch updated instances since queryset filters may no longer match
+        pks = [inst.pk for inst in old_instances]
+        new_instances = list(self.model_cls.objects.filter(pk__in=pks))
-        # 4. Build changeset (using framework helper)
+        # Step 4: Build changeset
         changeset = build_changeset_for_update(
             self.model_cls,
             new_instances,
@@ -290,65 +321,165 @@ class BulkOperationCoordinator:
             old_records_map=old_records_map,
         )
-        # Mark that this is a queryset update (for potential hook inspection)
+        # Mark as queryset update for potential hook inspection
         changeset.operation_meta['is_queryset_update'] = True
-        changeset.operation_meta['allows_before_modifications'] = True
+        changeset.operation_meta['allows_modifications'] = True
-        # 5. Get MTI chain (follow framework pattern)
+        # Step 5: Get MTI inheritance chain
         models_in_chain = [self.model_cls]
         if self.mti_handler.is_mti_model():
             models_in_chain.extend(self.mti_handler.get_parent_models())
-        # 6. BEFORE_UPDATE hooks (with auto-persistence)
-        # Snapshot state before hooks
-        pre_hook_state = {}
-        for instance in new_instances:
-            if instance.pk is not None:
-                pre_hook_values = {}
-                for field in self.model_cls._meta.fields:
-                    try:
-                        pre_hook_values[field.name] = getattr(instance, field.name, None)
-                    except Exception:
-                        pre_hook_values[field.name] = None
-                pre_hook_state[instance.pk] = pre_hook_values
-        # Dispatch BEFORE_UPDATE hooks
+        # Step 6: Run VALIDATE hooks (if not bypassed)
+        if not bypass_validation:
+            for model_cls in models_in_chain:
+                model_changeset = self._build_changeset_for_model(changeset, model_cls)
+                self.dispatcher.dispatch(
+                    model_changeset,
+                    "validate_update",
+                    bypass_hooks=False
+                )
+        # Step 7: Run BEFORE_UPDATE hooks with modification tracking
+        modified_fields = self._run_before_update_hooks_with_tracking(
+            new_instances,
+            models_in_chain,
+            changeset
+        )
+        # Step 8: Auto-persist BEFORE_UPDATE modifications
+        if modified_fields:
+            self._persist_hook_modifications(new_instances, modified_fields)
+        # Step 9: Take snapshot before AFTER_UPDATE hooks
+        pre_after_hook_state = self._snapshot_instance_state(new_instances)
+        # Step 10: Run AFTER_UPDATE hooks (read-only side effects)
         for model_cls in models_in_chain:
             model_changeset = self._build_changeset_for_model(changeset, model_cls)
-            self.dispatcher.dispatch(model_changeset, "before_update", bypass_hooks=False)
-        # Detect modifications made by BEFORE_UPDATE hooks
-        hook_modified_fields = set()
-        for instance in new_instances:
-            if instance.pk in pre_hook_state:
-                for field_name, pre_value in pre_hook_state[instance.pk].items():
-                    try:
-                        current_value = getattr(instance, field_name, None)
-                    except Exception:
-                        current_value = None
-                    if current_value != pre_value:
-                        hook_modified_fields.add(field_name)
-        # Auto-persist hook modifications
-        if hook_modified_fields:
-            logger.info(
-                f"BEFORE_UPDATE hooks modified {len(hook_modified_fields)} fields: {hook_modified_fields}"
+            self.dispatcher.dispatch(
+                model_changeset,
+                "after_update",
+                bypass_hooks=False
             )
-            logger.info("Auto-persisting modifications with bulk_update")
-            # Use bulk_update to persist changes
-            # This will trigger another hook cycle (Salesforce-style cascading)
-            from django.db.models import QuerySet as BaseQuerySet
-            base_qs = BaseQuerySet(model=self.model_cls, using=self.queryset.db)
-            base_qs.bulk_update(new_instances, list(hook_modified_fields))
-        # 7. AFTER_UPDATE hooks (read-only side effects)
+        # Step 11: Auto-persist AFTER_UPDATE modifications (if any)
+        after_modified_fields = self._detect_modifications(new_instances, pre_after_hook_state)
+        if after_modified_fields:
+            self._persist_hook_modifications(new_instances, after_modified_fields)
+        return update_count
+    def _run_before_update_hooks_with_tracking(self, instances, models_in_chain, changeset):
+        """
+        Run BEFORE_UPDATE hooks and detect modifications.
+        This is what users expect - BEFORE_UPDATE hooks can modify instances
+        and those modifications will be automatically persisted. The framework
+        handles the complexity internally.
+        Returns:
+            Set of field names that were modified by hooks
+        """
+        # Snapshot current state
+        pre_hook_state = self._snapshot_instance_state(instances)
+        # Run BEFORE_UPDATE hooks
         for model_cls in models_in_chain:
             model_changeset = self._build_changeset_for_model(changeset, model_cls)
-            self.dispatcher.dispatch(model_changeset, "after_update", bypass_hooks=False)
+            self.dispatcher.dispatch(
+                model_changeset,
+                "before_update",
+                bypass_hooks=False
+            )
-        return result
+        # Detect modifications
+        return self._detect_modifications(instances, pre_hook_state)
+    def _snapshot_instance_state(self, instances):
+        """
+        Create a snapshot of current instance field values.
+        Args:
+            instances: List of model instances
+        Returns:
+            Dict mapping pk -> {field_name: value}
+        """
+        snapshot = {}
+        for instance in instances:
+            if instance.pk is None:
+                continue
+            field_values = {}
+            for field in self.model_cls._meta.get_fields():
+                # Skip relations that aren't concrete fields
+                if field.many_to_many or field.one_to_many:
+                    continue
+                field_name = field.name
+                try:
+                    field_values[field_name] = getattr(instance, field_name)
+                except (AttributeError, FieldDoesNotExist):
+                    # Field not accessible (e.g., deferred field)
+                    field_values[field_name] = None
+            snapshot[instance.pk] = field_values
+        return snapshot
+    def _detect_modifications(self, instances, pre_hook_state):
+        """
+        Detect which fields were modified by comparing to snapshot.
+        Args:
+            instances: List of model instances
+            pre_hook_state: Previous state snapshot from _snapshot_instance_state
+        Returns:
+            Set of field names that were modified
+        """
+        modified_fields = set()
+        for instance in instances:
+            if instance.pk not in pre_hook_state:
+                continue
+            old_values = pre_hook_state[instance.pk]
+            for field_name, old_value in old_values.items():
+                try:
+                    current_value = getattr(instance, field_name)
+                except (AttributeError, FieldDoesNotExist):
+                    current_value = None
+                # Compare values
+                if current_value != old_value:
+                    modified_fields.add(field_name)
+        return modified_fields
+    def _persist_hook_modifications(self, instances, modified_fields):
+        """
+        Persist modifications made by hooks using bulk_update.
+        This creates a "cascade" effect similar to Salesforce workflows.
+        Args:
+            instances: List of modified instances
+            modified_fields: Set of field names that were modified
+        """
+        logger.info(
+            f"Hooks modified {len(modified_fields)} field(s): "
+            f"{', '.join(sorted(modified_fields))}"
+        )
+        logger.info("Auto-persisting modifications via bulk_update")
+        # Use Django's bulk_update directly (not our hook version)
+        # Create a fresh QuerySet to avoid recursion
+        fresh_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
+        QuerySet.bulk_update(fresh_qs, instances, list(modified_fields))
     @transaction.atomic
     def delete(self, bypass_hooks=False, bypass_validation=False):
@@ -375,8 +506,8 @@ class BulkOperationCoordinator:
         # Execute with hook lifecycle
         def operation():
-            # Call base Django QuerySet.delete() to avoid recursion
-            return BaseQuerySet.delete(self.queryset)
+            # Use stored reference to parent method - clean and simple
+            return QuerySet.delete(self.queryset)
         return self._execute_with_mti_hooks(
             changeset=changeset,
@@ -532,8 +663,8 @@ class BulkOperationCoordinator:
                     # This is a FK field being updated by its attname (e.g., business_id)
                     # Add the relationship name (e.g., 'business') to skip list
                     fk_relationships.add(field.name)
-            except Exception:
+            except FieldDoesNotExist:
                 # If field lookup fails, skip it
                 continue
-        return fk_relationships
+        return fk_relationships

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_handler.py RENAMED Viewed

@@ -121,6 +121,8 @@ class MTIHandler:
         update_conflicts=False,
         unique_fields=None,
         update_fields=None,
+        existing_record_ids=None,
+        existing_pks_map=None,
     ):
         """
         Build an execution plan for bulk creating MTI model instances.
@@ -134,6 +136,8 @@ class MTIHandler:
             update_conflicts: Enable UPSERT on conflict
             unique_fields: Fields for conflict detection
             update_fields: Fields to update on conflict
+            existing_record_ids: Set of id() for objects that exist in DB (from RecordClassifier)
+            existing_pks_map: Dict mapping id(obj) -> pk for existing records (from RecordClassifier)
         Returns:
             MTICreatePlan object
@@ -149,6 +153,19 @@ class MTIHandler:
         batch_size = batch_size or len(objs)
+        # Use provided classification (no more DB query here!)
+        if existing_record_ids is None:
+            existing_record_ids = set()
+        if existing_pks_map is None:
+            existing_pks_map = {}
+        # Set PKs on existing objects so they can be updated
+        if existing_pks_map:
+            for obj in objs:
+                if id(obj) in existing_pks_map:
+                    setattr(obj, 'pk', existing_pks_map[id(obj)])
+                    setattr(obj, 'id', existing_pks_map[id(obj)])
         # Build parent levels
         parent_levels = self._build_parent_levels(
             objs,
@@ -171,6 +188,10 @@ class MTIHandler:
             child_model=inheritance_chain[-1],
             original_objects=objs,
             batch_size=batch_size,
+            existing_record_ids=existing_record_ids,
+            update_conflicts=update_conflicts,
+            unique_fields=unique_fields or [],
+            update_fields=update_fields or [],
         )
     def _build_parent_levels(
@@ -272,9 +293,17 @@ class MTIHandler:
             ut = (ut,)
         ut_field_sets = [tuple(group) for group in ut]
+        # Check individual field uniqueness
+        unique_field_sets = []
+        for field in model_class._meta.local_fields:
+            if field.unique and not field.primary_key:
+                unique_field_sets.append((field.name,))
         # Compare as sets
         provided_set = set(normalized_unique)
-        for group in constraint_field_sets + ut_field_sets:
+        all_constraint_sets = constraint_field_sets + ut_field_sets + unique_field_sets
+        for group in all_constraint_sets:
             if provided_set == set(group):
                 return True
         return False

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_plans.py RENAMED Viewed

@@ -45,6 +45,10 @@ class MTICreatePlan:
         child_model: The child model class
         original_objects: Original objects provided by user
         batch_size: Batch size for operations
+        existing_record_ids: Set of id() of original objects that represent existing DB records
+        update_conflicts: Whether this is an upsert operation
+        unique_fields: Fields used for conflict detection
+        update_fields: Fields to update on conflict
     """
     inheritance_chain: List[Any]
     parent_levels: List[ParentLevel]
@@ -52,6 +56,10 @@ class MTICreatePlan:
     child_model: Any
     original_objects: List[Any]
     batch_size: int = None
+    existing_record_ids: set = field(default_factory=set)
+    update_conflicts: bool = False
+    unique_fields: List[str] = field(default_factory=list)
+    update_fields: List[str] = field(default_factory=list)
 @dataclass

django_bulk_hooks-0.2.16/django_bulk_hooks/operations/record_classifier.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""
+Record classification service for database queries.
+This service handles all database queries related to classifying and fetching
+records based on various criteria (PKs, unique fields, etc.).
+Separates data access concerns from business logic.
+"""
+import logging
+from django.db.models import Q
+logger = logging.getLogger(__name__)
+class RecordClassifier:
+    """
+    Service for classifying and fetching records via database queries.
+    This is the SINGLE point of truth for record classification queries.
+    Keeps database access logic separate from business/planning logic.
+    """
+    def __init__(self, model_cls):
+        """
+        Initialize classifier for a specific model.
+        Args:
+            model_cls: The Django model class
+        """
+        self.model_cls = model_cls
+    def classify_for_upsert(self, objs, unique_fields):
+        """
+        Classify records as new or existing based on unique_fields.
+        Queries the database to check which records already exist based on the
+        unique_fields constraint. Objects with a None value in any unique field
+        are never matched and therefore count as new.
+        Args:
+            objs: List of model instances
+            unique_fields: List of field names that form the unique constraint
+        Returns:
+            Tuple of (existing_record_ids, existing_pks_map)
+            - existing_record_ids: Set of id() for objects that exist in DB
+            - existing_pks_map: Dict mapping id(obj) -> pk for existing records
+        """
+        if not unique_fields or not objs:
+            return set(), {}
+        queries = []
+        # Index object ids by their unique-value tuple so each fetched row is
+        # matched with one dict lookup instead of a scan over every object.
+        obj_ids_by_values = {}
+        # Value tuples that cannot be dict keys (unhashable values) are kept
+        # aside and matched by plain equality.
+        unhashable_entries = []
+        for obj in objs:
+            # Build lookup dict for this object's unique fields
+            lookup = {}
+            for field_name in unique_fields:
+                value = getattr(obj, field_name, None)
+                if value is None:
+                    # Can't match on None values
+                    break
+                lookup[field_name] = value
+            else:
+                # All unique fields have values, add to query
+                if lookup:
+                    queries.append(Q(**lookup))
+                    values = tuple(lookup.values())
+                    try:
+                        obj_ids_by_values.setdefault(values, []).append(id(obj))
+                    except TypeError:
+                        unhashable_entries.append((values, id(obj)))
+        if not queries:
+            return set(), {}
+        # OR the per-object lookups together into a single query
+        combined_query = queries[0]
+        for q in queries[1:]:
+            combined_query |= q
+        existing_records = list(
+            self.model_cls.objects.filter(combined_query).values('pk', *unique_fields)
+        )
+        # Map existing records back to original objects
+        existing_record_ids = set()
+        existing_pks_map = {}
+        for record in existing_records:
+            record_values = tuple(record[field] for field in unique_fields)
+            try:
+                matched_ids = list(obj_ids_by_values.get(record_values, ()))
+            except TypeError:
+                # Row values are unhashable; only the equality scan can match
+                matched_ids = []
+            matched_ids.extend(
+                obj_id
+                for values, obj_id in unhashable_entries
+                if values == record_values
+            )
+            for obj_id in matched_ids:
+                existing_record_ids.add(obj_id)
+                existing_pks_map[obj_id] = record['pk']
+        logger.info(
+            f"Classified {len(existing_record_ids)} existing and "
+            f"{len(objs) - len(existing_record_ids)} new records for upsert"
+        )
+        return existing_record_ids, existing_pks_map
+    def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
+        """
+        Fetch records by primary keys with optional relationship loading.
+        Uses the model's _base_manager rather than the default manager.
+        Args:
+            pks: List of primary key values
+            select_related: Optional list of fields to select_related
+            prefetch_related: Optional list of fields to prefetch_related
+        Returns:
+            Dict[pk, instance] for O(1) lookups; empty dict when pks is empty
+        """
+        if not pks:
+            return {}
+        queryset = self.model_cls._base_manager.filter(pk__in=pks)
+        if select_related:
+            queryset = queryset.select_related(*select_related)
+        if prefetch_related:
+            queryset = queryset.prefetch_related(*prefetch_related)
+        return {obj.pk: obj for obj in queryset}
+    def fetch_by_unique_constraint(self, field_values_map):
+        """
+        Fetch the record matching a unique constraint.
+        Args:
+            field_values_map: Dict of {field_name: value} for unique constraint
+        Returns:
+            Model instance if found, None otherwise. If several rows match, a
+            warning is logged and the first row of a filter() query is returned.
+        """
+        try:
+            return self.model_cls.objects.get(**field_values_map)
+        except self.model_cls.DoesNotExist:
+            return None
+        except self.model_cls.MultipleObjectsReturned:
+            logger.warning(
+                f"Multiple {self.model_cls.__name__} records found for "
+                f"unique constraint {field_values_map}"
+            )
+            return self.model_cls.objects.filter(**field_values_map).first()
+    def exists_by_pks(self, pks):
+        """
+        Check if records exist by primary keys without fetching them.
+        Args:
+            pks: List of primary key values
+        Returns:
+            Set of PKs that exist in the database; empty set when pks is empty
+        """
+        if not pks:
+            return set()
+        existing_pks = self.model_cls.objects.filter(
+            pk__in=pks
+        ).values_list('pk', flat=True)
+        return set(existing_pks)
+    def count_by_unique_fields(self, objs, unique_fields):
+        """
+        Count how many objects already exist based on unique fields.
+        Useful for validation or reporting before upsert operations.
+        Args:
+            objs: List of model instances
+            unique_fields: List of field names that form the unique constraint
+        Returns:
+            Tuple of (existing_count, new_count)
+        """
+        existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
+        existing_count = len(existing_ids)
+        new_count = len(objs) - existing_count
+        return existing_count, new_count

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/registry.py RENAMED Viewed

@@ -165,6 +165,16 @@ class HookRegistry:
         with self._lock:
             return dict(self._hooks)
+    @property
+    def hooks(self) -> Dict[Tuple[Type, str], List[HookInfo]]:
+        """
+        Return the registry's internal hook mapping itself, not a copy.
+        Intended for tests: because the live dictionary is handed out, callers
+        can clear or mutate it to reset registry state between test runs.
+        """
+        return self._hooks
     def count_hooks(
         self, model: Optional[Type] = None, event: Optional[str] = None
     ) -> int:
@@ -286,3 +296,8 @@ def list_all_hooks() -> Dict[Tuple[Type, str], List[HookInfo]]:
     """
     registry = get_registry()
     return registry.list_all()
+# Expose hooks dictionary for testing purposes
+# This provides backward compatibility with tests that expect to access _hooks directly
+_hooks = get_registry().hooks

{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "django-bulk-hooks"
-version = "0.2.14"
+version = "0.2.16"
 description = "Hook-style hooks for Django bulk operations like bulk_create and bulk_update."
 authors = ["Konrad Beck <konrad.beck@merchantcapital.co.za>"]
 readme = "README.md"