PyPI - django-bulk-hooks - Versions diffs - 0.2.1__tar.gz → 0.2.3__tar.gz - Mend

django-bulk-hooks 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of django-bulk-hooks might be problematic. Click here for more details.

Files changed (28) hide show

{django_bulk_hooks-0.2.1 → django_bulk_hooks-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: django-bulk-hooks
-Version: 0.2.1
+Version: 0.2.3
 Summary: Hook-style hooks for Django bulk operations like bulk_create and bulk_update.
 License: MIT
 Keywords: django,bulk,hooks

{django_bulk_hooks-0.2.1 → django_bulk_hooks-0.2.3}/django_bulk_hooks/operations/analyzer.py RENAMED Viewed

@@ -206,3 +206,72 @@ class ModelAnalyzer:
         # Return as sorted list for deterministic behavior
         return sorted(changed_fields_set)
+    def resolve_expression(self, field_name, expression, instance):
+        """
+        Turn a database expression into the concrete value it yields for one row.
+        Plain Python values pass straight through. Anything that is a Django
+        ``Expression`` or ``Combinable`` (F(), Subquery, Case, ...) is evaluated
+        by annotating a queryset narrowed to ``instance.pk`` and reading the
+        annotation back.
+        Args:
+            field_name: Field the value is destined for (only used in the log message)
+            expression: Literal value or Django expression to materialize
+            instance: Model instance whose row the expression is evaluated against
+        Returns:
+            The evaluated value, or ``expression`` unchanged when it is a plain
+            value or when evaluation raises
+        """
+        from django.db.models import Expression
+        from django.db.models.expressions import Combinable
+        needs_database = isinstance(expression, (Expression, Combinable))
+        if needs_database:
+            try:
+                # Narrow to this instance's row, then let the database compute
+                # the expression as an annotation on it.
+                row_qs = self.model_cls.objects.filter(pk=instance.pk)
+                annotated = row_qs.annotate(_resolved_value=expression)
+                return annotated.values_list('_resolved_value', flat=True).first()
+            except Exception as e:
+                # Best effort: report the failure and fall through to hand the
+                # caller back exactly what it passed in.
+                logger.warning(
+                    f"Failed to resolve expression for field '{field_name}' "
+                    f"on {self.model_cls.__name__}: {e}. Using original value."
+                )
+        return expression
+    def apply_update_values(self, instances, update_kwargs):
+        """
+        Apply update_kwargs to instances, resolving any SQL expressions.
+        This method transforms queryset.update()-style kwargs (which may contain
+        F() expressions, Subquery, Case, etc.) into concrete values and applies
+        them to the instances.
+        All expression kwargs are materialized for all instances with a single
+        annotated query instead of one query per instance per field. If that
+        query fails, every value falls back to the per-instance
+        resolve_expression() path.
+        Args:
+            instances: List of model instances to update
+            update_kwargs: Dict of {field_name: value_or_expression}
+        Returns:
+            List of field names that were updated (empty when either argument
+            is empty)
+        """
+        if not instances or not update_kwargs:
+            return []
+        from django.db.models import Expression
+        from django.db.models.expressions import Combinable
+        fields_updated = list(update_kwargs)
+        # Only expressions need the database; use the same test that
+        # resolve_expression() applies so both paths agree on what one is.
+        expression_kwargs = {
+            name: value
+            for name, value in update_kwargs.items()
+            if isinstance(value, (Expression, Combinable))
+        }
+        batch_values = self._resolve_expressions_in_batch(instances, expression_kwargs)
+        for field_name, value in update_kwargs.items():
+            resolved_by_pk = batch_values.get(field_name)
+            for instance in instances:
+                if resolved_by_pk is not None:
+                    # A pk missing from the batch result yields None, matching
+                    # what the per-instance lookup returns when no row is found.
+                    resolved_value = resolved_by_pk.get(instance.pk)
+                else:
+                    resolved_value = self.resolve_expression(field_name, value, instance)
+                setattr(instance, field_name, resolved_value)
+        return fields_updated
+    def _resolve_expressions_in_batch(self, instances, expression_kwargs):
+        """
+        Evaluate several expressions for several instances in one query.
+        Args:
+            instances: Model instances whose rows the expressions are evaluated on
+            expression_kwargs: Dict of {field_name: expression}
+        Returns:
+            Dict of {field_name: {pk: resolved_value}}; an empty dict when there
+            is nothing to resolve or the batch query raised
+        """
+        if not expression_kwargs:
+            return {}
+        # Annotate under generated aliases so they cannot clash with field names.
+        alias_to_field = {
+            f"_resolved_value_{index}": name
+            for index, name in enumerate(expression_kwargs)
+        }
+        annotations = {
+            alias: expression_kwargs[name] for alias, name in alias_to_field.items()
+        }
+        pks = [instance.pk for instance in instances if instance.pk is not None]
+        try:
+            rows = list(
+                self.model_cls.objects.filter(pk__in=pks)
+                .annotate(**annotations)
+                .values_list("pk", *alias_to_field)
+            )
+        except Exception as e:
+            # Best effort, like resolve_expression(): report and let the caller
+            # fall back to resolving one instance at a time.
+            logger.warning(
+                f"Batch expression resolution failed on {self.model_cls.__name__}: "
+                f"{e}. Falling back to per-instance resolution."
+            )
+            return {}
+        resolved = {name: {} for name in expression_kwargs}
+        for pk, *values in rows:
+            for name, value in zip(alias_to_field.values(), values):
+                resolved[name][pk] = value
+        return resolved

django_bulk_hooks-0.2.3/django_bulk_hooks/operations/bulk_executor.py ADDED Viewed

@@ -0,0 +1,430 @@
+"""
+Bulk executor service for database operations.
+This service coordinates bulk database operations with validation and MTI handling.
+"""
+import logging
+from django.db import transaction
+from django.db.models import AutoField
+logger = logging.getLogger(__name__)
+class BulkExecutor:
+    """
+    Executes bulk database operations.
+    This service coordinates validation, MTI handling, and actual database
+    operations. It's the only service that directly calls Django ORM methods.
+    Dependencies are explicitly injected via constructor.
+    """
+    def __init__(self, queryset, analyzer, mti_handler):
+        """
+        Wire the executor to its collaborators.
+        Args:
+            queryset: Django QuerySet the operations are issued for; its
+                ``model`` is kept as ``self.model_cls``
+            analyzer: ModelAnalyzer instance (replaces validator + field_tracker)
+            mti_handler: MTIHandler instance
+        """
+        self.model_cls = queryset.model
+        self.queryset, self.analyzer, self.mti_handler = queryset, analyzer, mti_handler
+    def bulk_create(
+        self,
+        objs,
+        batch_size=None,
+        ignore_conflicts=False,
+        update_conflicts=False,
+        update_fields=None,
+        unique_fields=None,
+        **kwargs,
+    ):
+        """
+        Create objects in bulk, choosing the MTI or the native Django path.
+        NOTE: The coordinator validates inputs before calling this method; no
+        validation is repeated here.
+        Args:
+            objs: List of model instances to create (pre-validated)
+            batch_size: Number of objects to create per batch
+            ignore_conflicts: Whether to ignore conflicts (not passed to the
+                MTI plan builder)
+            update_conflicts: Whether to update on conflict
+            update_fields: Fields to update on conflict
+            unique_fields: Fields to use for conflict detection
+            **kwargs: Additional arguments, handed to the non-MTI path only
+        Returns:
+            List of created objects; ``objs`` itself when it is empty
+        """
+        if not objs:
+            return objs
+        # Ordinary models go straight to Django's native bulk_create.
+        if not self.mti_handler.is_mti_model():
+            return self._execute_bulk_create(
+                objs,
+                batch_size,
+                ignore_conflicts,
+                update_conflicts,
+                update_fields,
+                unique_fields,
+                **kwargs,
+            )
+        # Multi-table inheritance: have the handler plan it, then run the plan.
+        logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
+        mti_plan = self.mti_handler.build_create_plan(
+            objs,
+            batch_size=batch_size,
+            update_conflicts=update_conflicts,
+            update_fields=update_fields,
+            unique_fields=unique_fields,
+        )
+        return self._execute_mti_create_plan(mti_plan)
+    def _execute_bulk_create(
+        self,
+        objs,
+        batch_size=None,
+        ignore_conflicts=False,
+        update_conflicts=False,
+        update_fields=None,
+        unique_fields=None,
+        **kwargs,
+    ):
+        """
+        Run Django's own bulk_create on a plain QuerySet.
+        A base ``QuerySet`` is built for ``self.model_cls`` on the same database
+        as ``self.queryset`` so that the call does not re-enter the hook-aware
+        queryset and recurse. Extra ``**kwargs`` are accepted but not forwarded
+        to Django.
+        """
+        from django.db.models import QuerySet
+        native_options = {
+            "batch_size": batch_size,
+            "ignore_conflicts": ignore_conflicts,
+            "update_conflicts": update_conflicts,
+            "update_fields": update_fields,
+            "unique_fields": unique_fields,
+        }
+        # Plain Django queryset (not our HookQuerySet)
+        plain_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
+        return plain_qs.bulk_create(objs, **native_options)
+    def bulk_update(self, objs, fields, batch_size=None):
+        """
+        Update objects in bulk, choosing the MTI or the native Django path.
+        NOTE: The coordinator validates inputs before calling this method; no
+        validation is repeated here.
+        Args:
+            objs: List of model instances to update (pre-validated)
+            fields: List of field names to update
+            batch_size: Number of objects to update per batch
+        Returns:
+            Number of objects updated (0 when ``objs`` is empty)
+        """
+        if not objs:
+            return 0
+        if self.mti_handler.is_mti_model():
+            # Multi-table inheritance: have the handler plan it, then run the plan.
+            logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk update")
+            mti_plan = self.mti_handler.build_update_plan(objs, fields, batch_size=batch_size)
+            return self._execute_mti_update_plan(mti_plan)
+        # Ordinary model: Django's native bulk_update on a plain QuerySet
+        # (validation already done by the coordinator).
+        from django.db.models import QuerySet
+        plain_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
+        return plain_qs.bulk_update(objs, fields, batch_size=batch_size)
+    # ==================== MTI PLAN EXECUTION ====================
+    def _execute_mti_create_plan(self, plan):
+        """
+        Execute an MTI create plan.
+        This is where ALL database operations happen for MTI bulk_create.
+        The work runs inside a single ``transaction.atomic`` block on
+        ``self.queryset.db`` and proceeds in four steps: create the parent rows
+        level by level, point each child object at its parents, insert the
+        child rows, then copy PKs and auto-generated values back onto the
+        caller's original objects.
+        Args:
+            plan: MTICreatePlan object from MTIHandler. The attributes read here
+                are ``parent_levels``, ``child_objects``, ``original_objects``,
+                ``child_model`` and ``inheritance_chain``.
+        Returns:
+            ``plan.original_objects`` with PKs assigned, or an empty list when
+            ``plan`` is falsy
+        """
+        from django.db import transaction
+        from django.db.models import QuerySet as BaseQuerySet
+        if not plan:
+            return []
+        with transaction.atomic(using=self.queryset.db, savepoint=False):
+            # Step 1: Create all parent objects level by level
+            parent_instances_map = {}  # Maps original obj id() -> {model: parent_instance}
+            for parent_level in plan.parent_levels:
+                # Bulk create parents for this level
+                # batch_size equals the level size, so the level goes in one batch
+                bulk_kwargs = {"batch_size": len(parent_level.objects)}
+                if parent_level.update_conflicts:
+                    bulk_kwargs["update_conflicts"] = True
+                    bulk_kwargs["unique_fields"] = parent_level.unique_fields
+                    bulk_kwargs["update_fields"] = parent_level.update_fields
+                # Use base QuerySet to avoid recursion
+                base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                created_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
+                # Copy generated fields back to parent objects
+                # (only non-None values are copied, so existing values are never blanked)
+                for created_parent, parent_obj in zip(created_parents, parent_level.objects):
+                    for field in parent_level.model_class._meta.local_fields:
+                        created_value = getattr(created_parent, field.name, None)
+                        if created_value is not None:
+                            setattr(parent_obj, field.name, created_value)
+                    # Mark the parent as persisted on this database
+                    parent_obj._state.adding = False
+                    parent_obj._state.db = self.queryset.db
+                # Map parents back to original objects
+                # original_object_map is keyed by id(parent_obj); its values are used
+                # below as the id() of the corresponding original object
+                for parent_obj in parent_level.objects:
+                    orig_obj_id = parent_level.original_object_map[id(parent_obj)]
+                    if orig_obj_id not in parent_instances_map:
+                        parent_instances_map[orig_obj_id] = {}
+                    parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
+            # Step 2: Add parent links to child objects
+            # child_objects and original_objects are paired positionally
+            for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
+                parent_instances = parent_instances_map.get(id(orig_obj), {})
+                for parent_model, parent_instance in parent_instances.items():
+                    parent_link = plan.child_model._meta.get_ancestor_link(parent_model)
+                    if parent_link:
+                        # Set both the raw FK column value and the related object
+                        setattr(child_obj, parent_link.attname, parent_instance.pk)
+                        setattr(child_obj, parent_link.name, parent_instance)
+            # Step 3: Bulk create child objects using _batched_insert (to bypass MTI check)
+            # NOTE(review): _prepare_for_bulk_create and _batched_insert are private
+            # QuerySet methods - confirm they exist on every supported Django version
+            base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+            base_qs._prepare_for_bulk_create(plan.child_objects)
+            # Partition objects by PK status
+            objs_without_pk, objs_with_pk = [], []
+            for obj in plan.child_objects:
+                if obj._is_pk_set():
+                    objs_with_pk.append(obj)
+                else:
+                    objs_without_pk.append(obj)
+            # Get fields for insert
+            # (fields flagged as generated are left out of the INSERT)
+            opts = plan.child_model._meta
+            fields = [f for f in opts.local_fields if not f.generated]
+            # Execute bulk insert
+            if objs_with_pk:
+                returned_columns = base_qs._batched_insert(
+                    objs_with_pk,
+                    fields,
+                    batch_size=len(objs_with_pk),
+                )
+                if returned_columns:
+                    for obj, results in zip(objs_with_pk, returned_columns):
+                        # NOTE(review): both hasattr checks look always true for a
+                        # model's _meta - confirm whether this guard is needed
+                        if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
+                            for result, field in zip(results, opts.db_returning_fields):
+                                # The PK was supplied by the caller; do not overwrite it
+                                if field != opts.pk:
+                                    setattr(obj, field.attname, result)
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
+                else:
+                    # Nothing returned: still mark the objects as persisted
+                    for obj in objs_with_pk:
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
+            if objs_without_pk:
+                # Leave AutoField and primary-key columns out of the INSERT for
+                # objects that have no PK yet
+                filtered_fields = [
+                    f for f in fields
+                    if not isinstance(f, AutoField) and not f.primary_key
+                ]
+                returned_columns = base_qs._batched_insert(
+                    objs_without_pk,
+                    filtered_fields,
+                    batch_size=len(objs_without_pk),
+                )
+                if returned_columns:
+                    for obj, results in zip(objs_without_pk, returned_columns):
+                        if hasattr(opts, "db_returning_fields"):
+                            # Here every returned column is applied, PK included
+                            for result, field in zip(results, opts.db_returning_fields):
+                                setattr(obj, field.attname, result)
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
+                else:
+                    # Nothing returned: still mark the objects as persisted
+                    for obj in objs_without_pk:
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
+            created_children = plan.child_objects
+            # Step 4: Copy PKs and auto-generated fields back to original objects
+            pk_field_name = plan.child_model._meta.pk.name
+            for orig_obj, child_obj in zip(plan.original_objects, created_children):
+                # Copy PK
+                child_pk = getattr(child_obj, pk_field_name)
+                setattr(orig_obj, pk_field_name, child_pk)
+                # Copy auto-generated fields from all levels
+                parent_instances = parent_instances_map.get(id(orig_obj), {})
+                for model_class in plan.inheritance_chain:
+                    # Get source object for this level
+                    if model_class in parent_instances:
+                        source_obj = parent_instances[model_class]
+                    elif model_class == plan.child_model:
+                        source_obj = child_obj
+                    else:
+                        # No object was created for this level; nothing to copy
+                        continue
+                    # Copy auto-generated field values
+                    for field in model_class._meta.local_fields:
+                        # The PK was already copied above
+                        if field.name == pk_field_name:
+                            continue
+                        # Skip parent link fields
+                        if hasattr(field, 'remote_field') and field.remote_field:
+                            parent_link = plan.child_model._meta.get_ancestor_link(model_class)
+                            if parent_link and field.name == parent_link.name:
+                                continue
+                        # Copy auto_now_add, auto_now, and db_returning fields
+                        if (getattr(field, 'auto_now_add', False) or
+                            getattr(field, 'auto_now', False) or
+                            getattr(field, 'db_returning', False)):
+                            source_value = getattr(source_obj, field.name, None)
+                            if source_value is not None:
+                                setattr(orig_obj, field.name, source_value)
+                # Update object state
+                orig_obj._state.adding = False
+                orig_obj._state.db = self.queryset.db
+        return plan.original_objects
+    def _execute_mti_update_plan(self, plan):
+        """
+        Execute an MTI update plan.
+        Updates each table in the inheritance chain using CASE/WHEN for bulk
+        updates, so every object receives its own value in one UPDATE per table.
+        Objects without a (truthy) primary key are skipped.
+        Args:
+            plan: MTIUpdatePlan object from MTIHandler; ``objects`` and
+                ``field_groups`` are read here
+        Returns:
+            Sum of the row counts reported by the per-table UPDATEs (0 when the
+            plan is falsy or no object has a primary key)
+        Raises:
+            Exception: Any error raised by an UPDATE is logged and re-raised so
+                the enclosing atomic block rolls back instead of continuing on
+                a broken transaction.
+        """
+        from django.db import transaction
+        from django.db.models import Case, Value, When, QuerySet as BaseQuerySet
+        if not plan:
+            return 0
+        # Pair every object with its PK exactly once. Zipping a filtered PK list
+        # against the unfiltered object list would shift values onto the wrong
+        # rows as soon as one object has no PK.
+        keyed_objects = []
+        for obj in plan.objects:
+            obj_pk = getattr(obj, "pk", None) or getattr(obj, "id", None)
+            if obj_pk:
+                keyed_objects.append((obj_pk, obj))
+        if not keyed_objects:
+            return 0
+        root_pks = [pk for pk, _ in keyed_objects]
+        total_updated = 0
+        with transaction.atomic(using=self.queryset.db, savepoint=False):
+            # Update each table in the chain
+            for field_group in plan.field_groups:
+                if not field_group.fields:
+                    continue
+                base_qs = BaseQuerySet(model=field_group.model_class, using=self.queryset.db)
+                target_rows = base_qs.filter(**{f"{field_group.filter_field}__in": root_pks})
+                # Skip tables that hold none of the rows (existence probe only)
+                if not target_rows.exists():
+                    continue
+                # Build CASE statements for bulk update
+                case_statements = {}
+                for field_name in field_group.fields:
+                    field = field_group.model_class._meta.get_field(field_name)
+                    # Use column name for FK fields
+                    if getattr(field, 'is_relation', False) and hasattr(field, 'attname'):
+                        db_field_name = field.attname
+                        target_field = field.target_field
+                    else:
+                        db_field_name = field_name
+                        target_field = field
+                    when_statements = []
+                    for pk, obj in keyed_objects:
+                        value = getattr(obj, db_field_name)
+                        # Same rule as Django's bulk_update: wrap plain values,
+                        # pass expressions through untouched.
+                        if not hasattr(value, "resolve_expression"):
+                            value = Value(value, output_field=target_field)
+                        when_statements.append(
+                            When(**{field_group.filter_field: pk}, then=value)
+                        )
+                    case_statements[db_field_name] = Case(
+                        *when_statements, output_field=target_field
+                    )
+                if not case_statements:
+                    continue
+                # Execute bulk update
+                try:
+                    total_updated += target_rows.update(**case_statements)
+                except Exception:
+                    # Swallowing a database error inside atomic() leaves the
+                    # transaction unusable; log it and let it propagate.
+                    logger.exception(
+                        f"MTI bulk update failed for {field_group.model_class.__name__}"
+                    )
+                    raise
+        return total_updated
+    def delete_queryset(self):
+        """
+        Execute delete on the queryset.
+        NOTE: Coordinator is responsible for validation before calling this method.
+        This executor trusts that inputs have already been validated.
+        Returns:
+            Tuple of (count, details dict); ``(0, {})`` when there is no
+            queryset or it matches no rows
+        """
+        queryset = self.queryset
+        # Testing a QuerySet for truth evaluates it and loads every matching row
+        # just to see whether any exist; exists() asks the database the same
+        # question without fetching the rows.
+        if queryset is None or not queryset.exists():
+            return 0, {}
+        # Execute delete via the base QuerySet
+        # Validation already done by coordinator
+        from django.db.models import QuerySet
+        return QuerySet.delete(queryset)

{django_bulk_hooks-0.2.1 → django_bulk_hooks-0.2.3}/django_bulk_hooks/operations/coordinator.py RENAMED Viewed

@@ -214,32 +214,38 @@ class BulkOperationCoordinator:
         """
         Execute queryset update with hooks.
-        ARCHITECTURE: Database-Layer vs Application-Layer Updates
-        ==========================================================
+        ARCHITECTURE: Application-Layer Update with Expression Resolution
+        ===================================================================
-        Unlike bulk_update(objs), queryset.update() is a pure SQL UPDATE operation.
-        The database evaluates ALL expressions (F(), Subquery, Case, functions, etc.)
-        without Python ever seeing the new values.
+        When hooks are enabled, queryset.update() is transformed into bulk_update()
+        to allow BEFORE hooks to modify records. This is a deliberate design choice:
-        To maintain Salesforce's hook contract (AFTER hooks see accurate new_records),
-        we ALWAYS refetch instances after the update for AFTER hooks.
+        1. Fetch instances from the queryset (we need them for hooks anyway)
+        2. Resolve SQL expressions (F(), Subquery, Case, etc.) to concrete values
+        3. Apply resolved values to instances
+        4. Run BEFORE hooks (which can now modify the instances)
+        5. Use bulk_update() to persist the (possibly modified) instances
+        6. Run AFTER hooks with final state
-        This is NOT a hack - it respects the fundamental architectural difference:
+        This approach:
+        - ✅ Allows BEFORE hooks to modify values (feature request)
+        - ✅ Preserves SQL expression semantics (materializes them correctly)
+        - ✅ Eliminates the double-fetch (was fetching before AND after)
+        - ✅ More efficient than previous implementation
+        - ✅ Maintains Salesforce-like hook contract
-        1. queryset.update():  Database evaluates → Must refetch for AFTER hooks
-        2. bulk_update(objs):  Python has values → No refetch needed
-        The refetch handles ALL database-level changes:
-        - F() expressions: F('count') + 1
+        SQL expressions are resolved per-instance using Django's annotate(),
+        which ensures correct evaluation of:
+        - F() expressions: F('balance') + 100
         - Subquery: Subquery(related.aggregate(...))
-        - Case/When: Case(When(status='A', then=Value('Active')))
-        - Database functions: Upper('name'), Concat(...)
-        - Database hooks/defaults
-        - Any other DB-evaluated expression
+        - Case/When: Case(When(...))
+        - Database functions: Upper(), Concat(), etc.
+        - Any other Django Expression
         Trade-off:
-        - Cost: 1 extra SELECT query per queryset.update() call
-        - Benefit: 100% correctness for ALL database expressions
+        - Uses bulk_update() internally (slightly different SQL than queryset.update)
+        - Expression resolution may add overhead for complex expressions
+        - But eliminates the refetch, so overall more efficient
         Args:
             update_kwargs: Dict of fields to update
@@ -249,52 +255,56 @@ class BulkOperationCoordinator:
         Returns:
             Number of objects updated
         """
-        # Fetch instances BEFORE update
+        # Fetch instances from queryset
         instances = list(self.queryset)
         if not instances:
             return 0
+        # Check both parameter and context for bypass_hooks
+        from django_bulk_hooks.context import get_bypass_hooks
+        should_bypass = bypass_hooks or get_bypass_hooks()
+        if should_bypass:
+            # No hooks - use original queryset.update() for max performance
+            return BaseQuerySet.update(self.queryset, **update_kwargs)
+        # Resolve expressions and apply to instances
+        # Delegate to analyzer for expression resolution and value application
+        fields_to_update = self.analyzer.apply_update_values(instances, update_kwargs)
+        # Now instances have the resolved values applied
         # Fetch old records for comparison (single bulk query)
         old_records_map = self.analyzer.fetch_old_records_map(instances)
         # Build changeset for VALIDATE and BEFORE hooks
-        # These see pre-update state, which is correct
-        changeset_before = build_changeset_for_update(
+        # instances now have the "intended" values from update_kwargs
+        changeset = build_changeset_for_update(
             self.model_cls,
             instances,
             update_kwargs,
             old_records_map=old_records_map,
         )
-        if bypass_hooks:
-            # No hooks - just execute the update
-            return BaseQuerySet.update(self.queryset, **update_kwargs)
         # Execute VALIDATE and BEFORE hooks
+        # Hooks can now modify the instances and changes will persist
         if not bypass_validation:
-            self.dispatcher.dispatch(changeset_before, "validate_update", bypass_hooks=False)
-        self.dispatcher.dispatch(changeset_before, "before_update", bypass_hooks=False)
-        # Execute the actual database UPDATE
-        # Database evaluates all expressions here (Subquery, F(), etc.)
-        result = BaseQuerySet.update(self.queryset, **update_kwargs)
-        # Refetch instances to get actual post-update values from database
-        # This ensures AFTER hooks see the real final state
-        pks = [obj.pk for obj in instances]
-        refetched_instances = list(
-            self.model_cls.objects.filter(pk__in=pks)
-        )
+            self.dispatcher.dispatch(changeset, "validate_update", bypass_hooks=False)
+        self.dispatcher.dispatch(changeset, "before_update", bypass_hooks=False)
+        # Use bulk_update with the (possibly modified) instances
+        # This persists any modifications made by BEFORE hooks
+        result = self.executor.bulk_update(instances, fields_to_update, batch_size=None)
-        # Build changeset for AFTER hooks with accurate new values
+        # Build changeset for AFTER hooks
+        # No refetch needed! instances already have final state from bulk_update
         changeset_after = build_changeset_for_update(
             self.model_cls,
-            refetched_instances,  # Fresh from database
+            instances,
             update_kwargs,
-            old_records_map=old_records_map,  # Still have old values for comparison
+            old_records_map=old_records_map,
         )
-        # Execute AFTER hooks with accurate new_records
+        # Execute AFTER hooks with final state
         self.dispatcher.dispatch(changeset_after, "after_update", bypass_hooks=False)
         return result

django-bulk-hooks 0.2.1__tar.gz → 0.2.3__tar.gz

Potentially problematic release.

django-bulk-hooks 0.2.1__tar.gz → 0.2.3__tar.gz