PyPI - django-bulk-hooks - Versions diffs - 0.2.44__py3-none-any.whl → 0.2.50__py3-none-any.whl - Mend - Supply Chain Defender

django-bulk-hooks 0.2.44__py3-none-any.whl → 0.2.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of django-bulk-hooks might be problematic. Click here for more details.

Files changed (10) hide show

django_bulk_hooks/operations/analyzer.py CHANGED Viewed

@@ -77,14 +77,11 @@ class ModelAnalyzer:
         if not objs:
             return
-        invalid_types = {
-            type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)
-        }
+        invalid_types = {type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)}
         if invalid_types:
             raise TypeError(
-                f"{operation} expected instances of {self.model_cls.__name__}, "
-                f"but got {invalid_types}",
+                f"{operation} expected instances of {self.model_cls.__name__}, but got {invalid_types}",
             )
     def _check_has_pks(self, objs, operation="operation"):
@@ -130,7 +127,9 @@ class ModelAnalyzer:
         auto_now_fields = []
         for field in self.model_cls._meta.fields:
             if getattr(field, "auto_now", False) or getattr(
-                field, "auto_now_add", False,
+                field,
+                "auto_now_add",
+                False,
             ):
                 auto_now_fields.append(field.name)
         return auto_now_fields
@@ -142,11 +141,7 @@ class ModelAnalyzer:
         Returns:
             list: FK field names
         """
-        return [
-            field.name
-            for field in self.model_cls._meta.concrete_fields
-            if field.is_relation and not field.many_to_many
-        ]
+        return [field.name for field in self.model_cls._meta.concrete_fields if field.is_relation and not field.many_to_many]
     def detect_changed_fields(self, objs):
         """
@@ -210,15 +205,15 @@ class ModelAnalyzer:
     def resolve_expression(self, field_name, expression, instance):
         """
         Resolve a SQL expression to a concrete value for a specific instance.
         This method materializes database expressions (F(), Subquery, Case, etc.)
         into concrete values by using Django's annotate() mechanism.
         Args:
             field_name: Name of the field being updated
             expression: The expression or value to resolve
             instance: The model instance to resolve for
         Returns:
             The resolved concrete value
         """
@@ -236,31 +231,34 @@ class ModelAnalyzer:
             instance_qs = self.model_cls.objects.filter(pk=instance.pk)
             # Use annotate with the expression and let Django resolve it
-            resolved_value = instance_qs.annotate(
-                _resolved_value=expression,
-            ).values_list("_resolved_value", flat=True).first()
+            resolved_value = (
+                instance_qs.annotate(
+                    _resolved_value=expression,
+                )
+                .values_list("_resolved_value", flat=True)
+                .first()
+            )
             return resolved_value
         except Exception as e:
             # If expression resolution fails, log and return original
             logger.warning(
-                f"Failed to resolve expression for field '{field_name}' "
-                f"on {self.model_cls.__name__}: {e}. Using original value.",
+                f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
             )
             return expression
     def apply_update_values(self, instances, update_kwargs):
         """
         Apply update_kwargs to instances, resolving any SQL expressions.
         This method transforms queryset.update()-style kwargs (which may contain
         F() expressions, Subquery, Case, etc.) into concrete values and applies
         them to the instances.
         Args:
             instances: List of model instances to update
             update_kwargs: Dict of {field_name: value_or_expression}
         Returns:
             List of field names that were updated
         """
@@ -271,7 +269,7 @@ class ModelAnalyzer:
             return []
         fields_updated = list(update_kwargs.keys())
         # Extract PKs
         pks = [inst.pk for inst in instances if inst.pk is not None]
         if not pks:
@@ -306,8 +304,7 @@ class ModelAnalyzer:
             except Exception as e:
                 # If expression resolution fails, log and use original
                 logger.warning(
-                    f"Failed to resolve expression for field '{field_name}' "
-                    f"on {self.model_cls.__name__}: {e}. Using original value.",
+                    f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
                 )
                 for instance in instances:
                     setattr(instance, field_name, value)

django_bulk_hooks/operations/bulk_executor.py CHANGED Viewed

@@ -74,14 +74,23 @@ class BulkExecutor:
         # Check if this is an MTI model and route accordingly
         if self.mti_handler.is_mti_model():
-            logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
             # Use pre-classified records if provided, otherwise classify now
             if existing_record_ids is None or existing_pks_map is None:
                 existing_record_ids = set()
                 existing_pks_map = {}
                 if update_conflicts and unique_fields:
-                    existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
+                    # For MTI, find which model has the unique fields and query THAT model
+                    # This handles the schema migration case where parent exists but child doesn't
+                    query_model = self.mti_handler.find_model_with_unique_fields(unique_fields)
+                    logger.info(f"MTI upsert: querying {query_model.__name__} for unique fields {unique_fields}")
+                    existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(
+                        objs, unique_fields, query_model=query_model
+                    )
+                    logger.info(f"MTI Upsert classification: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new")
+                    logger.info(f"existing_record_ids: {existing_record_ids}")
+                    logger.info(f"existing_pks_map: {existing_pks_map}")
             # Build execution plan with classification results
             plan = self.mti_handler.build_create_plan(
@@ -95,11 +104,11 @@ class BulkExecutor:
             )
             # Execute the plan
             result = self._execute_mti_create_plan(plan)
             # Tag objects with upsert metadata for hook dispatching
             if update_conflicts and unique_fields:
-                self._tag_upsert_metadata(result, existing_record_ids)
+                self._tag_upsert_metadata(result, existing_record_ids, existing_pks_map)
             return result
         # Non-MTI model - use Django's native bulk_create
@@ -112,14 +121,14 @@ class BulkExecutor:
             unique_fields,
             **kwargs,
         )
         # Tag objects with upsert metadata for hook dispatching
         if update_conflicts and unique_fields:
             # Use pre-classified results if available, otherwise classify now
             if existing_record_ids is None:
-                existing_record_ids, _ = self.record_classifier.classify_for_upsert(objs, unique_fields)
-            self._tag_upsert_metadata(result, existing_record_ids)
+                existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
+            self._tag_upsert_metadata(result, existing_record_ids, existing_pks_map)
         return result
     def _execute_bulk_create(
@@ -205,59 +214,43 @@ class BulkExecutor:
         if not plan:
             return []
         with transaction.atomic(using=self.queryset.db, savepoint=False):
-            # Step 1: Create/Update all parent objects level by level
+            # Step 1: Upsert all parent objects level by level using Django's native upsert
             parent_instances_map = {}  # Maps original obj id() -> {model: parent_instance}
             for parent_level in plan.parent_levels:
-                # Separate new and existing parent objects
-                new_parents = []
-                existing_parents = []
-                for parent_obj in parent_level.objects:
-                    orig_obj_id = parent_level.original_object_map[id(parent_obj)]
-                    if orig_obj_id in plan.existing_record_ids:
-                        existing_parents.append(parent_obj)
-                    else:
-                        new_parents.append(parent_obj)
-                # Bulk create new parents
-                if new_parents:
-                    bulk_kwargs = {"batch_size": len(new_parents)}
-                    if parent_level.update_conflicts:
-                        bulk_kwargs["update_conflicts"] = True
-                        bulk_kwargs["unique_fields"] = parent_level.unique_fields
-                        bulk_kwargs["update_fields"] = parent_level.update_fields
-                    # Use base QuerySet to avoid recursion
-                    base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
-                    created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
-                    # Copy generated fields back to parent objects
-                    for created_parent, parent_obj in zip(created_parents, new_parents):
-                        for field in parent_level.model_class._meta.local_fields:
-                            created_value = getattr(created_parent, field.name, None)
-                            if created_value is not None:
-                                setattr(parent_obj, field.name, created_value)
-                        parent_obj._state.adding = False
-                        parent_obj._state.db = self.queryset.db
-                # Update existing parents
-                if existing_parents and parent_level.update_fields:
+                # Use base QuerySet to avoid recursion
+                base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                # Build bulk_create kwargs
+                bulk_kwargs = {"batch_size": len(parent_level.objects)}
+                if parent_level.update_conflicts:
+                    # Let Django handle the upsert - it will INSERT or UPDATE as needed
+                    bulk_kwargs["update_conflicts"] = True
+                    bulk_kwargs["unique_fields"] = parent_level.unique_fields
                     # Filter update fields to only those that exist in this parent model
                     parent_model_fields = {field.name for field in parent_level.model_class._meta.local_fields}
                     filtered_update_fields = [field for field in parent_level.update_fields if field in parent_model_fields]
                     if filtered_update_fields:
-                        base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
-                        base_qs.bulk_update(existing_parents, filtered_update_fields)
+                        bulk_kwargs["update_fields"] = filtered_update_fields
+                # Perform the upsert - Django handles INSERT vs UPDATE automatically
+                upserted_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
+                # Copy generated fields back to parent objects
+                for upserted_parent, parent_obj in zip(upserted_parents, parent_level.objects):
+                    for field in parent_level.model_class._meta.local_fields:
+                        # Use attname for ForeignKey fields to avoid triggering database queries
+                        field_attr = field.attname if isinstance(field, ForeignKey) else field.name
+                        upserted_value = getattr(upserted_parent, field_attr, None)
+                        if upserted_value is not None:
+                            setattr(parent_obj, field_attr, upserted_value)
-                    # Mark as not adding
-                    for parent_obj in existing_parents:
-                        parent_obj._state.adding = False
-                        parent_obj._state.db = self.queryset.db
+                    parent_obj._state.adding = False
+                    parent_obj._state.db = self.queryset.db
                 # Map parents back to original objects
                 for parent_obj in parent_level.objects:
@@ -266,10 +259,7 @@ class BulkExecutor:
                         parent_instances_map[orig_obj_id] = {}
                     parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
-            # Step 2: Add parent links to child objects and separate new/existing
-            new_child_objects = []
-            existing_child_objects = []
+            # Step 2: Add parent links to child objects and set PKs for existing records
             for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
                 parent_instances = parent_instances_map.get(id(orig_obj), {})
@@ -279,91 +269,103 @@ class BulkExecutor:
                     if parent_link:
                         setattr(child_obj, parent_link.attname, parent_instance.pk)
                         setattr(child_obj, parent_link.name, parent_instance)
+                        # IMPORTANT: Don't set the child's PK here - it should only get PK after insertion
+                        # The parent link field (hookmodel_ptr) is NOT the same as the child's PK
+                    else:
+                        logger.warning(f"No parent link found for {parent_model} in {plan.child_model}")
-                # Classify as new or existing
+                # For existing records in upsert, ensure PK is set on child object
                 if id(orig_obj) in plan.existing_record_ids:
-                    # For existing records, set the PK on child object
                     pk_value = getattr(orig_obj, "pk", None)
                     if pk_value:
                         child_obj.pk = pk_value
                         child_obj.id = pk_value
-                    existing_child_objects.append(child_obj)
-                else:
-                    new_child_objects.append(child_obj)
-            # Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
-            if new_child_objects:
-                base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
-                base_qs._prepare_for_bulk_create(new_child_objects)
-                # Partition objects by PK status
-                objs_without_pk, objs_with_pk = [], []
-                for obj in new_child_objects:
-                    if obj._is_pk_set():
-                        objs_with_pk.append(obj)
                     else:
-                        objs_without_pk.append(obj)
+                        # If no PK on original object, this is a new record, don't set PK
+                        logger.info(f"New record {orig_obj} - not setting PK on child object")
-                # Get fields for insert
-                opts = plan.child_model._meta
-                fields = [f for f in opts.local_fields if not f.generated]
-                # Execute bulk insert
-                if objs_with_pk:
-                    returned_columns = base_qs._batched_insert(
-                        objs_with_pk,
-                        fields,
-                        batch_size=len(objs_with_pk),
-                    )
-                    if returned_columns:
-                        for obj, results in zip(objs_with_pk, returned_columns):
-                            if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
-                                for result, field in zip(results, opts.db_returning_fields):
-                                    if field != opts.pk:
-                                        setattr(obj, field.attname, result)
-                            obj._state.adding = False
-                            obj._state.db = self.queryset.db
-                    else:
-                        for obj in objs_with_pk:
-                            obj._state.adding = False
-                            obj._state.db = self.queryset.db
-                if objs_without_pk:
-                    filtered_fields = [f for f in fields if not isinstance(f, AutoField) and not f.primary_key]
-                    returned_columns = base_qs._batched_insert(
-                        objs_without_pk,
-                        filtered_fields,
-                        batch_size=len(objs_without_pk),
+            # Step 3: Handle child objects
+            # Note: We can't use bulk_create on child MTI models, so we use _batched_insert for new records
+            # and bulk_update for existing records
+            base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+            # For MTI child objects, we need to handle them differently
+            # In MTI, child objects get PKs from parent links, but we need to distinguish
+            # between truly new records and existing records for upsert operations
+            objs_without_pk, objs_with_pk = [], []
+            # Check which CHILD records actually exist in the child table
+            # This is separate from checking parent existence
+            if plan.update_conflicts:
+                # Query the CHILD table to see which child records exist
+                parent_pks = []
+                for child_obj in plan.child_objects:
+                    child_pk = getattr(child_obj, plan.child_model._meta.pk.attname, None)
+                    if child_pk:
+                        parent_pks.append(child_pk)
+                existing_child_pks = set()
+                if parent_pks:
+                    existing_child_pks = set(
+                        base_qs.filter(pk__in=parent_pks).values_list('pk', flat=True)
                     )
-                    if returned_columns:
-                        for obj, results in zip(objs_without_pk, returned_columns):
-                            if hasattr(opts, "db_returning_fields"):
-                                for result, field in zip(results, opts.db_returning_fields):
-                                    setattr(obj, field.attname, result)
-                            obj._state.adding = False
-                            obj._state.db = self.queryset.db
+                # Split based on whether child record exists
+                for child_obj in plan.child_objects:
+                    child_pk = getattr(child_obj, plan.child_model._meta.pk.attname, None)
+                    if child_pk and child_pk in existing_child_pks:
+                        # Child record exists - update it
+                        objs_with_pk.append(child_obj)
                     else:
-                        for obj in objs_without_pk:
-                            obj._state.adding = False
-                            obj._state.db = self.queryset.db
-            # Step 3.5: Update existing child objects
-            if existing_child_objects and plan.update_fields:
+                        # Child record doesn't exist - insert it
+                        objs_without_pk.append(child_obj)
+            else:
+                # Not an upsert - all are new records
+                objs_without_pk = plan.child_objects
+                objs_with_pk = []
+            # For objects with PK (existing records in upsert), use bulk_update
+            if objs_with_pk and plan.update_fields:
                 # Filter update fields to only those that exist in the child model
                 child_model_fields = {field.name for field in plan.child_model._meta.local_fields}
                 filtered_child_update_fields = [field for field in plan.update_fields if field in child_model_fields]
                 if filtered_child_update_fields:
-                    base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
-                    base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
+                    base_qs.bulk_update(objs_with_pk, filtered_child_update_fields)
                 # Mark as not adding
-                for child_obj in existing_child_objects:
-                    child_obj._state.adding = False
-                    child_obj._state.db = self.queryset.db
+                for obj in objs_with_pk:
+                    obj._state.adding = False
+                    obj._state.db = self.queryset.db
+            # For objects without PK (new records), use _batched_insert
+            if objs_without_pk:
+                base_qs._prepare_for_bulk_create(objs_without_pk)
+                opts = plan.child_model._meta
+                # Include all local fields except auto-generated ones
+                # For MTI, we need to include the parent link (which is the PK)
+                filtered_fields = [f for f in opts.local_fields if not f.generated]
+                returned_columns = base_qs._batched_insert(
+                    objs_without_pk,
+                    filtered_fields,
+                    batch_size=len(objs_without_pk),
+                )
+                if returned_columns:
+                    for obj, results in zip(objs_without_pk, returned_columns):
+                        if hasattr(opts, "db_returning_fields"):
+                            for result, field in zip(results, opts.db_returning_fields):
+                                setattr(obj, field.attname, result)
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
+                else:
+                    for obj in objs_without_pk:
+                        obj._state.adding = False
+                        obj._state.db = self.queryset.db
-            # Combine all children for final processing
-            created_children = new_child_objects + existing_child_objects
+            # All child objects are now created/updated
+            created_children = plan.child_objects
             # Step 4: Copy PKs and auto-generated fields back to original objects
             pk_field_name = plan.child_model._meta.pk.name
@@ -529,19 +531,38 @@ class BulkExecutor:
         return QuerySet.delete(self.queryset)
-    def _tag_upsert_metadata(self, result_objects, existing_record_ids):
+    def _tag_upsert_metadata(self, result_objects, existing_record_ids, existing_pks_map):
         """
         Tag objects with metadata indicating whether they were created or updated.
         This metadata is used by the coordinator to determine which hooks to fire.
         The metadata is temporary and will be cleaned up after hook execution.
         Args:
             result_objects: List of objects returned from bulk operation
             existing_record_ids: Set of id() for objects that existed before the operation
+            existing_pks_map: Dict mapping id(obj) -> pk for existing records
         """
+        created_count = 0
+        updated_count = 0
+        # Create a set of PKs that existed before the operation
+        existing_pks = set(existing_pks_map.values())
         for obj in result_objects:
-            # Tag with metadata for hook dispatching
-            was_created = id(obj) not in existing_record_ids
+            # Use PK to determine if this record was created or updated
+            # If the PK was in the existing_pks_map, it was updated; otherwise created
+            was_created = obj.pk not in existing_pks
             obj._bulk_hooks_was_created = was_created
             obj._bulk_hooks_upsert_metadata = True
+            if was_created:
+                created_count += 1
+            else:
+                updated_count += 1
+        logger.info(
+            f"Tagged upsert metadata: {created_count} created, {updated_count} updated "
+            f"(total={len(result_objects)}, existing_pks={len(existing_pks)})"
+        )