PyPI - django-bulk-hooks - Versions diffs - 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl - Mend

django-bulk-hooks 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of django-bulk-hooks might be problematic. Click here for more details.

Files changed (23)

django_bulk_hooks/__init__.py +20 -24
django_bulk_hooks/changeset.py +1 -1
django_bulk_hooks/conditions.py +8 -12
django_bulk_hooks/decorators.py +15 -11
django_bulk_hooks/dispatcher.py +19 -10
django_bulk_hooks/factory.py +36 -38
django_bulk_hooks/handler.py +5 -6
django_bulk_hooks/helpers.py +4 -3
django_bulk_hooks/models.py +12 -13
django_bulk_hooks/operations/__init__.py +5 -5
django_bulk_hooks/operations/analyzer.py +14 -14
django_bulk_hooks/operations/bulk_executor.py +220 -129
django_bulk_hooks/operations/coordinator.py +82 -61
django_bulk_hooks/operations/mti_handler.py +91 -60
django_bulk_hooks/operations/mti_plans.py +23 -14
django_bulk_hooks/operations/record_classifier.py +184 -0
django_bulk_hooks/queryset.py +5 -3
django_bulk_hooks/registry.py +53 -43
{django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/METADATA +1 -1
django_bulk_hooks-0.2.17.dist-info/RECORD +26 -0
django_bulk_hooks-0.2.15.dist-info/RECORD +0 -25
{django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/LICENSE +0 -0
{django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/WHEEL +0 -0

django_bulk_hooks/operations/__init__.py CHANGED Viewed

@@ -5,14 +5,14 @@ This module contains all services for bulk operations following
 a clean, service-based architecture.
 """
-from django_bulk_hooks.operations.coordinator import BulkOperationCoordinator
 from django_bulk_hooks.operations.analyzer import ModelAnalyzer
 from django_bulk_hooks.operations.bulk_executor import BulkExecutor
+from django_bulk_hooks.operations.coordinator import BulkOperationCoordinator
 from django_bulk_hooks.operations.mti_handler import MTIHandler
 __all__ = [
-    'BulkOperationCoordinator',
-    'ModelAnalyzer',
-    'BulkExecutor',
-    'MTIHandler',
+    "BulkExecutor",
+    "BulkOperationCoordinator",
+    "MTIHandler",
+    "ModelAnalyzer",
 ]

django_bulk_hooks/operations/analyzer.py CHANGED Viewed

@@ -84,7 +84,7 @@ class ModelAnalyzer:
         if invalid_types:
             raise TypeError(
                 f"{operation} expected instances of {self.model_cls.__name__}, "
-                f"but got {invalid_types}"
+                f"but got {invalid_types}",
             )
     def _check_has_pks(self, objs, operation="operation"):
@@ -94,7 +94,7 @@ class ModelAnalyzer:
         if missing_pks:
             raise ValueError(
                 f"{operation} cannot operate on unsaved {self.model_cls.__name__} instances. "
-                f"{len(missing_pks)} object(s) have no primary key."
+                f"{len(missing_pks)} object(s) have no primary key.",
             )
     # ========== Data Fetching Methods ==========
@@ -130,7 +130,7 @@ class ModelAnalyzer:
         auto_now_fields = []
         for field in self.model_cls._meta.fields:
             if getattr(field, "auto_now", False) or getattr(
-                field, "auto_now_add", False
+                field, "auto_now_add", False,
             ):
                 auto_now_fields.append(field.name)
         return auto_now_fields
@@ -224,28 +224,28 @@ class ModelAnalyzer:
         """
         from django.db.models import Expression
         from django.db.models.expressions import Combinable
         # Simple value - return as-is
         if not isinstance(expression, (Expression, Combinable)):
             return expression
         # For complex expressions, evaluate them in database context
         # Use annotate() which Django properly handles for all expression types
         try:
             # Create a queryset for just this instance
             instance_qs = self.model_cls.objects.filter(pk=instance.pk)
             # Use annotate with the expression and let Django resolve it
             resolved_value = instance_qs.annotate(
-                _resolved_value=expression
-            ).values_list('_resolved_value', flat=True).first()
+                _resolved_value=expression,
+            ).values_list("_resolved_value", flat=True).first()
             return resolved_value
         except Exception as e:
             # If expression resolution fails, log and return original
             logger.warning(
                 f"Failed to resolve expression for field '{field_name}' "
-                f"on {self.model_cls.__name__}: {e}. Using original value."
+                f"on {self.model_cls.__name__}: {e}. Using original value.",
             )
             return expression
@@ -266,12 +266,12 @@ class ModelAnalyzer:
         """
         if not instances or not update_kwargs:
             return []
         fields_updated = list(update_kwargs.keys())
         for field_name, value in update_kwargs.items():
             for instance in instances:
                 resolved_value = self.resolve_expression(field_name, value, instance)
                 setattr(instance, field_name, resolved_value)
-        return fields_updated
+        return fields_updated

django_bulk_hooks/operations/bulk_executor.py CHANGED Viewed

@@ -5,6 +5,7 @@ This service coordinates bulk database operations with validation and MTI handli
 """
 import logging
 from django.db import transaction
 from django.db.models import AutoField
@@ -21,7 +22,7 @@ class BulkExecutor:
     Dependencies are explicitly injected via constructor.
     """
-    def __init__(self, queryset, analyzer, mti_handler):
+    def __init__(self, queryset, analyzer, mti_handler, record_classifier):
         """
         Initialize bulk executor with explicit dependencies.
@@ -29,10 +30,12 @@ class BulkExecutor:
             queryset: Django QuerySet instance
             analyzer: ModelAnalyzer instance (replaces validator + field_tracker)
             mti_handler: MTIHandler instance
+            record_classifier: RecordClassifier instance
         """
         self.queryset = queryset
         self.analyzer = analyzer
         self.mti_handler = mti_handler
+        self.record_classifier = record_classifier
         self.model_cls = queryset.model
     def bulk_create(
@@ -69,13 +72,24 @@ class BulkExecutor:
         # Check if this is an MTI model and route accordingly
         if self.mti_handler.is_mti_model():
             logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
-            # Build execution plan
+            # Classify records using the classifier service
+            existing_record_ids = set()
+            existing_pks_map = {}
+            if update_conflicts and unique_fields:
+                existing_record_ids, existing_pks_map = (
+                    self.record_classifier.classify_for_upsert(objs, unique_fields)
+                )
+            # Build execution plan with classification results
             plan = self.mti_handler.build_create_plan(
                 objs,
                 batch_size=batch_size,
                 update_conflicts=update_conflicts,
                 update_fields=update_fields,
                 unique_fields=unique_fields,
+                existing_record_ids=existing_record_ids,
+                existing_pks_map=existing_pks_map,
             )
             # Execute the plan
             return self._execute_mti_create_plan(plan)
@@ -161,134 +175,203 @@ class BulkExecutor:
         Execute an MTI create plan.
         This is where ALL database operations happen for MTI bulk_create.
+        Handles both new records (INSERT) and existing records (UPDATE) for upsert.
         Args:
             plan: MTICreatePlan object from MTIHandler
         Returns:
-            List of created objects with PKs assigned
+            List of created/updated objects with PKs assigned
         """
-        from django.db import transaction
         from django.db.models import QuerySet as BaseQuerySet
         if not plan:
             return []
         with transaction.atomic(using=self.queryset.db, savepoint=False):
-            # Step 1: Create all parent objects level by level
+            # Step 1: Create/Update all parent objects level by level
             parent_instances_map = {}  # Maps original obj id() -> {model: parent_instance}
             for parent_level in plan.parent_levels:
-                # Bulk create parents for this level
-                bulk_kwargs = {"batch_size": len(parent_level.objects)}
-                if parent_level.update_conflicts:
-                    bulk_kwargs["update_conflicts"] = True
-                    bulk_kwargs["unique_fields"] = parent_level.unique_fields
-                    bulk_kwargs["update_fields"] = parent_level.update_fields
-                # Use base QuerySet to avoid recursion
-                base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
-                created_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
-                # Copy generated fields back to parent objects
-                for created_parent, parent_obj in zip(created_parents, parent_level.objects):
-                    for field in parent_level.model_class._meta.local_fields:
-                        created_value = getattr(created_parent, field.name, None)
-                        if created_value is not None:
-                            setattr(parent_obj, field.name, created_value)
-                    parent_obj._state.adding = False
-                    parent_obj._state.db = self.queryset.db
+                # Separate new and existing parent objects
+                new_parents = []
+                existing_parents = []
+                for parent_obj in parent_level.objects:
+                    orig_obj_id = parent_level.original_object_map[id(parent_obj)]
+                    if orig_obj_id in plan.existing_record_ids:
+                        existing_parents.append(parent_obj)
+                    else:
+                        new_parents.append(parent_obj)
+                # Bulk create new parents
+                if new_parents:
+                    bulk_kwargs = {"batch_size": len(new_parents)}
+                    if parent_level.update_conflicts:
+                        bulk_kwargs["update_conflicts"] = True
+                        bulk_kwargs["unique_fields"] = parent_level.unique_fields
+                        bulk_kwargs["update_fields"] = parent_level.update_fields
+                    # Use base QuerySet to avoid recursion
+                    base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                    created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
+                    # Copy generated fields back to parent objects
+                    for created_parent, parent_obj in zip(created_parents, new_parents):
+                        for field in parent_level.model_class._meta.local_fields:
+                            created_value = getattr(created_parent, field.name, None)
+                            if created_value is not None:
+                                setattr(parent_obj, field.name, created_value)
+                        parent_obj._state.adding = False
+                        parent_obj._state.db = self.queryset.db
+                # Update existing parents
+                if existing_parents and parent_level.update_fields:
+                    # Filter update fields to only those that exist in this parent model
+                    parent_model_fields = {
+                        field.name for field in parent_level.model_class._meta.local_fields
+                    }
+                    filtered_update_fields = [
+                        field for field in parent_level.update_fields
+                        if field in parent_model_fields
+                    ]
+                    if filtered_update_fields:
+                        base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
+                        base_qs.bulk_update(existing_parents, filtered_update_fields)
+                    # Mark as not adding
+                    for parent_obj in existing_parents:
+                        parent_obj._state.adding = False
+                        parent_obj._state.db = self.queryset.db
                 # Map parents back to original objects
                 for parent_obj in parent_level.objects:
                     orig_obj_id = parent_level.original_object_map[id(parent_obj)]
                     if orig_obj_id not in parent_instances_map:
                         parent_instances_map[orig_obj_id] = {}
                     parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
-            # Step 2: Add parent links to child objects
+            # Step 2: Add parent links to child objects and separate new/existing
+            new_child_objects = []
+            existing_child_objects = []
             for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
                 parent_instances = parent_instances_map.get(id(orig_obj), {})
+                # Set parent links
                 for parent_model, parent_instance in parent_instances.items():
                     parent_link = plan.child_model._meta.get_ancestor_link(parent_model)
                     if parent_link:
                         setattr(child_obj, parent_link.attname, parent_instance.pk)
                         setattr(child_obj, parent_link.name, parent_instance)
-            # Step 3: Bulk create child objects using _batched_insert (to bypass MTI check)
-            base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
-            base_qs._prepare_for_bulk_create(plan.child_objects)
-            # Partition objects by PK status
-            objs_without_pk, objs_with_pk = [], []
-            for obj in plan.child_objects:
-                if obj._is_pk_set():
-                    objs_with_pk.append(obj)
+                # Classify as new or existing
+                if id(orig_obj) in plan.existing_record_ids:
+                    # For existing records, set the PK on child object
+                    pk_value = getattr(orig_obj, "pk", None)
+                    if pk_value:
+                        child_obj.pk = pk_value
+                        child_obj.id = pk_value
+                    existing_child_objects.append(child_obj)
                 else:
-                    objs_without_pk.append(obj)
-            # Get fields for insert
-            opts = plan.child_model._meta
-            fields = [f for f in opts.local_fields if not f.generated]
-            # Execute bulk insert
-            if objs_with_pk:
-                returned_columns = base_qs._batched_insert(
-                    objs_with_pk,
-                    fields,
-                    batch_size=len(objs_with_pk),
-                )
-                if returned_columns:
-                    for obj, results in zip(objs_with_pk, returned_columns):
-                        if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
-                            for result, field in zip(results, opts.db_returning_fields):
-                                if field != opts.pk:
+                    new_child_objects.append(child_obj)
+            # Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
+            if new_child_objects:
+                base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+                base_qs._prepare_for_bulk_create(new_child_objects)
+                # Partition objects by PK status
+                objs_without_pk, objs_with_pk = [], []
+                for obj in new_child_objects:
+                    if obj._is_pk_set():
+                        objs_with_pk.append(obj)
+                    else:
+                        objs_without_pk.append(obj)
+                # Get fields for insert
+                opts = plan.child_model._meta
+                fields = [f for f in opts.local_fields if not f.generated]
+                # Execute bulk insert
+                if objs_with_pk:
+                    returned_columns = base_qs._batched_insert(
+                        objs_with_pk,
+                        fields,
+                        batch_size=len(objs_with_pk),
+                    )
+                    if returned_columns:
+                        for obj, results in zip(objs_with_pk, returned_columns):
+                            if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
+                                for result, field in zip(results, opts.db_returning_fields):
+                                    if field != opts.pk:
+                                        setattr(obj, field.attname, result)
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                    else:
+                        for obj in objs_with_pk:
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                if objs_without_pk:
+                    filtered_fields = [
+                        f for f in fields
+                        if not isinstance(f, AutoField) and not f.primary_key
+                    ]
+                    returned_columns = base_qs._batched_insert(
+                        objs_without_pk,
+                        filtered_fields,
+                        batch_size=len(objs_without_pk),
+                    )
+                    if returned_columns:
+                        for obj, results in zip(objs_without_pk, returned_columns):
+                            if hasattr(opts, "db_returning_fields"):
+                                for result, field in zip(results, opts.db_returning_fields):
                                     setattr(obj, field.attname, result)
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-                else:
-                    for obj in objs_with_pk:
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-            if objs_without_pk:
-                filtered_fields = [
-                    f for f in fields
-                    if not isinstance(f, AutoField) and not f.primary_key
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+                    else:
+                        for obj in objs_without_pk:
+                            obj._state.adding = False
+                            obj._state.db = self.queryset.db
+            # Step 3.5: Update existing child objects
+            if existing_child_objects and plan.update_fields:
+                # Filter update fields to only those that exist in the child model
+                child_model_fields = {
+                    field.name for field in plan.child_model._meta.local_fields
+                }
+                filtered_child_update_fields = [
+                    field for field in plan.update_fields
+                    if field in child_model_fields
                 ]
-                returned_columns = base_qs._batched_insert(
-                    objs_without_pk,
-                    filtered_fields,
-                    batch_size=len(objs_without_pk),
-                )
-                if returned_columns:
-                    for obj, results in zip(objs_without_pk, returned_columns):
-                        if hasattr(opts, "db_returning_fields"):
-                            for result, field in zip(results, opts.db_returning_fields):
-                                setattr(obj, field.attname, result)
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-                else:
-                    for obj in objs_without_pk:
-                        obj._state.adding = False
-                        obj._state.db = self.queryset.db
-            created_children = plan.child_objects
+                if filtered_child_update_fields:
+                    base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
+                    base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
+                # Mark as not adding
+                for child_obj in existing_child_objects:
+                    child_obj._state.adding = False
+                    child_obj._state.db = self.queryset.db
+            # Combine all children for final processing
+            created_children = new_child_objects + existing_child_objects
             # Step 4: Copy PKs and auto-generated fields back to original objects
             pk_field_name = plan.child_model._meta.pk.name
             for orig_obj, child_obj in zip(plan.original_objects, created_children):
                 # Copy PK
                 child_pk = getattr(child_obj, pk_field_name)
                 setattr(orig_obj, pk_field_name, child_pk)
                 # Copy auto-generated fields from all levels
                 parent_instances = parent_instances_map.get(id(orig_obj), {})
                 for model_class in plan.inheritance_chain:
                     # Get source object for this level
                     if model_class in parent_instances:
@@ -297,30 +380,30 @@ class BulkExecutor:
                         source_obj = child_obj
                     else:
                         continue
                     # Copy auto-generated field values
                     for field in model_class._meta.local_fields:
                         if field.name == pk_field_name:
                             continue
                         # Skip parent link fields
-                        if hasattr(field, 'remote_field') and field.remote_field:
+                        if hasattr(field, "remote_field") and field.remote_field:
                             parent_link = plan.child_model._meta.get_ancestor_link(model_class)
                             if parent_link and field.name == parent_link.name:
                                 continue
                         # Copy auto_now_add, auto_now, and db_returning fields
-                        if (getattr(field, 'auto_now_add', False) or
-                            getattr(field, 'auto_now', False) or
-                            getattr(field, 'db_returning', False)):
+                        if (getattr(field, "auto_now_add", False) or
+                            getattr(field, "auto_now", False) or
+                            getattr(field, "db_returning", False)):
                             source_value = getattr(source_obj, field.name, None)
                             if source_value is not None:
                                 setattr(orig_obj, field.name, source_value)
                 # Update object state
                 orig_obj._state.adding = False
                 orig_obj._state.db = self.queryset.db
         return plan.original_objects
     def _execute_mti_update_plan(self, plan):
@@ -335,86 +418,94 @@ class BulkExecutor:
         Returns:
             Number of objects updated
         """
-        from django.db import transaction
-        from django.db.models import Case, Value, When, QuerySet as BaseQuerySet
+        from django.db.models import Case
+        from django.db.models import QuerySet as BaseQuerySet
+        from django.db.models import Value
+        from django.db.models import When
         if not plan:
             return 0
         total_updated = 0
         # Get PKs for filtering
         root_pks = [
-            getattr(obj, "pk", None) or getattr(obj, "id", None)
-            for obj in plan.objects
+            getattr(obj, "pk", None) or getattr(obj, "id", None)
+            for obj in plan.objects
             if getattr(obj, "pk", None) or getattr(obj, "id", None)
         ]
         if not root_pks:
             return 0
         with transaction.atomic(using=self.queryset.db, savepoint=False):
             # Update each table in the chain
             for field_group in plan.field_groups:
                 if not field_group.fields:
                     continue
                 base_qs = BaseQuerySet(model=field_group.model_class, using=self.queryset.db)
                 # Check if records exist
                 existing_count = base_qs.filter(**{f"{field_group.filter_field}__in": root_pks}).count()
                 if existing_count == 0:
                     continue
                 # Build CASE statements for bulk update
                 case_statements = {}
                 for field_name in field_group.fields:
                     field = field_group.model_class._meta.get_field(field_name)
                     # Use column name for FK fields
-                    if getattr(field, 'is_relation', False) and hasattr(field, 'attname'):
+                    if getattr(field, "is_relation", False) and hasattr(field, "attname"):
                         db_field_name = field.attname
                         target_field = field.target_field
                     else:
                         db_field_name = field_name
                         target_field = field
                     when_statements = []
                     for pk, obj in zip(root_pks, plan.objects):
                         obj_pk = getattr(obj, "pk", None) or getattr(obj, "id", None)
                         if obj_pk is None:
                             continue
                         value = getattr(obj, db_field_name)
                         # For FK fields, ensure we get the actual ID value, not the related object
-                        if getattr(field, 'is_relation', False) and hasattr(field, 'attname'):
+                        if getattr(field, "is_relation", False) and hasattr(field, "attname"):
                             # If value is a model instance, get its pk
-                            if value is not None and hasattr(value, 'pk'):
+                            if value is not None and hasattr(value, "pk"):
                                 value = value.pk
+                            # If value is a string representation of an ID, convert to int
+                            elif value is not None and isinstance(value, str) and value.isdigit():
+                                value = int(value)
+                            # If value is None or empty string, ensure it's None
+                            elif value == "":
+                                value = None
                         when_statements.append(
                             When(
                                 **{field_group.filter_field: pk},
                                 then=Value(value, output_field=target_field),
-                            )
+                            ),
                         )
                     if when_statements:
                         case_statements[db_field_name] = Case(
-                            *when_statements, output_field=target_field
+                            *when_statements, output_field=target_field,
                         )
                 # Execute bulk update
                 if case_statements:
                     try:
                         updated_count = base_qs.filter(
-                            **{f"{field_group.filter_field}__in": root_pks}
+                            **{f"{field_group.filter_field}__in": root_pks},
                         ).update(**case_statements)
                         total_updated += updated_count
                     except Exception as e:
                         logger.error(f"MTI bulk update failed for {field_group.model_class.__name__}: {e}")
         return total_updated
     def delete_queryset(self):

django-bulk-hooks 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

Potentially problematic release.

django-bulk-hooks 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl