django-bulk-hooks 0.2.45__tar.gz → 0.2.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of django-bulk-hooks might be problematic. Click here for more details.

Files changed (26)
  1. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/PKG-INFO +1 -1
  2. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/analyzer.py +22 -25
  3. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/bulk_executor.py +106 -127
  4. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/coordinator.py +45 -27
  5. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/mti_handler.py +64 -42
  6. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/mti_plans.py +9 -6
  7. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/record_classifier.py +26 -21
  8. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/pyproject.toml +1 -1
  9. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/LICENSE +0 -0
  10. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/README.md +0 -0
  11. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/__init__.py +0 -0
  12. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/changeset.py +0 -0
  13. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/conditions.py +0 -0
  14. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/constants.py +0 -0
  15. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/context.py +0 -0
  16. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/decorators.py +0 -0
  17. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/dispatcher.py +0 -0
  18. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/enums.py +0 -0
  19. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/factory.py +0 -0
  20. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/handler.py +0 -0
  21. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/helpers.py +0 -0
  22. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/manager.py +0 -0
  23. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/models.py +0 -0
  24. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/operations/__init__.py +0 -0
  25. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/queryset.py +0 -0
  26. {django_bulk_hooks-0.2.45 → django_bulk_hooks-0.2.47}/django_bulk_hooks/registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: django-bulk-hooks
3
- Version: 0.2.45
3
+ Version: 0.2.47
4
4
  Summary: Hook-style hooks for Django bulk operations like bulk_create and bulk_update.
5
5
  License: MIT
6
6
  Keywords: django,bulk,hooks
@@ -77,14 +77,11 @@ class ModelAnalyzer:
77
77
  if not objs:
78
78
  return
79
79
 
80
- invalid_types = {
81
- type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)
82
- }
80
+ invalid_types = {type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)}
83
81
 
84
82
  if invalid_types:
85
83
  raise TypeError(
86
- f"{operation} expected instances of {self.model_cls.__name__}, "
87
- f"but got {invalid_types}",
84
+ f"{operation} expected instances of {self.model_cls.__name__}, but got {invalid_types}",
88
85
  )
89
86
 
90
87
  def _check_has_pks(self, objs, operation="operation"):
@@ -130,7 +127,9 @@ class ModelAnalyzer:
130
127
  auto_now_fields = []
131
128
  for field in self.model_cls._meta.fields:
132
129
  if getattr(field, "auto_now", False) or getattr(
133
- field, "auto_now_add", False,
130
+ field,
131
+ "auto_now_add",
132
+ False,
134
133
  ):
135
134
  auto_now_fields.append(field.name)
136
135
  return auto_now_fields
@@ -142,11 +141,7 @@ class ModelAnalyzer:
142
141
  Returns:
143
142
  list: FK field names
144
143
  """
145
- return [
146
- field.name
147
- for field in self.model_cls._meta.concrete_fields
148
- if field.is_relation and not field.many_to_many
149
- ]
144
+ return [field.name for field in self.model_cls._meta.concrete_fields if field.is_relation and not field.many_to_many]
150
145
 
151
146
  def detect_changed_fields(self, objs):
152
147
  """
@@ -210,15 +205,15 @@ class ModelAnalyzer:
210
205
  def resolve_expression(self, field_name, expression, instance):
211
206
  """
212
207
  Resolve a SQL expression to a concrete value for a specific instance.
213
-
208
+
214
209
  This method materializes database expressions (F(), Subquery, Case, etc.)
215
210
  into concrete values by using Django's annotate() mechanism.
216
-
211
+
217
212
  Args:
218
213
  field_name: Name of the field being updated
219
214
  expression: The expression or value to resolve
220
215
  instance: The model instance to resolve for
221
-
216
+
222
217
  Returns:
223
218
  The resolved concrete value
224
219
  """
@@ -236,31 +231,34 @@ class ModelAnalyzer:
236
231
  instance_qs = self.model_cls.objects.filter(pk=instance.pk)
237
232
 
238
233
  # Use annotate with the expression and let Django resolve it
239
- resolved_value = instance_qs.annotate(
240
- _resolved_value=expression,
241
- ).values_list("_resolved_value", flat=True).first()
234
+ resolved_value = (
235
+ instance_qs.annotate(
236
+ _resolved_value=expression,
237
+ )
238
+ .values_list("_resolved_value", flat=True)
239
+ .first()
240
+ )
242
241
 
243
242
  return resolved_value
244
243
  except Exception as e:
245
244
  # If expression resolution fails, log and return original
246
245
  logger.warning(
247
- f"Failed to resolve expression for field '{field_name}' "
248
- f"on {self.model_cls.__name__}: {e}. Using original value.",
246
+ f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
249
247
  )
250
248
  return expression
251
249
 
252
250
  def apply_update_values(self, instances, update_kwargs):
253
251
  """
254
252
  Apply update_kwargs to instances, resolving any SQL expressions.
255
-
253
+
256
254
  This method transforms queryset.update()-style kwargs (which may contain
257
255
  F() expressions, Subquery, Case, etc.) into concrete values and applies
258
256
  them to the instances.
259
-
257
+
260
258
  Args:
261
259
  instances: List of model instances to update
262
260
  update_kwargs: Dict of {field_name: value_or_expression}
263
-
261
+
264
262
  Returns:
265
263
  List of field names that were updated
266
264
  """
@@ -271,7 +269,7 @@ class ModelAnalyzer:
271
269
  return []
272
270
 
273
271
  fields_updated = list(update_kwargs.keys())
274
-
272
+
275
273
  # Extract PKs
276
274
  pks = [inst.pk for inst in instances if inst.pk is not None]
277
275
  if not pks:
@@ -306,8 +304,7 @@ class ModelAnalyzer:
306
304
  except Exception as e:
307
305
  # If expression resolution fails, log and use original
308
306
  logger.warning(
309
- f"Failed to resolve expression for field '{field_name}' "
310
- f"on {self.model_cls.__name__}: {e}. Using original value.",
307
+ f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
311
308
  )
312
309
  for instance in instances:
313
310
  setattr(instance, field_name, value)
@@ -74,14 +74,23 @@ class BulkExecutor:
74
74
 
75
75
  # Check if this is an MTI model and route accordingly
76
76
  if self.mti_handler.is_mti_model():
77
- logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
78
77
 
79
78
  # Use pre-classified records if provided, otherwise classify now
80
79
  if existing_record_ids is None or existing_pks_map is None:
81
80
  existing_record_ids = set()
82
81
  existing_pks_map = {}
83
82
  if update_conflicts and unique_fields:
84
- existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
83
+ # For MTI, find which model has the unique fields and query THAT model
84
+ # This handles the schema migration case where parent exists but child doesn't
85
+ query_model = self.mti_handler.find_model_with_unique_fields(unique_fields)
86
+ logger.info(f"MTI upsert: querying {query_model.__name__} for unique fields {unique_fields}")
87
+
88
+ existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(
89
+ objs, unique_fields, query_model=query_model
90
+ )
91
+ logger.info(f"MTI Upsert classification: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new")
92
+ logger.info(f"existing_record_ids: {existing_record_ids}")
93
+ logger.info(f"existing_pks_map: {existing_pks_map}")
85
94
 
86
95
  # Build execution plan with classification results
87
96
  plan = self.mti_handler.build_create_plan(
@@ -95,11 +104,11 @@ class BulkExecutor:
95
104
  )
96
105
  # Execute the plan
97
106
  result = self._execute_mti_create_plan(plan)
98
-
107
+
99
108
  # Tag objects with upsert metadata for hook dispatching
100
109
  if update_conflicts and unique_fields:
101
110
  self._tag_upsert_metadata(result, existing_record_ids)
102
-
111
+
103
112
  return result
104
113
 
105
114
  # Non-MTI model - use Django's native bulk_create
@@ -112,14 +121,14 @@ class BulkExecutor:
112
121
  unique_fields,
113
122
  **kwargs,
114
123
  )
115
-
124
+
116
125
  # Tag objects with upsert metadata for hook dispatching
117
126
  if update_conflicts and unique_fields:
118
127
  # Use pre-classified results if available, otherwise classify now
119
128
  if existing_record_ids is None:
120
129
  existing_record_ids, _ = self.record_classifier.classify_for_upsert(objs, unique_fields)
121
130
  self._tag_upsert_metadata(result, existing_record_ids)
122
-
131
+
123
132
  return result
124
133
 
125
134
  def _execute_bulk_create(
@@ -205,59 +214,41 @@ class BulkExecutor:
205
214
  if not plan:
206
215
  return []
207
216
 
217
+
208
218
  with transaction.atomic(using=self.queryset.db, savepoint=False):
209
- # Step 1: Create/Update all parent objects level by level
219
+ # Step 1: Upsert all parent objects level by level using Django's native upsert
210
220
  parent_instances_map = {} # Maps original obj id() -> {model: parent_instance}
211
221
 
212
222
  for parent_level in plan.parent_levels:
213
- # Separate new and existing parent objects
214
- new_parents = []
215
- existing_parents = []
216
-
217
- for parent_obj in parent_level.objects:
218
- orig_obj_id = parent_level.original_object_map[id(parent_obj)]
219
- if orig_obj_id in plan.existing_record_ids:
220
- existing_parents.append(parent_obj)
221
- else:
222
- new_parents.append(parent_obj)
223
-
224
- # Bulk create new parents
225
- if new_parents:
226
- bulk_kwargs = {"batch_size": len(new_parents)}
227
-
228
- if parent_level.update_conflicts:
229
- bulk_kwargs["update_conflicts"] = True
230
- bulk_kwargs["unique_fields"] = parent_level.unique_fields
231
- bulk_kwargs["update_fields"] = parent_level.update_fields
232
-
233
- # Use base QuerySet to avoid recursion
234
- base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
235
- created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
236
-
237
- # Copy generated fields back to parent objects
238
- for created_parent, parent_obj in zip(created_parents, new_parents):
239
- for field in parent_level.model_class._meta.local_fields:
240
- created_value = getattr(created_parent, field.name, None)
241
- if created_value is not None:
242
- setattr(parent_obj, field.name, created_value)
243
-
244
- parent_obj._state.adding = False
245
- parent_obj._state.db = self.queryset.db
246
-
247
- # Update existing parents
248
- if existing_parents and parent_level.update_fields:
223
+ # Use base QuerySet to avoid recursion
224
+ base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
225
+
226
+ # Build bulk_create kwargs
227
+ bulk_kwargs = {"batch_size": len(parent_level.objects)}
228
+
229
+ if parent_level.update_conflicts:
230
+ # Let Django handle the upsert - it will INSERT or UPDATE as needed
231
+ bulk_kwargs["update_conflicts"] = True
232
+ bulk_kwargs["unique_fields"] = parent_level.unique_fields
233
+
249
234
  # Filter update fields to only those that exist in this parent model
250
235
  parent_model_fields = {field.name for field in parent_level.model_class._meta.local_fields}
251
236
  filtered_update_fields = [field for field in parent_level.update_fields if field in parent_model_fields]
252
-
253
237
  if filtered_update_fields:
254
- base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
255
- base_qs.bulk_update(existing_parents, filtered_update_fields)
238
+ bulk_kwargs["update_fields"] = filtered_update_fields
239
+
240
+ # Perform the upsert - Django handles INSERT vs UPDATE automatically
241
+ upserted_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
256
242
 
257
- # Mark as not adding
258
- for parent_obj in existing_parents:
259
- parent_obj._state.adding = False
260
- parent_obj._state.db = self.queryset.db
243
+ # Copy generated fields back to parent objects
244
+ for upserted_parent, parent_obj in zip(upserted_parents, parent_level.objects):
245
+ for field in parent_level.model_class._meta.local_fields:
246
+ upserted_value = getattr(upserted_parent, field.name, None)
247
+ if upserted_value is not None:
248
+ setattr(parent_obj, field.name, upserted_value)
249
+
250
+ parent_obj._state.adding = False
251
+ parent_obj._state.db = self.queryset.db
261
252
 
262
253
  # Map parents back to original objects
263
254
  for parent_obj in parent_level.objects:
@@ -266,10 +257,7 @@ class BulkExecutor:
266
257
  parent_instances_map[orig_obj_id] = {}
267
258
  parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
268
259
 
269
- # Step 2: Add parent links to child objects and separate new/existing
270
- new_child_objects = []
271
- existing_child_objects = []
272
-
260
+ # Step 2: Add parent links to child objects and set PKs for existing records
273
261
  for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
274
262
  parent_instances = parent_instances_map.get(id(orig_obj), {})
275
263
 
@@ -279,91 +267,81 @@ class BulkExecutor:
279
267
  if parent_link:
280
268
  setattr(child_obj, parent_link.attname, parent_instance.pk)
281
269
  setattr(child_obj, parent_link.name, parent_instance)
270
+ # IMPORTANT: Don't set the child's PK here - it should only get PK after insertion
271
+ # The parent link field (hookmodel_ptr) is NOT the same as the child's PK
272
+ else:
273
+ logger.warning(f"No parent link found for {parent_model} in {plan.child_model}")
282
274
 
283
- # Classify as new or existing
275
+ # For existing records in upsert, ensure PK is set on child object
284
276
  if id(orig_obj) in plan.existing_record_ids:
285
- # For existing records, set the PK on child object
286
277
  pk_value = getattr(orig_obj, "pk", None)
287
278
  if pk_value:
288
279
  child_obj.pk = pk_value
289
280
  child_obj.id = pk_value
290
- existing_child_objects.append(child_obj)
291
- else:
292
- new_child_objects.append(child_obj)
293
-
294
- # Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
295
- if new_child_objects:
296
- base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
297
- base_qs._prepare_for_bulk_create(new_child_objects)
298
-
299
- # Partition objects by PK status
300
- objs_without_pk, objs_with_pk = [], []
301
- for obj in new_child_objects:
302
- if obj._is_pk_set():
303
- objs_with_pk.append(obj)
304
281
  else:
305
- objs_without_pk.append(obj)
282
+ # If no PK on original object, this is a new record, don't set PK
283
+ logger.info(f"New record {orig_obj} - not setting PK on child object")
306
284
 
307
- # Get fields for insert
308
- opts = plan.child_model._meta
309
- fields = [f for f in opts.local_fields if not f.generated]
310
-
311
- # Execute bulk insert
312
- if objs_with_pk:
313
- returned_columns = base_qs._batched_insert(
314
- objs_with_pk,
315
- fields,
316
- batch_size=len(objs_with_pk),
317
- )
318
- if returned_columns:
319
- for obj, results in zip(objs_with_pk, returned_columns):
320
- if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
321
- for result, field in zip(results, opts.db_returning_fields):
322
- if field != opts.pk:
323
- setattr(obj, field.attname, result)
324
- obj._state.adding = False
325
- obj._state.db = self.queryset.db
326
- else:
327
- for obj in objs_with_pk:
328
- obj._state.adding = False
329
- obj._state.db = self.queryset.db
330
-
331
- if objs_without_pk:
332
- filtered_fields = [f for f in fields if not isinstance(f, AutoField) and not f.primary_key]
333
- returned_columns = base_qs._batched_insert(
334
- objs_without_pk,
335
- filtered_fields,
336
- batch_size=len(objs_without_pk),
337
- )
338
- if returned_columns:
339
- for obj, results in zip(objs_without_pk, returned_columns):
340
- if hasattr(opts, "db_returning_fields"):
341
- for result, field in zip(results, opts.db_returning_fields):
342
- setattr(obj, field.attname, result)
343
- obj._state.adding = False
344
- obj._state.db = self.queryset.db
345
- else:
346
- for obj in objs_without_pk:
347
- obj._state.adding = False
348
- obj._state.db = self.queryset.db
285
+ # Step 3: Handle child objects
286
+ # Note: We can't use bulk_create on child MTI models, so we use _batched_insert for new records
287
+ # and bulk_update for existing records
288
+ base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
289
+
290
+ # For MTI child objects, we need to handle them differently
291
+ # In MTI, child objects get PKs from parent links, but we need to distinguish
292
+ # between truly new records and existing records for upsert operations
293
+ objs_without_pk, objs_with_pk = [], []
294
+ for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
295
+ # Check if this is an existing record (for upsert operations)
296
+ if id(orig_obj) in plan.existing_record_ids:
297
+ # Existing record - should be updated
298
+ objs_with_pk.append(child_obj)
299
+ else:
300
+ # New record - should be inserted
301
+ objs_without_pk.append(child_obj)
349
302
 
350
- # Step 3.5: Update existing child objects
351
- if existing_child_objects and plan.update_fields:
303
+ # For objects with PK (existing records in upsert), use bulk_update
304
+ if objs_with_pk and plan.update_fields:
352
305
  # Filter update fields to only those that exist in the child model
353
306
  child_model_fields = {field.name for field in plan.child_model._meta.local_fields}
354
307
  filtered_child_update_fields = [field for field in plan.update_fields if field in child_model_fields]
355
308
 
356
309
  if filtered_child_update_fields:
357
- base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
358
- base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
310
+ base_qs.bulk_update(objs_with_pk, filtered_child_update_fields)
359
311
 
360
312
  # Mark as not adding
361
- for child_obj in existing_child_objects:
362
- child_obj._state.adding = False
363
- child_obj._state.db = self.queryset.db
313
+ for obj in objs_with_pk:
314
+ obj._state.adding = False
315
+ obj._state.db = self.queryset.db
364
316
 
365
- # Combine all children for final processing
366
- created_children = new_child_objects + existing_child_objects
317
+ # For objects without PK (new records), use _batched_insert
318
+ if objs_without_pk:
319
+ base_qs._prepare_for_bulk_create(objs_without_pk)
320
+ opts = plan.child_model._meta
321
+
322
+ # Include all local fields except auto-generated ones
323
+ # For MTI, we need to include the parent link (which is the PK)
324
+ filtered_fields = [f for f in opts.local_fields if not f.generated]
325
+
326
+ returned_columns = base_qs._batched_insert(
327
+ objs_without_pk,
328
+ filtered_fields,
329
+ batch_size=len(objs_without_pk),
330
+ )
331
+ if returned_columns:
332
+ for obj, results in zip(objs_without_pk, returned_columns):
333
+ if hasattr(opts, "db_returning_fields"):
334
+ for result, field in zip(results, opts.db_returning_fields):
335
+ setattr(obj, field.attname, result)
336
+ obj._state.adding = False
337
+ obj._state.db = self.queryset.db
338
+ else:
339
+ for obj in objs_without_pk:
340
+ obj._state.adding = False
341
+ obj._state.db = self.queryset.db
342
+
343
+ # All child objects are now created/updated
344
+ created_children = plan.child_objects
367
345
 
368
346
  # Step 4: Copy PKs and auto-generated fields back to original objects
369
347
  pk_field_name = plan.child_model._meta.pk.name
@@ -532,29 +510,30 @@ class BulkExecutor:
532
510
  def _tag_upsert_metadata(self, result_objects, existing_record_ids):
533
511
  """
534
512
  Tag objects with metadata indicating whether they were created or updated.
535
-
513
+
536
514
  This metadata is used by the coordinator to determine which hooks to fire.
537
515
  The metadata is temporary and will be cleaned up after hook execution.
538
-
516
+
539
517
  Args:
540
518
  result_objects: List of objects returned from bulk operation
541
519
  existing_record_ids: Set of id() for objects that existed before the operation
542
520
  """
543
521
  created_count = 0
544
522
  updated_count = 0
545
-
523
+
546
524
  for obj in result_objects:
547
525
  # Tag with metadata for hook dispatching
548
526
  was_created = id(obj) not in existing_record_ids
549
527
  obj._bulk_hooks_was_created = was_created
550
528
  obj._bulk_hooks_upsert_metadata = True
551
-
529
+
552
530
  if was_created:
553
531
  created_count += 1
554
532
  else:
555
533
  updated_count += 1
556
-
534
+
557
535
  logger.info(
558
536
  f"Tagged upsert metadata: {created_count} created, {updated_count} updated "
559
537
  f"(total={len(result_objects)}, existing_ids={len(existing_record_ids)})"
560
538
  )
539
+
@@ -136,7 +136,15 @@ class BulkOperationCoordinator:
136
136
  existing_record_ids = set()
137
137
  existing_pks_map = {}
138
138
  if update_conflicts and unique_fields:
139
- existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
139
+ # For MTI models, query the parent model that has the unique fields
140
+ query_model = None
141
+ if self.mti_handler.is_mti_model():
142
+ query_model = self.mti_handler.find_model_with_unique_fields(unique_fields)
143
+ logger.info(f"MTI model detected: querying {query_model.__name__} for unique fields {unique_fields}")
144
+
145
+ existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(
146
+ objs, unique_fields, query_model=query_model
147
+ )
140
148
  logger.info(f"Upsert operation: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new records")
141
149
  logger.debug(f"Existing record IDs: {existing_record_ids}")
142
150
  logger.debug(f"Existing PKs map: {existing_pks_map}")
@@ -699,7 +707,7 @@ class BulkOperationCoordinator:
699
707
 
700
708
  def _is_upsert_operation(self, result_objects):
701
709
  """
702
- Check if the operation was an upsert (mixed create/update).
710
+ Check if the operation was an upsert (with update_conflicts=True).
703
711
 
704
712
  Args:
705
713
  result_objects: List of objects returned from the operation
@@ -725,36 +733,46 @@ class BulkOperationCoordinator:
725
733
  result_objects: List of objects returned from the operation
726
734
  models_in_chain: List of model classes in the MTI inheritance chain
727
735
  """
728
- # Split objects by operation type
736
+ # Split objects based on metadata set by the executor
729
737
  created_objects = []
730
738
  updated_objects = []
731
- missing_metadata_count = 0
732
-
739
+
740
+ if not result_objects:
741
+ return
742
+
733
743
  for obj in result_objects:
734
- # Check if metadata was set (it MUST be set for upsert operations)
735
- if not hasattr(obj, '_bulk_hooks_upsert_metadata'):
736
- # This should never happen - log and treat as created to maintain backward compat
737
- missing_metadata_count += 1
738
- logger.warning(
739
- f"Object {obj} (id={id(obj)}, pk={getattr(obj, 'pk', None)}) "
740
- f"missing upsert metadata - defaulting to 'created'. "
741
- f"This may indicate a bug in the upsert metadata tagging.",
742
- )
743
- was_created = True
744
- else:
744
+ # Check if metadata was set
745
+ if hasattr(obj, "_bulk_hooks_was_created"):
745
746
  was_created = getattr(obj, "_bulk_hooks_was_created", True)
746
-
747
- if was_created:
748
- created_objects.append(obj)
747
+ if was_created:
748
+ created_objects.append(obj)
749
+ else:
750
+ updated_objects.append(obj)
749
751
  else:
750
- updated_objects.append(obj)
751
-
752
- if missing_metadata_count > 0:
753
- logger.error(
754
- f"UPSERT METADATA BUG: {missing_metadata_count}/{len(result_objects)} objects "
755
- f"missing metadata. This will cause incorrect hook firing!",
756
- )
757
-
752
+ # Fallback: if no metadata, check timestamps
753
+ model_cls = obj.__class__
754
+ if hasattr(model_cls, "created_at") and hasattr(model_cls, "updated_at"):
755
+ # Reload from DB to get accurate timestamps
756
+ db_obj = model_cls.objects.filter(pk=obj.pk).values("created_at", "updated_at").first()
757
+ if db_obj:
758
+ created_at = db_obj["created_at"]
759
+ updated_at = db_obj["updated_at"]
760
+ if created_at and updated_at:
761
+ time_diff = abs((updated_at - created_at).total_seconds())
762
+ if time_diff <= 1.0: # Within 1 second = just created
763
+ created_objects.append(obj)
764
+ else:
765
+ updated_objects.append(obj)
766
+ else:
767
+ # No timestamps, default to created
768
+ created_objects.append(obj)
769
+ else:
770
+ # Object not found, treat as created
771
+ created_objects.append(obj)
772
+ else:
773
+ # No timestamp fields, default to created
774
+ created_objects.append(obj)
775
+
758
776
  logger.info(f"Upsert after hooks: {len(created_objects)} created, {len(updated_objects)} updated")
759
777
 
760
778
  # Dispatch after_create hooks for created objects
@@ -20,7 +20,7 @@ class MTIHandler:
20
20
 
21
21
  This service detects MTI models and builds execution plans.
22
22
  It does NOT execute database operations - that's the BulkExecutor's job.
23
-
23
+
24
24
  Responsibilities:
25
25
  - Detect MTI models
26
26
  - Build inheritance chains
@@ -45,8 +45,9 @@ class MTIHandler:
45
45
  Returns:
46
46
  bool: True if model has concrete parent models
47
47
  """
48
- for parent in self.model_cls._meta.all_parents:
49
- if parent._meta.concrete_model != self.model_cls._meta.concrete_model:
48
+ # Check if this model has concrete parent models (not abstract)
49
+ for parent in self.model_cls._meta.parents.keys():
50
+ if not parent._meta.abstract and parent._meta.concrete_model != self.model_cls._meta.concrete_model:
50
51
  return True
51
52
  return False
52
53
 
@@ -73,15 +74,12 @@ class MTIHandler:
73
74
  current_model = self.model_cls
74
75
 
75
76
  while current_model:
76
- if not current_model._meta.proxy:
77
+ if not current_model._meta.proxy and not current_model._meta.abstract:
77
78
  chain.append(current_model)
78
79
 
79
- # Get concrete parent models
80
- parents = [
81
- parent
82
- for parent in current_model._meta.parents.keys()
83
- if not parent._meta.proxy
84
- ]
80
+ # Get concrete parent models (not abstract, not proxy)
81
+ parents = [parent for parent in current_model._meta.parents.keys()
82
+ if not parent._meta.proxy and not parent._meta.abstract]
85
83
 
86
84
  current_model = parents[0] if parents else None
87
85
 
@@ -113,6 +111,35 @@ class MTIHandler:
113
111
  """
114
112
  return list(model_cls._meta.local_fields)
115
113
 
114
+ def find_model_with_unique_fields(self, unique_fields):
115
+ """
116
+ Find which model in the inheritance chain contains the unique fields.
117
+
118
+ This is critical for MTI upserts: we need to query the model that has
119
+ the unique constraint, not necessarily the child model.
120
+
121
+ Args:
122
+ unique_fields: List of field names forming the unique constraint
123
+
124
+ Returns:
125
+ Model class that contains all the unique fields (closest to root)
126
+ """
127
+ if not unique_fields:
128
+ return self.model_cls
129
+
130
+ inheritance_chain = self.get_inheritance_chain()
131
+
132
+ # Start from root and find the first model that has all unique fields
133
+ for model_cls in inheritance_chain:
134
+ model_field_names = {f.name for f in model_cls._meta.local_fields}
135
+
136
+ # Check if this model has all the unique fields
137
+ if all(field in model_field_names for field in unique_fields):
138
+ return model_cls
139
+
140
+ # Fallback to child model (shouldn't happen if unique_fields are valid)
141
+ return self.model_cls
142
+
116
143
  # ==================== MTI BULK CREATE PLANNING ====================
117
144
 
118
145
  def build_create_plan(
@@ -127,10 +154,10 @@ class MTIHandler:
127
154
  ):
128
155
  """
129
156
  Build an execution plan for bulk creating MTI model instances.
130
-
157
+
131
158
  This method does NOT execute any database operations.
132
159
  It returns a plan that the BulkExecutor will execute.
133
-
160
+
134
161
  Args:
135
162
  objs: List of model instances to create
136
163
  batch_size: Number of objects per batch
@@ -139,7 +166,7 @@ class MTIHandler:
139
166
  update_fields: Fields to update on conflict
140
167
  existing_record_ids: Set of id() for objects that exist in DB (from RecordClassifier)
141
168
  existing_pks_map: Dict mapping id(obj) -> pk for existing records (from RecordClassifier)
142
-
169
+
143
170
  Returns:
144
171
  MTICreatePlan object
145
172
  """
@@ -205,9 +232,9 @@ class MTIHandler:
205
232
  ):
206
233
  """
207
234
  Build parent level objects for each level in the inheritance chain.
208
-
235
+
209
236
  This is pure in-memory object creation - no DB operations.
210
-
237
+
211
238
  Returns:
212
239
  List of ParentLevel objects
213
240
  """
@@ -255,16 +282,14 @@ class MTIHandler:
255
282
  # Check if this model has a matching constraint
256
283
  if normalized_unique and self._has_matching_constraint(model_class, normalized_unique):
257
284
  # Filter update fields
258
- filtered_updates = [
259
- uf for uf in (update_fields or []) if uf in model_fields_by_name
260
- ]
285
+ filtered_updates = [uf for uf in (update_fields or []) if uf in model_fields_by_name]
261
286
 
262
287
  # If no fields to update at this level but we need upsert to prevent
263
288
  # unique constraint violations, use one of the unique fields as a dummy
264
289
  # update field (updating it to itself is a safe no-op)
265
290
  if not filtered_updates and normalized_unique:
266
291
  filtered_updates = [normalized_unique[0]]
267
-
292
+
268
293
  # Only enable upsert if we have fields to update (real or dummy)
269
294
  if filtered_updates:
270
295
  level_update_conflicts = True
@@ -288,10 +313,8 @@ class MTIHandler:
288
313
  """Check if model has a unique constraint matching the given fields."""
289
314
  try:
290
315
  from django.db.models import UniqueConstraint
291
- constraint_field_sets = [
292
- tuple(c.fields) for c in model_class._meta.constraints
293
- if isinstance(c, UniqueConstraint)
294
- ]
316
+
317
+ constraint_field_sets = [tuple(c.fields) for c in model_class._meta.constraints if isinstance(c, UniqueConstraint)]
295
318
  except Exception:
296
319
  constraint_field_sets = []
297
320
 
@@ -319,12 +342,12 @@ class MTIHandler:
319
342
  def _create_parent_instance(self, source_obj, parent_model, current_parent):
320
343
  """
321
344
  Create a parent instance from source object (in-memory only).
322
-
345
+
323
346
  Args:
324
347
  source_obj: Original object with data
325
348
  parent_model: Parent model class to create instance of
326
349
  current_parent: Parent instance from previous level (if any)
327
-
350
+
328
351
  Returns:
329
352
  Parent model instance (not saved)
330
353
  """
@@ -335,8 +358,7 @@ class MTIHandler:
335
358
  if hasattr(source_obj, field.name):
336
359
  value = getattr(source_obj, field.name, None)
337
360
  if value is not None:
338
- if (field.is_relation and not field.many_to_many and
339
- not field.one_to_many):
361
+ if field.is_relation and not field.many_to_many and not field.one_to_many:
340
362
  # Handle FK fields
341
363
  if hasattr(value, "pk") and value.pk is not None:
342
364
  setattr(parent_obj, field.attname, value.pk)
@@ -348,8 +370,7 @@ class MTIHandler:
348
370
  # Link to parent if exists
349
371
  if current_parent is not None:
350
372
  for field in parent_model._meta.local_fields:
351
- if (hasattr(field, "remote_field") and field.remote_field and
352
- field.remote_field.model == current_parent.__class__):
373
+ if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == current_parent.__class__:
353
374
  setattr(parent_obj, field.name, current_parent)
354
375
  break
355
376
 
@@ -373,13 +394,13 @@ class MTIHandler:
373
394
  def _create_child_instance_template(self, source_obj, child_model):
374
395
  """
375
396
  Create a child instance template (in-memory only, without parent links).
376
-
397
+
377
398
  The executor will add parent links after creating parent objects.
378
-
399
+
379
400
  Args:
380
401
  source_obj: Original object with data
381
402
  child_model: Child model class
382
-
403
+
383
404
  Returns:
384
405
  Child model instance (not saved, no parent links)
385
406
  """
@@ -399,8 +420,7 @@ class MTIHandler:
399
420
  if hasattr(source_obj, field.name):
400
421
  value = getattr(source_obj, field.name, None)
401
422
  if value is not None:
402
- if (field.is_relation and not field.many_to_many and
403
- not field.one_to_many):
423
+ if field.is_relation and not field.many_to_many and not field.one_to_many:
404
424
  if hasattr(value, "pk") and value.pk is not None:
405
425
  setattr(child_obj, field.attname, value.pk)
406
426
  else:
@@ -430,14 +450,14 @@ class MTIHandler:
430
450
  def build_update_plan(self, objs, fields, batch_size=None):
431
451
  """
432
452
  Build an execution plan for bulk updating MTI model instances.
433
-
453
+
434
454
  This method does NOT execute any database operations.
435
-
455
+
436
456
  Args:
437
457
  objs: List of model instances to update
438
458
  fields: List of field names to update
439
459
  batch_size: Number of objects per batch
440
-
460
+
441
461
  Returns:
442
462
  MTIUpdatePlan object
443
463
  """
@@ -497,11 +517,13 @@ class MTIHandler:
497
517
  break
498
518
  filter_field = parent_link.attname if parent_link else "pk"
499
519
 
500
- field_groups.append(ModelFieldGroup(
501
- model_class=model,
502
- fields=model_fields,
503
- filter_field=filter_field,
504
- ))
520
+ field_groups.append(
521
+ ModelFieldGroup(
522
+ model_class=model,
523
+ fields=model_fields,
524
+ filter_field=filter_field,
525
+ )
526
+ )
505
527
 
506
528
  return MTIUpdatePlan(
507
529
  inheritance_chain=inheritance_chain,
@@ -14,7 +14,7 @@ from typing import Any
14
14
  class ParentLevel:
15
15
  """
16
16
  Represents one level in the parent hierarchy for MTI bulk create.
17
-
17
+
18
18
  Attributes:
19
19
  model_class: The parent model class for this level
20
20
  objects: List of parent instances to create
@@ -23,6 +23,7 @@ class ParentLevel:
23
23
  unique_fields: Fields for conflict detection (if update_conflicts=True)
24
24
  update_fields: Fields to update on conflict (if update_conflicts=True)
25
25
  """
26
+
26
27
  model_class: Any
27
28
  objects: list[Any]
28
29
  original_object_map: dict[int, int] = field(default_factory=dict)
@@ -35,10 +36,10 @@ class ParentLevel:
35
36
  class MTICreatePlan:
36
37
  """
37
38
  Plan for executing bulk_create on an MTI model.
38
-
39
+
39
40
  This plan describes WHAT to create, not HOW to create it.
40
41
  The executor is responsible for executing this plan.
41
-
42
+
42
43
  Attributes:
43
44
  inheritance_chain: List of model classes from root to child
44
45
  parent_levels: List of ParentLevel objects, one per parent model
@@ -51,6 +52,7 @@ class MTICreatePlan:
51
52
  unique_fields: Fields used for conflict detection
52
53
  update_fields: Fields to update on conflict
53
54
  """
55
+
54
56
  inheritance_chain: list[Any]
55
57
  parent_levels: list[ParentLevel]
56
58
  child_objects: list[Any]
@@ -67,12 +69,13 @@ class MTICreatePlan:
67
69
  class ModelFieldGroup:
68
70
  """
69
71
  Represents fields to update for one model in the inheritance chain.
70
-
72
+
71
73
  Attributes:
72
74
  model_class: The model class
73
75
  fields: List of field names to update on this model
74
76
  filter_field: Field to use for filtering (e.g., 'pk' or parent link attname)
75
77
  """
78
+
76
79
  model_class: Any
77
80
  fields: list[str]
78
81
  filter_field: str = "pk"
@@ -82,15 +85,15 @@ class ModelFieldGroup:
82
85
  class MTIUpdatePlan:
83
86
  """
84
87
  Plan for executing bulk_update on an MTI model.
85
-
88
+
86
89
  Attributes:
87
90
  inheritance_chain: List of model classes from root to child
88
91
  field_groups: List of ModelFieldGroup objects
89
92
  objects: Objects to update
90
93
  batch_size: Batch size for operations
91
94
  """
95
+
92
96
  inheritance_chain: list[Any]
93
97
  field_groups: list[ModelFieldGroup]
94
98
  objects: list[Any]
95
99
  batch_size: int = None
96
-
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
17
17
  class RecordClassifier:
18
18
  """
19
19
  Service for classifying and fetching records via database queries.
20
-
20
+
21
21
  This is the SINGLE point of truth for record classification queries.
22
22
  Keeps database access logic separate from business/planning logic.
23
23
  """
@@ -31,17 +31,18 @@ class RecordClassifier:
31
31
  """
32
32
  self.model_cls = model_cls
33
33
 
34
- def classify_for_upsert(self, objs, unique_fields):
34
+ def classify_for_upsert(self, objs, unique_fields, query_model=None):
35
35
  """
36
36
  Classify records as new or existing based on unique_fields.
37
-
37
+
38
38
  Queries the database to check which records already exist based on the
39
39
  unique_fields constraint.
40
-
40
+
41
41
  Args:
42
42
  objs: List of model instances
43
43
  unique_fields: List of field names that form the unique constraint
44
-
44
+ query_model: Optional model class to query (for MTI, may be different from self.model_cls)
45
+
45
46
  Returns:
46
47
  Tuple of (existing_record_ids, existing_pks_map)
47
48
  - existing_record_ids: Set of id() for objects that exist in DB
@@ -50,6 +51,9 @@ class RecordClassifier:
50
51
  if not unique_fields or not objs:
51
52
  return set(), {}
52
53
 
54
+ # Use query_model if provided (for MTI scenarios), otherwise use self.model_cls
55
+ query_model = query_model or self.model_cls
56
+
53
57
  # Build a query to find existing records
54
58
  queries = []
55
59
  obj_to_unique_values = {}
@@ -77,9 +81,12 @@ class RecordClassifier:
77
81
  for q in queries[1:]:
78
82
  combined_query |= q
79
83
 
80
- existing_records = list(
81
- self.model_cls.objects.filter(combined_query).values("pk", *unique_fields),
82
- )
84
+ logger.info(f"Classifying for upsert: model={query_model.__name__}, query={combined_query}, unique_fields={unique_fields}")
85
+ queryset = query_model.objects.filter(combined_query)
86
+ logger.info(f"Queryset SQL: {queryset.query}")
87
+ logger.info(f"All records in table: {query_model.objects.all().count()}")
88
+ existing_records = list(queryset.values("pk", *unique_fields))
89
+ logger.info(f"Found {len(existing_records)} existing records: {existing_records}")
83
90
 
84
91
  # Map existing records back to original objects
85
92
  existing_record_ids = set()
@@ -94,8 +101,7 @@ class RecordClassifier:
94
101
  existing_pks_map[obj_id] = record["pk"]
95
102
 
96
103
  logger.info(
97
- f"Classified {len(existing_record_ids)} existing and "
98
- f"{len(objs) - len(existing_record_ids)} new records for upsert",
104
+ f"Classified {len(existing_record_ids)} existing and {len(objs) - len(existing_record_ids)} new records for upsert",
99
105
  )
100
106
 
101
107
  return existing_record_ids, existing_pks_map
@@ -103,12 +109,12 @@ class RecordClassifier:
103
109
  def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
104
110
  """
105
111
  Fetch records by primary keys with optional relationship loading.
106
-
112
+
107
113
  Args:
108
114
  pks: List of primary key values
109
115
  select_related: Optional list of fields to select_related
110
116
  prefetch_related: Optional list of fields to prefetch_related
111
-
117
+
112
118
  Returns:
113
119
  Dict[pk, instance] for O(1) lookups
114
120
  """
@@ -128,10 +134,10 @@ class RecordClassifier:
128
134
  def fetch_by_unique_constraint(self, field_values_map):
129
135
  """
130
136
  Fetch records matching a unique constraint.
131
-
137
+
132
138
  Args:
133
139
  field_values_map: Dict of {field_name: value} for unique constraint
134
-
140
+
135
141
  Returns:
136
142
  Model instance if found, None otherwise
137
143
  """
@@ -141,18 +147,17 @@ class RecordClassifier:
141
147
  return None
142
148
  except self.model_cls.MultipleObjectsReturned:
143
149
  logger.warning(
144
- f"Multiple {self.model_cls.__name__} records found for "
145
- f"unique constraint {field_values_map}",
150
+ f"Multiple {self.model_cls.__name__} records found for unique constraint {field_values_map}",
146
151
  )
147
152
  return self.model_cls.objects.filter(**field_values_map).first()
148
153
 
149
154
  def exists_by_pks(self, pks):
150
155
  """
151
156
  Check if records exist by primary keys without fetching them.
152
-
157
+
153
158
  Args:
154
159
  pks: List of primary key values
155
-
160
+
156
161
  Returns:
157
162
  Set of PKs that exist in the database
158
163
  """
@@ -168,13 +173,13 @@ class RecordClassifier:
168
173
  def count_by_unique_fields(self, objs, unique_fields):
169
174
  """
170
175
  Count how many objects already exist based on unique fields.
171
-
176
+
172
177
  Useful for validation or reporting before upsert operations.
173
-
178
+
174
179
  Args:
175
180
  objs: List of model instances
176
181
  unique_fields: List of field names that form the unique constraint
177
-
182
+
178
183
  Returns:
179
184
  Tuple of (existing_count, new_count)
180
185
  """
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "django-bulk-hooks"
3
- version = "0.2.45"
3
+ version = "0.2.47"
4
4
  description = "Hook-style hooks for Django bulk operations like bulk_create and bulk_update."
5
5
  authors = ["Konrad Beck <konrad.beck@merchantcapital.co.za>"]
6
6
  readme = "README.md"