django-bulk-hooks 0.2.42__py3-none-any.whl → 0.2.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of django-bulk-hooks might be problematic. Click here for more details.

@@ -77,14 +77,11 @@ class ModelAnalyzer:
77
77
  if not objs:
78
78
  return
79
79
 
80
- invalid_types = {
81
- type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)
82
- }
80
+ invalid_types = {type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)}
83
81
 
84
82
  if invalid_types:
85
83
  raise TypeError(
86
- f"{operation} expected instances of {self.model_cls.__name__}, "
87
- f"but got {invalid_types}",
84
+ f"{operation} expected instances of {self.model_cls.__name__}, but got {invalid_types}",
88
85
  )
89
86
 
90
87
  def _check_has_pks(self, objs, operation="operation"):
@@ -130,7 +127,9 @@ class ModelAnalyzer:
130
127
  auto_now_fields = []
131
128
  for field in self.model_cls._meta.fields:
132
129
  if getattr(field, "auto_now", False) or getattr(
133
- field, "auto_now_add", False,
130
+ field,
131
+ "auto_now_add",
132
+ False,
134
133
  ):
135
134
  auto_now_fields.append(field.name)
136
135
  return auto_now_fields
@@ -142,11 +141,7 @@ class ModelAnalyzer:
142
141
  Returns:
143
142
  list: FK field names
144
143
  """
145
- return [
146
- field.name
147
- for field in self.model_cls._meta.concrete_fields
148
- if field.is_relation and not field.many_to_many
149
- ]
144
+ return [field.name for field in self.model_cls._meta.concrete_fields if field.is_relation and not field.many_to_many]
150
145
 
151
146
  def detect_changed_fields(self, objs):
152
147
  """
@@ -210,15 +205,15 @@ class ModelAnalyzer:
210
205
  def resolve_expression(self, field_name, expression, instance):
211
206
  """
212
207
  Resolve a SQL expression to a concrete value for a specific instance.
213
-
208
+
214
209
  This method materializes database expressions (F(), Subquery, Case, etc.)
215
210
  into concrete values by using Django's annotate() mechanism.
216
-
211
+
217
212
  Args:
218
213
  field_name: Name of the field being updated
219
214
  expression: The expression or value to resolve
220
215
  instance: The model instance to resolve for
221
-
216
+
222
217
  Returns:
223
218
  The resolved concrete value
224
219
  """
@@ -236,31 +231,34 @@ class ModelAnalyzer:
236
231
  instance_qs = self.model_cls.objects.filter(pk=instance.pk)
237
232
 
238
233
  # Use annotate with the expression and let Django resolve it
239
- resolved_value = instance_qs.annotate(
240
- _resolved_value=expression,
241
- ).values_list("_resolved_value", flat=True).first()
234
+ resolved_value = (
235
+ instance_qs.annotate(
236
+ _resolved_value=expression,
237
+ )
238
+ .values_list("_resolved_value", flat=True)
239
+ .first()
240
+ )
242
241
 
243
242
  return resolved_value
244
243
  except Exception as e:
245
244
  # If expression resolution fails, log and return original
246
245
  logger.warning(
247
- f"Failed to resolve expression for field '{field_name}' "
248
- f"on {self.model_cls.__name__}: {e}. Using original value.",
246
+ f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
249
247
  )
250
248
  return expression
251
249
 
252
250
  def apply_update_values(self, instances, update_kwargs):
253
251
  """
254
252
  Apply update_kwargs to instances, resolving any SQL expressions.
255
-
253
+
256
254
  This method transforms queryset.update()-style kwargs (which may contain
257
255
  F() expressions, Subquery, Case, etc.) into concrete values and applies
258
256
  them to the instances.
259
-
257
+
260
258
  Args:
261
259
  instances: List of model instances to update
262
260
  update_kwargs: Dict of {field_name: value_or_expression}
263
-
261
+
264
262
  Returns:
265
263
  List of field names that were updated
266
264
  """
@@ -271,7 +269,7 @@ class ModelAnalyzer:
271
269
  return []
272
270
 
273
271
  fields_updated = list(update_kwargs.keys())
274
-
272
+
275
273
  # Extract PKs
276
274
  pks = [inst.pk for inst in instances if inst.pk is not None]
277
275
  if not pks:
@@ -306,8 +304,7 @@ class ModelAnalyzer:
306
304
  except Exception as e:
307
305
  # If expression resolution fails, log and use original
308
306
  logger.warning(
309
- f"Failed to resolve expression for field '{field_name}' "
310
- f"on {self.model_cls.__name__}: {e}. Using original value.",
307
+ f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
311
308
  )
312
309
  for instance in instances:
313
310
  setattr(instance, field_name, value)
@@ -47,6 +47,8 @@ class BulkExecutor:
47
47
  update_conflicts=False,
48
48
  update_fields=None,
49
49
  unique_fields=None,
50
+ existing_record_ids=None,
51
+ existing_pks_map=None,
50
52
  **kwargs,
51
53
  ):
52
54
  """
@@ -72,13 +74,23 @@ class BulkExecutor:
72
74
 
73
75
  # Check if this is an MTI model and route accordingly
74
76
  if self.mti_handler.is_mti_model():
75
- logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
76
77
 
77
- # Classify records using the classifier service
78
- existing_record_ids = set()
79
- existing_pks_map = {}
80
- if update_conflicts and unique_fields:
81
- existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
78
+ # Use pre-classified records if provided, otherwise classify now
79
+ if existing_record_ids is None or existing_pks_map is None:
80
+ existing_record_ids = set()
81
+ existing_pks_map = {}
82
+ if update_conflicts and unique_fields:
83
+ # For MTI, find which model has the unique fields and query THAT model
84
+ # This handles the schema migration case where parent exists but child doesn't
85
+ query_model = self.mti_handler.find_model_with_unique_fields(unique_fields)
86
+ logger.info(f"MTI upsert: querying {query_model.__name__} for unique fields {unique_fields}")
87
+
88
+ existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(
89
+ objs, unique_fields, query_model=query_model
90
+ )
91
+ logger.info(f"MTI Upsert classification: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new")
92
+ logger.info(f"existing_record_ids: {existing_record_ids}")
93
+ logger.info(f"existing_pks_map: {existing_pks_map}")
82
94
 
83
95
  # Build execution plan with classification results
84
96
  plan = self.mti_handler.build_create_plan(
@@ -91,10 +103,16 @@ class BulkExecutor:
91
103
  existing_pks_map=existing_pks_map,
92
104
  )
93
105
  # Execute the plan
94
- return self._execute_mti_create_plan(plan)
106
+ result = self._execute_mti_create_plan(plan)
107
+
108
+ # Tag objects with upsert metadata for hook dispatching
109
+ if update_conflicts and unique_fields:
110
+ self._tag_upsert_metadata(result, existing_record_ids, existing_pks_map)
111
+
112
+ return result
95
113
 
96
114
  # Non-MTI model - use Django's native bulk_create
97
- return self._execute_bulk_create(
115
+ result = self._execute_bulk_create(
98
116
  objs,
99
117
  batch_size,
100
118
  ignore_conflicts,
@@ -104,6 +122,15 @@ class BulkExecutor:
104
122
  **kwargs,
105
123
  )
106
124
 
125
+ # Tag objects with upsert metadata for hook dispatching
126
+ if update_conflicts and unique_fields:
127
+ # Use pre-classified results if available, otherwise classify now
128
+ if existing_record_ids is None:
129
+ existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
130
+ self._tag_upsert_metadata(result, existing_record_ids, existing_pks_map)
131
+
132
+ return result
133
+
107
134
  def _execute_bulk_create(
108
135
  self,
109
136
  objs,
@@ -187,59 +214,43 @@ class BulkExecutor:
187
214
  if not plan:
188
215
  return []
189
216
 
217
+
190
218
  with transaction.atomic(using=self.queryset.db, savepoint=False):
191
- # Step 1: Create/Update all parent objects level by level
219
+ # Step 1: Upsert all parent objects level by level using Django's native upsert
192
220
  parent_instances_map = {} # Maps original obj id() -> {model: parent_instance}
193
221
 
194
222
  for parent_level in plan.parent_levels:
195
- # Separate new and existing parent objects
196
- new_parents = []
197
- existing_parents = []
198
-
199
- for parent_obj in parent_level.objects:
200
- orig_obj_id = parent_level.original_object_map[id(parent_obj)]
201
- if orig_obj_id in plan.existing_record_ids:
202
- existing_parents.append(parent_obj)
203
- else:
204
- new_parents.append(parent_obj)
205
-
206
- # Bulk create new parents
207
- if new_parents:
208
- bulk_kwargs = {"batch_size": len(new_parents)}
209
-
210
- if parent_level.update_conflicts:
211
- bulk_kwargs["update_conflicts"] = True
212
- bulk_kwargs["unique_fields"] = parent_level.unique_fields
213
- bulk_kwargs["update_fields"] = parent_level.update_fields
214
-
215
- # Use base QuerySet to avoid recursion
216
- base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
217
- created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
218
-
219
- # Copy generated fields back to parent objects
220
- for created_parent, parent_obj in zip(created_parents, new_parents):
221
- for field in parent_level.model_class._meta.local_fields:
222
- created_value = getattr(created_parent, field.name, None)
223
- if created_value is not None:
224
- setattr(parent_obj, field.name, created_value)
225
-
226
- parent_obj._state.adding = False
227
- parent_obj._state.db = self.queryset.db
228
-
229
- # Update existing parents
230
- if existing_parents and parent_level.update_fields:
223
+ # Use base QuerySet to avoid recursion
224
+ base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
225
+
226
+ # Build bulk_create kwargs
227
+ bulk_kwargs = {"batch_size": len(parent_level.objects)}
228
+
229
+ if parent_level.update_conflicts:
230
+ # Let Django handle the upsert - it will INSERT or UPDATE as needed
231
+ bulk_kwargs["update_conflicts"] = True
232
+ bulk_kwargs["unique_fields"] = parent_level.unique_fields
233
+
231
234
  # Filter update fields to only those that exist in this parent model
232
235
  parent_model_fields = {field.name for field in parent_level.model_class._meta.local_fields}
233
236
  filtered_update_fields = [field for field in parent_level.update_fields if field in parent_model_fields]
234
-
235
237
  if filtered_update_fields:
236
- base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
237
- base_qs.bulk_update(existing_parents, filtered_update_fields)
238
+ bulk_kwargs["update_fields"] = filtered_update_fields
238
239
 
239
- # Mark as not adding
240
- for parent_obj in existing_parents:
241
- parent_obj._state.adding = False
242
- parent_obj._state.db = self.queryset.db
240
+ # Perform the upsert - Django handles INSERT vs UPDATE automatically
241
+ upserted_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
242
+
243
+ # Copy generated fields back to parent objects
244
+ for upserted_parent, parent_obj in zip(upserted_parents, parent_level.objects):
245
+ for field in parent_level.model_class._meta.local_fields:
246
+ # Use attname for ForeignKey fields to avoid triggering database queries
247
+ field_attr = field.attname if isinstance(field, ForeignKey) else field.name
248
+ upserted_value = getattr(upserted_parent, field_attr, None)
249
+ if upserted_value is not None:
250
+ setattr(parent_obj, field_attr, upserted_value)
251
+
252
+ parent_obj._state.adding = False
253
+ parent_obj._state.db = self.queryset.db
243
254
 
244
255
  # Map parents back to original objects
245
256
  for parent_obj in parent_level.objects:
@@ -248,10 +259,7 @@ class BulkExecutor:
248
259
  parent_instances_map[orig_obj_id] = {}
249
260
  parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
250
261
 
251
- # Step 2: Add parent links to child objects and separate new/existing
252
- new_child_objects = []
253
- existing_child_objects = []
254
-
262
+ # Step 2: Add parent links to child objects and set PKs for existing records
255
263
  for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
256
264
  parent_instances = parent_instances_map.get(id(orig_obj), {})
257
265
 
@@ -261,91 +269,103 @@ class BulkExecutor:
261
269
  if parent_link:
262
270
  setattr(child_obj, parent_link.attname, parent_instance.pk)
263
271
  setattr(child_obj, parent_link.name, parent_instance)
272
+ # IMPORTANT: Don't set the child's PK here - it should only get PK after insertion
273
+ # The parent link field (hookmodel_ptr) is NOT the same as the child's PK
274
+ else:
275
+ logger.warning(f"No parent link found for {parent_model} in {plan.child_model}")
264
276
 
265
- # Classify as new or existing
277
+ # For existing records in upsert, ensure PK is set on child object
266
278
  if id(orig_obj) in plan.existing_record_ids:
267
- # For existing records, set the PK on child object
268
279
  pk_value = getattr(orig_obj, "pk", None)
269
280
  if pk_value:
270
281
  child_obj.pk = pk_value
271
282
  child_obj.id = pk_value
272
- existing_child_objects.append(child_obj)
273
- else:
274
- new_child_objects.append(child_obj)
275
-
276
- # Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
277
- if new_child_objects:
278
- base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
279
- base_qs._prepare_for_bulk_create(new_child_objects)
280
-
281
- # Partition objects by PK status
282
- objs_without_pk, objs_with_pk = [], []
283
- for obj in new_child_objects:
284
- if obj._is_pk_set():
285
- objs_with_pk.append(obj)
286
283
  else:
287
- objs_without_pk.append(obj)
288
-
289
- # Get fields for insert
290
- opts = plan.child_model._meta
291
- fields = [f for f in opts.local_fields if not f.generated]
292
-
293
- # Execute bulk insert
294
- if objs_with_pk:
295
- returned_columns = base_qs._batched_insert(
296
- objs_with_pk,
297
- fields,
298
- batch_size=len(objs_with_pk),
299
- )
300
- if returned_columns:
301
- for obj, results in zip(objs_with_pk, returned_columns):
302
- if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
303
- for result, field in zip(results, opts.db_returning_fields):
304
- if field != opts.pk:
305
- setattr(obj, field.attname, result)
306
- obj._state.adding = False
307
- obj._state.db = self.queryset.db
308
- else:
309
- for obj in objs_with_pk:
310
- obj._state.adding = False
311
- obj._state.db = self.queryset.db
312
-
313
- if objs_without_pk:
314
- filtered_fields = [f for f in fields if not isinstance(f, AutoField) and not f.primary_key]
315
- returned_columns = base_qs._batched_insert(
316
- objs_without_pk,
317
- filtered_fields,
318
- batch_size=len(objs_without_pk),
284
+ # If no PK on original object, this is a new record, don't set PK
285
+ logger.info(f"New record {orig_obj} - not setting PK on child object")
286
+
287
+ # Step 3: Handle child objects
288
+ # Note: We can't use bulk_create on child MTI models, so we use _batched_insert for new records
289
+ # and bulk_update for existing records
290
+ base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
291
+
292
+ # For MTI child objects, we need to handle them differently
293
+ # In MTI, child objects get PKs from parent links, but we need to distinguish
294
+ # between truly new records and existing records for upsert operations
295
+ objs_without_pk, objs_with_pk = [], []
296
+
297
+ # Check which CHILD records actually exist in the child table
298
+ # This is separate from checking parent existence
299
+ if plan.update_conflicts:
300
+ # Query the CHILD table to see which child records exist
301
+ parent_pks = []
302
+ for child_obj in plan.child_objects:
303
+ child_pk = getattr(child_obj, plan.child_model._meta.pk.attname, None)
304
+ if child_pk:
305
+ parent_pks.append(child_pk)
306
+
307
+ existing_child_pks = set()
308
+ if parent_pks:
309
+ existing_child_pks = set(
310
+ base_qs.filter(pk__in=parent_pks).values_list('pk', flat=True)
319
311
  )
320
- if returned_columns:
321
- for obj, results in zip(objs_without_pk, returned_columns):
322
- if hasattr(opts, "db_returning_fields"):
323
- for result, field in zip(results, opts.db_returning_fields):
324
- setattr(obj, field.attname, result)
325
- obj._state.adding = False
326
- obj._state.db = self.queryset.db
312
+
313
+ # Split based on whether child record exists
314
+ for child_obj in plan.child_objects:
315
+ child_pk = getattr(child_obj, plan.child_model._meta.pk.attname, None)
316
+ if child_pk and child_pk in existing_child_pks:
317
+ # Child record exists - update it
318
+ objs_with_pk.append(child_obj)
327
319
  else:
328
- for obj in objs_without_pk:
329
- obj._state.adding = False
330
- obj._state.db = self.queryset.db
331
-
332
- # Step 3.5: Update existing child objects
333
- if existing_child_objects and plan.update_fields:
320
+ # Child record doesn't exist - insert it
321
+ objs_without_pk.append(child_obj)
322
+ else:
323
+ # Not an upsert - all are new records
324
+ objs_without_pk = plan.child_objects
325
+ objs_with_pk = []
326
+
327
+ # For objects with PK (existing records in upsert), use bulk_update
328
+ if objs_with_pk and plan.update_fields:
334
329
  # Filter update fields to only those that exist in the child model
335
330
  child_model_fields = {field.name for field in plan.child_model._meta.local_fields}
336
331
  filtered_child_update_fields = [field for field in plan.update_fields if field in child_model_fields]
337
332
 
338
333
  if filtered_child_update_fields:
339
- base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
340
- base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
334
+ base_qs.bulk_update(objs_with_pk, filtered_child_update_fields)
341
335
 
342
336
  # Mark as not adding
343
- for child_obj in existing_child_objects:
344
- child_obj._state.adding = False
345
- child_obj._state.db = self.queryset.db
337
+ for obj in objs_with_pk:
338
+ obj._state.adding = False
339
+ obj._state.db = self.queryset.db
346
340
 
347
- # Combine all children for final processing
348
- created_children = new_child_objects + existing_child_objects
341
+ # For objects without PK (new records), use _batched_insert
342
+ if objs_without_pk:
343
+ base_qs._prepare_for_bulk_create(objs_without_pk)
344
+ opts = plan.child_model._meta
345
+
346
+ # Include all local fields except auto-generated ones
347
+ # For MTI, we need to include the parent link (which is the PK)
348
+ filtered_fields = [f for f in opts.local_fields if not f.generated]
349
+
350
+ returned_columns = base_qs._batched_insert(
351
+ objs_without_pk,
352
+ filtered_fields,
353
+ batch_size=len(objs_without_pk),
354
+ )
355
+ if returned_columns:
356
+ for obj, results in zip(objs_without_pk, returned_columns):
357
+ if hasattr(opts, "db_returning_fields"):
358
+ for result, field in zip(results, opts.db_returning_fields):
359
+ setattr(obj, field.attname, result)
360
+ obj._state.adding = False
361
+ obj._state.db = self.queryset.db
362
+ else:
363
+ for obj in objs_without_pk:
364
+ obj._state.adding = False
365
+ obj._state.db = self.queryset.db
366
+
367
+ # All child objects are now created/updated
368
+ created_children = plan.child_objects
349
369
 
350
370
  # Step 4: Copy PKs and auto-generated fields back to original objects
351
371
  pk_field_name = plan.child_model._meta.pk.name
@@ -510,3 +530,39 @@ class BulkExecutor:
510
530
  from django.db.models import QuerySet
511
531
 
512
532
  return QuerySet.delete(self.queryset)
533
+
534
+ def _tag_upsert_metadata(self, result_objects, existing_record_ids, existing_pks_map):
535
+ """
536
+ Tag objects with metadata indicating whether they were created or updated.
537
+
538
+ This metadata is used by the coordinator to determine which hooks to fire.
539
+ The metadata is temporary and will be cleaned up after hook execution.
540
+
541
+ Args:
542
+ result_objects: List of objects returned from bulk operation
543
+ existing_record_ids: Set of id() for objects that existed before the operation
544
+ existing_pks_map: Dict mapping id(obj) -> pk for existing records
545
+ """
546
+ created_count = 0
547
+ updated_count = 0
548
+
549
+ # Create a set of PKs that existed before the operation
550
+ existing_pks = set(existing_pks_map.values())
551
+
552
+ for obj in result_objects:
553
+ # Use PK to determine if this record was created or updated
554
+ # If the PK was in the existing_pks_map, it was updated; otherwise created
555
+ was_created = obj.pk not in existing_pks
556
+ obj._bulk_hooks_was_created = was_created
557
+ obj._bulk_hooks_upsert_metadata = True
558
+
559
+ if was_created:
560
+ created_count += 1
561
+ else:
562
+ updated_count += 1
563
+
564
+ logger.info(
565
+ f"Tagged upsert metadata: {created_count} created, {updated_count} updated "
566
+ f"(total={len(result_objects)}, existing_pks={len(existing_pks)})"
567
+ )
568
+