django-bulk-hooks 0.2.44__py3-none-any.whl → 0.2.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of django-bulk-hooks might be problematic. Click here for more details.
- django_bulk_hooks/operations/analyzer.py +22 -25
- django_bulk_hooks/operations/bulk_executor.py +108 -123
- django_bulk_hooks/operations/coordinator.py +102 -71
- django_bulk_hooks/operations/mti_handler.py +35 -42
- django_bulk_hooks/operations/mti_plans.py +9 -6
- django_bulk_hooks/operations/record_classifier.py +21 -20
- {django_bulk_hooks-0.2.44.dist-info → django_bulk_hooks-0.2.46.dist-info}/METADATA +1 -1
- {django_bulk_hooks-0.2.44.dist-info → django_bulk_hooks-0.2.46.dist-info}/RECORD +10 -10
- {django_bulk_hooks-0.2.44.dist-info → django_bulk_hooks-0.2.46.dist-info}/LICENSE +0 -0
- {django_bulk_hooks-0.2.44.dist-info → django_bulk_hooks-0.2.46.dist-info}/WHEEL +0 -0
|
@@ -77,14 +77,11 @@ class ModelAnalyzer:
|
|
|
77
77
|
if not objs:
|
|
78
78
|
return
|
|
79
79
|
|
|
80
|
-
invalid_types = {
|
|
81
|
-
type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)
|
|
82
|
-
}
|
|
80
|
+
invalid_types = {type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)}
|
|
83
81
|
|
|
84
82
|
if invalid_types:
|
|
85
83
|
raise TypeError(
|
|
86
|
-
f"{operation} expected instances of {self.model_cls.__name__}, "
|
|
87
|
-
f"but got {invalid_types}",
|
|
84
|
+
f"{operation} expected instances of {self.model_cls.__name__}, but got {invalid_types}",
|
|
88
85
|
)
|
|
89
86
|
|
|
90
87
|
def _check_has_pks(self, objs, operation="operation"):
|
|
@@ -130,7 +127,9 @@ class ModelAnalyzer:
|
|
|
130
127
|
auto_now_fields = []
|
|
131
128
|
for field in self.model_cls._meta.fields:
|
|
132
129
|
if getattr(field, "auto_now", False) or getattr(
|
|
133
|
-
field,
|
|
130
|
+
field,
|
|
131
|
+
"auto_now_add",
|
|
132
|
+
False,
|
|
134
133
|
):
|
|
135
134
|
auto_now_fields.append(field.name)
|
|
136
135
|
return auto_now_fields
|
|
@@ -142,11 +141,7 @@ class ModelAnalyzer:
|
|
|
142
141
|
Returns:
|
|
143
142
|
list: FK field names
|
|
144
143
|
"""
|
|
145
|
-
return [
|
|
146
|
-
field.name
|
|
147
|
-
for field in self.model_cls._meta.concrete_fields
|
|
148
|
-
if field.is_relation and not field.many_to_many
|
|
149
|
-
]
|
|
144
|
+
return [field.name for field in self.model_cls._meta.concrete_fields if field.is_relation and not field.many_to_many]
|
|
150
145
|
|
|
151
146
|
def detect_changed_fields(self, objs):
|
|
152
147
|
"""
|
|
@@ -210,15 +205,15 @@ class ModelAnalyzer:
|
|
|
210
205
|
def resolve_expression(self, field_name, expression, instance):
|
|
211
206
|
"""
|
|
212
207
|
Resolve a SQL expression to a concrete value for a specific instance.
|
|
213
|
-
|
|
208
|
+
|
|
214
209
|
This method materializes database expressions (F(), Subquery, Case, etc.)
|
|
215
210
|
into concrete values by using Django's annotate() mechanism.
|
|
216
|
-
|
|
211
|
+
|
|
217
212
|
Args:
|
|
218
213
|
field_name: Name of the field being updated
|
|
219
214
|
expression: The expression or value to resolve
|
|
220
215
|
instance: The model instance to resolve for
|
|
221
|
-
|
|
216
|
+
|
|
222
217
|
Returns:
|
|
223
218
|
The resolved concrete value
|
|
224
219
|
"""
|
|
@@ -236,31 +231,34 @@ class ModelAnalyzer:
|
|
|
236
231
|
instance_qs = self.model_cls.objects.filter(pk=instance.pk)
|
|
237
232
|
|
|
238
233
|
# Use annotate with the expression and let Django resolve it
|
|
239
|
-
resolved_value =
|
|
240
|
-
|
|
241
|
-
|
|
234
|
+
resolved_value = (
|
|
235
|
+
instance_qs.annotate(
|
|
236
|
+
_resolved_value=expression,
|
|
237
|
+
)
|
|
238
|
+
.values_list("_resolved_value", flat=True)
|
|
239
|
+
.first()
|
|
240
|
+
)
|
|
242
241
|
|
|
243
242
|
return resolved_value
|
|
244
243
|
except Exception as e:
|
|
245
244
|
# If expression resolution fails, log and return original
|
|
246
245
|
logger.warning(
|
|
247
|
-
f"Failed to resolve expression for field '{field_name}' "
|
|
248
|
-
f"on {self.model_cls.__name__}: {e}. Using original value.",
|
|
246
|
+
f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
|
|
249
247
|
)
|
|
250
248
|
return expression
|
|
251
249
|
|
|
252
250
|
def apply_update_values(self, instances, update_kwargs):
|
|
253
251
|
"""
|
|
254
252
|
Apply update_kwargs to instances, resolving any SQL expressions.
|
|
255
|
-
|
|
253
|
+
|
|
256
254
|
This method transforms queryset.update()-style kwargs (which may contain
|
|
257
255
|
F() expressions, Subquery, Case, etc.) into concrete values and applies
|
|
258
256
|
them to the instances.
|
|
259
|
-
|
|
257
|
+
|
|
260
258
|
Args:
|
|
261
259
|
instances: List of model instances to update
|
|
262
260
|
update_kwargs: Dict of {field_name: value_or_expression}
|
|
263
|
-
|
|
261
|
+
|
|
264
262
|
Returns:
|
|
265
263
|
List of field names that were updated
|
|
266
264
|
"""
|
|
@@ -271,7 +269,7 @@ class ModelAnalyzer:
|
|
|
271
269
|
return []
|
|
272
270
|
|
|
273
271
|
fields_updated = list(update_kwargs.keys())
|
|
274
|
-
|
|
272
|
+
|
|
275
273
|
# Extract PKs
|
|
276
274
|
pks = [inst.pk for inst in instances if inst.pk is not None]
|
|
277
275
|
if not pks:
|
|
@@ -306,8 +304,7 @@ class ModelAnalyzer:
|
|
|
306
304
|
except Exception as e:
|
|
307
305
|
# If expression resolution fails, log and use original
|
|
308
306
|
logger.warning(
|
|
309
|
-
f"Failed to resolve expression for field '{field_name}' "
|
|
310
|
-
f"on {self.model_cls.__name__}: {e}. Using original value.",
|
|
307
|
+
f"Failed to resolve expression for field '{field_name}' on {self.model_cls.__name__}: {e}. Using original value.",
|
|
311
308
|
)
|
|
312
309
|
for instance in instances:
|
|
313
310
|
setattr(instance, field_name, value)
|
|
@@ -74,7 +74,6 @@ class BulkExecutor:
|
|
|
74
74
|
|
|
75
75
|
# Check if this is an MTI model and route accordingly
|
|
76
76
|
if self.mti_handler.is_mti_model():
|
|
77
|
-
logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
|
|
78
77
|
|
|
79
78
|
# Use pre-classified records if provided, otherwise classify now
|
|
80
79
|
if existing_record_ids is None or existing_pks_map is None:
|
|
@@ -82,6 +81,9 @@ class BulkExecutor:
|
|
|
82
81
|
existing_pks_map = {}
|
|
83
82
|
if update_conflicts and unique_fields:
|
|
84
83
|
existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
|
|
84
|
+
logger.info(f"MTI Upsert classification: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new")
|
|
85
|
+
logger.info(f"existing_record_ids: {existing_record_ids}")
|
|
86
|
+
logger.info(f"existing_pks_map: {existing_pks_map}")
|
|
85
87
|
|
|
86
88
|
# Build execution plan with classification results
|
|
87
89
|
plan = self.mti_handler.build_create_plan(
|
|
@@ -95,11 +97,11 @@ class BulkExecutor:
|
|
|
95
97
|
)
|
|
96
98
|
# Execute the plan
|
|
97
99
|
result = self._execute_mti_create_plan(plan)
|
|
98
|
-
|
|
100
|
+
|
|
99
101
|
# Tag objects with upsert metadata for hook dispatching
|
|
100
102
|
if update_conflicts and unique_fields:
|
|
101
103
|
self._tag_upsert_metadata(result, existing_record_ids)
|
|
102
|
-
|
|
104
|
+
|
|
103
105
|
return result
|
|
104
106
|
|
|
105
107
|
# Non-MTI model - use Django's native bulk_create
|
|
@@ -112,14 +114,14 @@ class BulkExecutor:
|
|
|
112
114
|
unique_fields,
|
|
113
115
|
**kwargs,
|
|
114
116
|
)
|
|
115
|
-
|
|
117
|
+
|
|
116
118
|
# Tag objects with upsert metadata for hook dispatching
|
|
117
119
|
if update_conflicts and unique_fields:
|
|
118
120
|
# Use pre-classified results if available, otherwise classify now
|
|
119
121
|
if existing_record_ids is None:
|
|
120
122
|
existing_record_ids, _ = self.record_classifier.classify_for_upsert(objs, unique_fields)
|
|
121
123
|
self._tag_upsert_metadata(result, existing_record_ids)
|
|
122
|
-
|
|
124
|
+
|
|
123
125
|
return result
|
|
124
126
|
|
|
125
127
|
def _execute_bulk_create(
|
|
@@ -205,59 +207,41 @@ class BulkExecutor:
|
|
|
205
207
|
if not plan:
|
|
206
208
|
return []
|
|
207
209
|
|
|
210
|
+
|
|
208
211
|
with transaction.atomic(using=self.queryset.db, savepoint=False):
|
|
209
|
-
# Step 1:
|
|
212
|
+
# Step 1: Upsert all parent objects level by level using Django's native upsert
|
|
210
213
|
parent_instances_map = {} # Maps original obj id() -> {model: parent_instance}
|
|
211
214
|
|
|
212
215
|
for parent_level in plan.parent_levels:
|
|
213
|
-
#
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
# Bulk create new parents
|
|
225
|
-
if new_parents:
|
|
226
|
-
bulk_kwargs = {"batch_size": len(new_parents)}
|
|
227
|
-
|
|
228
|
-
if parent_level.update_conflicts:
|
|
229
|
-
bulk_kwargs["update_conflicts"] = True
|
|
230
|
-
bulk_kwargs["unique_fields"] = parent_level.unique_fields
|
|
231
|
-
bulk_kwargs["update_fields"] = parent_level.update_fields
|
|
232
|
-
|
|
233
|
-
# Use base QuerySet to avoid recursion
|
|
234
|
-
base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
|
|
235
|
-
created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
|
|
236
|
-
|
|
237
|
-
# Copy generated fields back to parent objects
|
|
238
|
-
for created_parent, parent_obj in zip(created_parents, new_parents):
|
|
239
|
-
for field in parent_level.model_class._meta.local_fields:
|
|
240
|
-
created_value = getattr(created_parent, field.name, None)
|
|
241
|
-
if created_value is not None:
|
|
242
|
-
setattr(parent_obj, field.name, created_value)
|
|
243
|
-
|
|
244
|
-
parent_obj._state.adding = False
|
|
245
|
-
parent_obj._state.db = self.queryset.db
|
|
246
|
-
|
|
247
|
-
# Update existing parents
|
|
248
|
-
if existing_parents and parent_level.update_fields:
|
|
216
|
+
# Use base QuerySet to avoid recursion
|
|
217
|
+
base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
|
|
218
|
+
|
|
219
|
+
# Build bulk_create kwargs
|
|
220
|
+
bulk_kwargs = {"batch_size": len(parent_level.objects)}
|
|
221
|
+
|
|
222
|
+
if parent_level.update_conflicts:
|
|
223
|
+
# Let Django handle the upsert - it will INSERT or UPDATE as needed
|
|
224
|
+
bulk_kwargs["update_conflicts"] = True
|
|
225
|
+
bulk_kwargs["unique_fields"] = parent_level.unique_fields
|
|
226
|
+
|
|
249
227
|
# Filter update fields to only those that exist in this parent model
|
|
250
228
|
parent_model_fields = {field.name for field in parent_level.model_class._meta.local_fields}
|
|
251
229
|
filtered_update_fields = [field for field in parent_level.update_fields if field in parent_model_fields]
|
|
252
|
-
|
|
253
230
|
if filtered_update_fields:
|
|
254
|
-
|
|
255
|
-
|
|
231
|
+
bulk_kwargs["update_fields"] = filtered_update_fields
|
|
232
|
+
|
|
233
|
+
# Perform the upsert - Django handles INSERT vs UPDATE automatically
|
|
234
|
+
upserted_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)
|
|
235
|
+
|
|
236
|
+
# Copy generated fields back to parent objects
|
|
237
|
+
for upserted_parent, parent_obj in zip(upserted_parents, parent_level.objects):
|
|
238
|
+
for field in parent_level.model_class._meta.local_fields:
|
|
239
|
+
upserted_value = getattr(upserted_parent, field.name, None)
|
|
240
|
+
if upserted_value is not None:
|
|
241
|
+
setattr(parent_obj, field.name, upserted_value)
|
|
256
242
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
parent_obj._state.adding = False
|
|
260
|
-
parent_obj._state.db = self.queryset.db
|
|
243
|
+
parent_obj._state.adding = False
|
|
244
|
+
parent_obj._state.db = self.queryset.db
|
|
261
245
|
|
|
262
246
|
# Map parents back to original objects
|
|
263
247
|
for parent_obj in parent_level.objects:
|
|
@@ -266,10 +250,7 @@ class BulkExecutor:
|
|
|
266
250
|
parent_instances_map[orig_obj_id] = {}
|
|
267
251
|
parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
|
|
268
252
|
|
|
269
|
-
# Step 2: Add parent links to child objects and
|
|
270
|
-
new_child_objects = []
|
|
271
|
-
existing_child_objects = []
|
|
272
|
-
|
|
253
|
+
# Step 2: Add parent links to child objects and set PKs for existing records
|
|
273
254
|
for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
|
|
274
255
|
parent_instances = parent_instances_map.get(id(orig_obj), {})
|
|
275
256
|
|
|
@@ -279,91 +260,81 @@ class BulkExecutor:
|
|
|
279
260
|
if parent_link:
|
|
280
261
|
setattr(child_obj, parent_link.attname, parent_instance.pk)
|
|
281
262
|
setattr(child_obj, parent_link.name, parent_instance)
|
|
263
|
+
# IMPORTANT: Don't set the child's PK here - it should only get PK after insertion
|
|
264
|
+
# The parent link field (hookmodel_ptr) is NOT the same as the child's PK
|
|
265
|
+
else:
|
|
266
|
+
logger.warning(f"No parent link found for {parent_model} in {plan.child_model}")
|
|
282
267
|
|
|
283
|
-
#
|
|
268
|
+
# For existing records in upsert, ensure PK is set on child object
|
|
284
269
|
if id(orig_obj) in plan.existing_record_ids:
|
|
285
|
-
# For existing records, set the PK on child object
|
|
286
270
|
pk_value = getattr(orig_obj, "pk", None)
|
|
287
271
|
if pk_value:
|
|
288
272
|
child_obj.pk = pk_value
|
|
289
273
|
child_obj.id = pk_value
|
|
290
|
-
existing_child_objects.append(child_obj)
|
|
291
|
-
else:
|
|
292
|
-
new_child_objects.append(child_obj)
|
|
293
|
-
|
|
294
|
-
# Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
|
|
295
|
-
if new_child_objects:
|
|
296
|
-
base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
|
|
297
|
-
base_qs._prepare_for_bulk_create(new_child_objects)
|
|
298
|
-
|
|
299
|
-
# Partition objects by PK status
|
|
300
|
-
objs_without_pk, objs_with_pk = [], []
|
|
301
|
-
for obj in new_child_objects:
|
|
302
|
-
if obj._is_pk_set():
|
|
303
|
-
objs_with_pk.append(obj)
|
|
304
274
|
else:
|
|
305
|
-
|
|
275
|
+
# If no PK on original object, this is a new record, don't set PK
|
|
276
|
+
logger.info(f"New record {orig_obj} - not setting PK on child object")
|
|
306
277
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
obj._state.adding = False
|
|
325
|
-
obj._state.db = self.queryset.db
|
|
326
|
-
else:
|
|
327
|
-
for obj in objs_with_pk:
|
|
328
|
-
obj._state.adding = False
|
|
329
|
-
obj._state.db = self.queryset.db
|
|
330
|
-
|
|
331
|
-
if objs_without_pk:
|
|
332
|
-
filtered_fields = [f for f in fields if not isinstance(f, AutoField) and not f.primary_key]
|
|
333
|
-
returned_columns = base_qs._batched_insert(
|
|
334
|
-
objs_without_pk,
|
|
335
|
-
filtered_fields,
|
|
336
|
-
batch_size=len(objs_without_pk),
|
|
337
|
-
)
|
|
338
|
-
if returned_columns:
|
|
339
|
-
for obj, results in zip(objs_without_pk, returned_columns):
|
|
340
|
-
if hasattr(opts, "db_returning_fields"):
|
|
341
|
-
for result, field in zip(results, opts.db_returning_fields):
|
|
342
|
-
setattr(obj, field.attname, result)
|
|
343
|
-
obj._state.adding = False
|
|
344
|
-
obj._state.db = self.queryset.db
|
|
345
|
-
else:
|
|
346
|
-
for obj in objs_without_pk:
|
|
347
|
-
obj._state.adding = False
|
|
348
|
-
obj._state.db = self.queryset.db
|
|
278
|
+
# Step 3: Handle child objects
|
|
279
|
+
# Note: We can't use bulk_create on child MTI models, so we use _batched_insert for new records
|
|
280
|
+
# and bulk_update for existing records
|
|
281
|
+
base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
|
|
282
|
+
|
|
283
|
+
# For MTI child objects, we need to handle them differently
|
|
284
|
+
# In MTI, child objects get PKs from parent links, but we need to distinguish
|
|
285
|
+
# between truly new records and existing records for upsert operations
|
|
286
|
+
objs_without_pk, objs_with_pk = [], []
|
|
287
|
+
for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
|
|
288
|
+
# Check if this is an existing record (for upsert operations)
|
|
289
|
+
if id(orig_obj) in plan.existing_record_ids:
|
|
290
|
+
# Existing record - should be updated
|
|
291
|
+
objs_with_pk.append(child_obj)
|
|
292
|
+
else:
|
|
293
|
+
# New record - should be inserted
|
|
294
|
+
objs_without_pk.append(child_obj)
|
|
349
295
|
|
|
350
|
-
#
|
|
351
|
-
if
|
|
296
|
+
# For objects with PK (existing records in upsert), use bulk_update
|
|
297
|
+
if objs_with_pk and plan.update_fields:
|
|
352
298
|
# Filter update fields to only those that exist in the child model
|
|
353
299
|
child_model_fields = {field.name for field in plan.child_model._meta.local_fields}
|
|
354
300
|
filtered_child_update_fields = [field for field in plan.update_fields if field in child_model_fields]
|
|
355
301
|
|
|
356
302
|
if filtered_child_update_fields:
|
|
357
|
-
base_qs
|
|
358
|
-
base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
|
|
303
|
+
base_qs.bulk_update(objs_with_pk, filtered_child_update_fields)
|
|
359
304
|
|
|
360
305
|
# Mark as not adding
|
|
361
|
-
for
|
|
362
|
-
|
|
363
|
-
|
|
306
|
+
for obj in objs_with_pk:
|
|
307
|
+
obj._state.adding = False
|
|
308
|
+
obj._state.db = self.queryset.db
|
|
364
309
|
|
|
365
|
-
#
|
|
366
|
-
|
|
310
|
+
# For objects without PK (new records), use _batched_insert
|
|
311
|
+
if objs_without_pk:
|
|
312
|
+
base_qs._prepare_for_bulk_create(objs_without_pk)
|
|
313
|
+
opts = plan.child_model._meta
|
|
314
|
+
|
|
315
|
+
# Include all local fields except auto-generated ones
|
|
316
|
+
# For MTI, we need to include the parent link (which is the PK)
|
|
317
|
+
filtered_fields = [f for f in opts.local_fields if not f.generated]
|
|
318
|
+
|
|
319
|
+
returned_columns = base_qs._batched_insert(
|
|
320
|
+
objs_without_pk,
|
|
321
|
+
filtered_fields,
|
|
322
|
+
batch_size=len(objs_without_pk),
|
|
323
|
+
)
|
|
324
|
+
if returned_columns:
|
|
325
|
+
for obj, results in zip(objs_without_pk, returned_columns):
|
|
326
|
+
if hasattr(opts, "db_returning_fields"):
|
|
327
|
+
for result, field in zip(results, opts.db_returning_fields):
|
|
328
|
+
setattr(obj, field.attname, result)
|
|
329
|
+
obj._state.adding = False
|
|
330
|
+
obj._state.db = self.queryset.db
|
|
331
|
+
else:
|
|
332
|
+
for obj in objs_without_pk:
|
|
333
|
+
obj._state.adding = False
|
|
334
|
+
obj._state.db = self.queryset.db
|
|
335
|
+
|
|
336
|
+
# All child objects are now created/updated
|
|
337
|
+
created_children = plan.child_objects
|
|
367
338
|
|
|
368
339
|
# Step 4: Copy PKs and auto-generated fields back to original objects
|
|
369
340
|
pk_field_name = plan.child_model._meta.pk.name
|
|
@@ -532,16 +503,30 @@ class BulkExecutor:
|
|
|
532
503
|
def _tag_upsert_metadata(self, result_objects, existing_record_ids):
|
|
533
504
|
"""
|
|
534
505
|
Tag objects with metadata indicating whether they were created or updated.
|
|
535
|
-
|
|
506
|
+
|
|
536
507
|
This metadata is used by the coordinator to determine which hooks to fire.
|
|
537
508
|
The metadata is temporary and will be cleaned up after hook execution.
|
|
538
|
-
|
|
509
|
+
|
|
539
510
|
Args:
|
|
540
511
|
result_objects: List of objects returned from bulk operation
|
|
541
512
|
existing_record_ids: Set of id() for objects that existed before the operation
|
|
542
513
|
"""
|
|
514
|
+
created_count = 0
|
|
515
|
+
updated_count = 0
|
|
516
|
+
|
|
543
517
|
for obj in result_objects:
|
|
544
518
|
# Tag with metadata for hook dispatching
|
|
545
519
|
was_created = id(obj) not in existing_record_ids
|
|
546
520
|
obj._bulk_hooks_was_created = was_created
|
|
547
521
|
obj._bulk_hooks_upsert_metadata = True
|
|
522
|
+
|
|
523
|
+
if was_created:
|
|
524
|
+
created_count += 1
|
|
525
|
+
else:
|
|
526
|
+
updated_count += 1
|
|
527
|
+
|
|
528
|
+
logger.info(
|
|
529
|
+
f"Tagged upsert metadata: {created_count} created, {updated_count} updated "
|
|
530
|
+
f"(total={len(result_objects)}, existing_ids={len(existing_record_ids)})"
|
|
531
|
+
)
|
|
532
|
+
|
|
@@ -29,7 +29,6 @@ class BulkOperationCoordinator:
|
|
|
29
29
|
Services are created lazily and cached.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
-
|
|
33
32
|
def __init__(self, queryset):
|
|
34
33
|
"""
|
|
35
34
|
Initialize coordinator for a queryset.
|
|
@@ -137,13 +136,10 @@ class BulkOperationCoordinator:
|
|
|
137
136
|
existing_record_ids = set()
|
|
138
137
|
existing_pks_map = {}
|
|
139
138
|
if update_conflicts and unique_fields:
|
|
140
|
-
existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(
|
|
141
|
-
|
|
142
|
-
)
|
|
143
|
-
logger.
|
|
144
|
-
f"Upsert operation: {len(existing_record_ids)} existing, "
|
|
145
|
-
f"{len(objs) - len(existing_record_ids)} new records"
|
|
146
|
-
)
|
|
139
|
+
existing_record_ids, existing_pks_map = self.record_classifier.classify_for_upsert(objs, unique_fields)
|
|
140
|
+
logger.info(f"Upsert operation: {len(existing_record_ids)} existing, {len(objs) - len(existing_record_ids)} new records")
|
|
141
|
+
logger.debug(f"Existing record IDs: {existing_record_ids}")
|
|
142
|
+
logger.debug(f"Existing PKs map: {existing_pks_map}")
|
|
147
143
|
|
|
148
144
|
# Build initial changeset
|
|
149
145
|
changeset = build_changeset_for_create(
|
|
@@ -236,14 +232,17 @@ class BulkOperationCoordinator:
|
|
|
236
232
|
|
|
237
233
|
@transaction.atomic
|
|
238
234
|
def update_queryset(
|
|
239
|
-
self,
|
|
235
|
+
self,
|
|
236
|
+
update_kwargs,
|
|
237
|
+
bypass_hooks=False,
|
|
238
|
+
bypass_validation=False,
|
|
240
239
|
):
|
|
241
240
|
"""
|
|
242
241
|
Execute queryset.update() with full hook support.
|
|
243
|
-
|
|
242
|
+
|
|
244
243
|
ARCHITECTURE & PERFORMANCE TRADE-OFFS
|
|
245
244
|
======================================
|
|
246
|
-
|
|
245
|
+
|
|
247
246
|
To support hooks with queryset.update(), we must:
|
|
248
247
|
1. Fetch old state (SELECT all matching rows)
|
|
249
248
|
2. Execute database update (UPDATE in SQL)
|
|
@@ -252,29 +251,29 @@ class BulkOperationCoordinator:
|
|
|
252
251
|
5. Run BEFORE_UPDATE hooks (CAN modify instances)
|
|
253
252
|
6. Persist BEFORE_UPDATE modifications (bulk_update)
|
|
254
253
|
7. Run AFTER_UPDATE hooks (read-only side effects)
|
|
255
|
-
|
|
254
|
+
|
|
256
255
|
Performance Cost:
|
|
257
256
|
- 2 SELECT queries (before/after)
|
|
258
257
|
- 1 UPDATE query (actual update)
|
|
259
258
|
- 1 bulk_update (if hooks modify data)
|
|
260
|
-
|
|
259
|
+
|
|
261
260
|
Trade-off: Hooks require loading data into Python. If you need
|
|
262
261
|
maximum performance and don't need hooks, use bypass_hooks=True.
|
|
263
|
-
|
|
262
|
+
|
|
264
263
|
Hook Semantics:
|
|
265
264
|
- BEFORE_UPDATE hooks run after the DB update and CAN modify instances
|
|
266
265
|
- Modifications are auto-persisted (framework handles complexity)
|
|
267
266
|
- AFTER_UPDATE hooks run after BEFORE_UPDATE and are read-only
|
|
268
267
|
- This enables cascade logic and computed fields based on DB values
|
|
269
268
|
- User expectation: BEFORE_UPDATE hooks can modify data
|
|
270
|
-
|
|
269
|
+
|
|
271
270
|
Why this approach works well:
|
|
272
271
|
- Allows hooks to see Subquery/F() computed values
|
|
273
272
|
- Enables HasChanged conditions on complex expressions
|
|
274
273
|
- Maintains SQL performance (Subquery stays in database)
|
|
275
274
|
- Meets user expectations: BEFORE_UPDATE can modify instances
|
|
276
275
|
- Clean separation: BEFORE for modifications, AFTER for side effects
|
|
277
|
-
|
|
276
|
+
|
|
278
277
|
For true "prevent write" semantics, intercept at a higher level
|
|
279
278
|
or use bulk_update() directly (which has true before semantics).
|
|
280
279
|
"""
|
|
@@ -291,19 +290,21 @@ class BulkOperationCoordinator:
|
|
|
291
290
|
)
|
|
292
291
|
|
|
293
292
|
def _execute_queryset_update_with_hooks(
|
|
294
|
-
self,
|
|
293
|
+
self,
|
|
294
|
+
update_kwargs,
|
|
295
|
+
bypass_validation=False,
|
|
295
296
|
):
|
|
296
297
|
"""
|
|
297
298
|
Execute queryset update with full hook lifecycle support.
|
|
298
|
-
|
|
299
|
+
|
|
299
300
|
This method implements the fetch-update-fetch pattern required
|
|
300
301
|
to support hooks with queryset.update(). BEFORE_UPDATE hooks can
|
|
301
302
|
modify instances and modifications are auto-persisted.
|
|
302
|
-
|
|
303
|
+
|
|
303
304
|
Args:
|
|
304
305
|
update_kwargs: Dict of fields to update
|
|
305
306
|
bypass_validation: Skip validation hooks if True
|
|
306
|
-
|
|
307
|
+
|
|
307
308
|
Returns:
|
|
308
309
|
Number of rows updated
|
|
309
310
|
"""
|
|
@@ -387,11 +388,11 @@ class BulkOperationCoordinator:
|
|
|
387
388
|
def _run_before_update_hooks_with_tracking(self, instances, models_in_chain, changeset):
|
|
388
389
|
"""
|
|
389
390
|
Run BEFORE_UPDATE hooks and detect modifications.
|
|
390
|
-
|
|
391
|
+
|
|
391
392
|
This is what users expect - BEFORE_UPDATE hooks can modify instances
|
|
392
393
|
and those modifications will be automatically persisted. The framework
|
|
393
394
|
handles the complexity internally.
|
|
394
|
-
|
|
395
|
+
|
|
395
396
|
Returns:
|
|
396
397
|
Set of field names that were modified by hooks
|
|
397
398
|
"""
|
|
@@ -413,10 +414,10 @@ class BulkOperationCoordinator:
|
|
|
413
414
|
def _snapshot_instance_state(self, instances):
|
|
414
415
|
"""
|
|
415
416
|
Create a snapshot of current instance field values.
|
|
416
|
-
|
|
417
|
+
|
|
417
418
|
Args:
|
|
418
419
|
instances: List of model instances
|
|
419
|
-
|
|
420
|
+
|
|
420
421
|
Returns:
|
|
421
422
|
Dict mapping pk -> {field_name: value}
|
|
422
423
|
"""
|
|
@@ -446,11 +447,11 @@ class BulkOperationCoordinator:
|
|
|
446
447
|
def _detect_modifications(self, instances, pre_hook_state):
|
|
447
448
|
"""
|
|
448
449
|
Detect which fields were modified by comparing to snapshot.
|
|
449
|
-
|
|
450
|
+
|
|
450
451
|
Args:
|
|
451
452
|
instances: List of model instances
|
|
452
453
|
pre_hook_state: Previous state snapshot from _snapshot_instance_state
|
|
453
|
-
|
|
454
|
+
|
|
454
455
|
Returns:
|
|
455
456
|
Set of field names that were modified
|
|
456
457
|
"""
|
|
@@ -477,16 +478,15 @@ class BulkOperationCoordinator:
|
|
|
477
478
|
def _persist_hook_modifications(self, instances, modified_fields):
|
|
478
479
|
"""
|
|
479
480
|
Persist modifications made by hooks using bulk_update.
|
|
480
|
-
|
|
481
|
+
|
|
481
482
|
This creates a "cascade" effect similar to Salesforce workflows.
|
|
482
|
-
|
|
483
|
+
|
|
483
484
|
Args:
|
|
484
485
|
instances: List of modified instances
|
|
485
486
|
modified_fields: Set of field names that were modified
|
|
486
487
|
"""
|
|
487
488
|
logger.info(
|
|
488
|
-
f"Hooks modified {len(modified_fields)} field(s): "
|
|
489
|
-
f"{', '.join(sorted(modified_fields))}",
|
|
489
|
+
f"Hooks modified {len(modified_fields)} field(s): {', '.join(sorted(modified_fields))}",
|
|
490
490
|
)
|
|
491
491
|
logger.info("Auto-persisting modifications via bulk_update")
|
|
492
492
|
|
|
@@ -569,14 +569,14 @@ class BulkOperationCoordinator:
|
|
|
569
569
|
def _build_changeset_for_model(self, original_changeset, target_model_cls):
|
|
570
570
|
"""
|
|
571
571
|
Build a changeset for a specific model in the MTI inheritance chain.
|
|
572
|
-
|
|
572
|
+
|
|
573
573
|
This allows parent model hooks to receive the same instances but with
|
|
574
574
|
the correct model_cls for hook registration matching.
|
|
575
|
-
|
|
575
|
+
|
|
576
576
|
Args:
|
|
577
577
|
original_changeset: The original changeset (for child model)
|
|
578
578
|
target_model_cls: The model class to build changeset for (parent model)
|
|
579
|
-
|
|
579
|
+
|
|
580
580
|
Returns:
|
|
581
581
|
ChangeSet for the target model
|
|
582
582
|
"""
|
|
@@ -600,18 +600,18 @@ class BulkOperationCoordinator:
|
|
|
600
600
|
):
|
|
601
601
|
"""
|
|
602
602
|
Execute operation with hooks for entire MTI inheritance chain.
|
|
603
|
-
|
|
603
|
+
|
|
604
604
|
This method dispatches hooks for both child and parent models when
|
|
605
605
|
dealing with MTI models, ensuring parent model hooks fire when
|
|
606
606
|
child instances are created/updated/deleted.
|
|
607
|
-
|
|
607
|
+
|
|
608
608
|
Args:
|
|
609
609
|
changeset: ChangeSet for the child model
|
|
610
610
|
operation: Callable that performs the actual DB operation
|
|
611
611
|
event_prefix: 'create', 'update', or 'delete'
|
|
612
612
|
bypass_hooks: Skip all hooks if True
|
|
613
613
|
bypass_validation: Skip validation hooks if True
|
|
614
|
-
|
|
614
|
+
|
|
615
615
|
Returns:
|
|
616
616
|
Result of operation
|
|
617
617
|
"""
|
|
@@ -649,8 +649,9 @@ class BulkOperationCoordinator:
|
|
|
649
649
|
else:
|
|
650
650
|
# Normal create operation
|
|
651
651
|
from django_bulk_hooks.helpers import build_changeset_for_create
|
|
652
|
+
|
|
652
653
|
changeset = build_changeset_for_create(changeset.model_cls, result)
|
|
653
|
-
|
|
654
|
+
|
|
654
655
|
for model_cls in models_in_chain:
|
|
655
656
|
model_changeset = self._build_changeset_for_model(changeset, model_cls)
|
|
656
657
|
self.dispatcher.dispatch(model_changeset, f"after_{event_prefix}", bypass_hooks=False)
|
|
@@ -680,11 +681,13 @@ class BulkOperationCoordinator:
|
|
|
680
681
|
for field_name in update_kwargs.keys():
|
|
681
682
|
try:
|
|
682
683
|
field = self.model_cls._meta.get_field(field_name)
|
|
683
|
-
if (
|
|
684
|
-
|
|
685
|
-
not field.
|
|
686
|
-
|
|
687
|
-
field
|
|
684
|
+
if (
|
|
685
|
+
field.is_relation
|
|
686
|
+
and not field.many_to_many
|
|
687
|
+
and not field.one_to_many
|
|
688
|
+
and hasattr(field, "attname")
|
|
689
|
+
and field.attname == field_name
|
|
690
|
+
):
|
|
688
691
|
# This is a FK field being updated by its attname (e.g., business_id)
|
|
689
692
|
# Add the relationship name (e.g., 'business') to skip list
|
|
690
693
|
fk_relationships.add(field.name)
|
|
@@ -696,86 +699,114 @@ class BulkOperationCoordinator:
|
|
|
696
699
|
|
|
697
700
|
def _is_upsert_operation(self, result_objects):
|
|
698
701
|
"""
|
|
699
|
-
Check if the operation was an upsert (
|
|
700
|
-
|
|
702
|
+
Check if the operation was an upsert (with update_conflicts=True).
|
|
703
|
+
|
|
701
704
|
Args:
|
|
702
705
|
result_objects: List of objects returned from the operation
|
|
703
|
-
|
|
706
|
+
|
|
704
707
|
Returns:
|
|
705
708
|
True if this was an upsert operation, False otherwise
|
|
706
709
|
"""
|
|
707
710
|
if not result_objects:
|
|
708
711
|
return False
|
|
709
|
-
|
|
712
|
+
|
|
710
713
|
# Check if any object has upsert metadata
|
|
711
|
-
return hasattr(result_objects[0],
|
|
714
|
+
return hasattr(result_objects[0], "_bulk_hooks_upsert_metadata")
|
|
712
715
|
|
|
713
716
|
def _dispatch_upsert_after_hooks(self, result_objects, models_in_chain):
|
|
714
717
|
"""
|
|
715
718
|
Dispatch after hooks for upsert operations, splitting by create/update.
|
|
716
|
-
|
|
719
|
+
|
|
717
720
|
This matches Salesforce behavior:
|
|
718
721
|
- Records that were created fire after_create hooks
|
|
719
722
|
- Records that were updated fire after_update hooks
|
|
720
|
-
|
|
723
|
+
|
|
721
724
|
Args:
|
|
722
725
|
result_objects: List of objects returned from the operation
|
|
723
726
|
models_in_chain: List of model classes in the MTI inheritance chain
|
|
724
727
|
"""
|
|
725
|
-
# Split objects by
|
|
728
|
+
# Split objects based on metadata set by the executor
|
|
726
729
|
created_objects = []
|
|
727
730
|
updated_objects = []
|
|
728
|
-
|
|
731
|
+
|
|
732
|
+
if not result_objects:
|
|
733
|
+
return
|
|
734
|
+
|
|
729
735
|
for obj in result_objects:
|
|
730
|
-
|
|
731
|
-
if
|
|
732
|
-
|
|
736
|
+
# Check if metadata was set
|
|
737
|
+
if hasattr(obj, "_bulk_hooks_was_created"):
|
|
738
|
+
was_created = getattr(obj, "_bulk_hooks_was_created", True)
|
|
739
|
+
if was_created:
|
|
740
|
+
created_objects.append(obj)
|
|
741
|
+
else:
|
|
742
|
+
updated_objects.append(obj)
|
|
733
743
|
else:
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
744
|
+
# Fallback: if no metadata, check timestamps
|
|
745
|
+
model_cls = obj.__class__
|
|
746
|
+
if hasattr(model_cls, "created_at") and hasattr(model_cls, "updated_at"):
|
|
747
|
+
# Reload from DB to get accurate timestamps
|
|
748
|
+
db_obj = model_cls.objects.filter(pk=obj.pk).values("created_at", "updated_at").first()
|
|
749
|
+
if db_obj:
|
|
750
|
+
created_at = db_obj["created_at"]
|
|
751
|
+
updated_at = db_obj["updated_at"]
|
|
752
|
+
if created_at and updated_at:
|
|
753
|
+
time_diff = abs((updated_at - created_at).total_seconds())
|
|
754
|
+
if time_diff <= 1.0: # Within 1 second = just created
|
|
755
|
+
created_objects.append(obj)
|
|
756
|
+
else:
|
|
757
|
+
updated_objects.append(obj)
|
|
758
|
+
else:
|
|
759
|
+
# No timestamps, default to created
|
|
760
|
+
created_objects.append(obj)
|
|
761
|
+
else:
|
|
762
|
+
# Object not found, treat as created
|
|
763
|
+
created_objects.append(obj)
|
|
764
|
+
else:
|
|
765
|
+
# No timestamp fields, default to created
|
|
766
|
+
created_objects.append(obj)
|
|
767
|
+
|
|
768
|
+
logger.info(f"Upsert after hooks: {len(created_objects)} created, {len(updated_objects)} updated")
|
|
769
|
+
|
|
741
770
|
# Dispatch after_create hooks for created objects
|
|
742
771
|
if created_objects:
|
|
743
772
|
from django_bulk_hooks.helpers import build_changeset_for_create
|
|
773
|
+
|
|
744
774
|
create_changeset = build_changeset_for_create(self.model_cls, created_objects)
|
|
745
|
-
|
|
775
|
+
|
|
746
776
|
for model_cls in models_in_chain:
|
|
747
777
|
model_changeset = self._build_changeset_for_model(create_changeset, model_cls)
|
|
748
778
|
self.dispatcher.dispatch(model_changeset, "after_create", bypass_hooks=False)
|
|
749
|
-
|
|
779
|
+
|
|
750
780
|
# Dispatch after_update hooks for updated objects
|
|
751
781
|
if updated_objects:
|
|
752
782
|
# Fetch old records for proper change detection
|
|
753
783
|
old_records_map = self.analyzer.fetch_old_records_map(updated_objects)
|
|
754
|
-
|
|
784
|
+
|
|
755
785
|
from django_bulk_hooks.helpers import build_changeset_for_update
|
|
786
|
+
|
|
756
787
|
update_changeset = build_changeset_for_update(
|
|
757
788
|
self.model_cls,
|
|
758
789
|
updated_objects,
|
|
759
790
|
update_kwargs={}, # Empty since we don't know specific fields
|
|
760
791
|
old_records_map=old_records_map,
|
|
761
792
|
)
|
|
762
|
-
|
|
793
|
+
|
|
763
794
|
for model_cls in models_in_chain:
|
|
764
795
|
model_changeset = self._build_changeset_for_model(update_changeset, model_cls)
|
|
765
796
|
self.dispatcher.dispatch(model_changeset, "after_update", bypass_hooks=False)
|
|
766
|
-
|
|
797
|
+
|
|
767
798
|
# Clean up temporary metadata
|
|
768
799
|
self._cleanup_upsert_metadata(result_objects)
|
|
769
800
|
|
|
770
801
|
def _cleanup_upsert_metadata(self, result_objects):
|
|
771
802
|
"""
|
|
772
803
|
Clean up temporary metadata added during upsert operations.
|
|
773
|
-
|
|
804
|
+
|
|
774
805
|
Args:
|
|
775
806
|
result_objects: List of objects to clean up
|
|
776
807
|
"""
|
|
777
808
|
for obj in result_objects:
|
|
778
|
-
if hasattr(obj,
|
|
779
|
-
delattr(obj,
|
|
780
|
-
if hasattr(obj,
|
|
781
|
-
delattr(obj,
|
|
809
|
+
if hasattr(obj, "_bulk_hooks_was_created"):
|
|
810
|
+
delattr(obj, "_bulk_hooks_was_created")
|
|
811
|
+
if hasattr(obj, "_bulk_hooks_upsert_metadata"):
|
|
812
|
+
delattr(obj, "_bulk_hooks_upsert_metadata")
|
|
@@ -20,7 +20,7 @@ class MTIHandler:
|
|
|
20
20
|
|
|
21
21
|
This service detects MTI models and builds execution plans.
|
|
22
22
|
It does NOT execute database operations - that's the BulkExecutor's job.
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
Responsibilities:
|
|
25
25
|
- Detect MTI models
|
|
26
26
|
- Build inheritance chains
|
|
@@ -45,8 +45,9 @@ class MTIHandler:
|
|
|
45
45
|
Returns:
|
|
46
46
|
bool: True if model has concrete parent models
|
|
47
47
|
"""
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
# Check if this model has concrete parent models (not abstract)
|
|
49
|
+
for parent in self.model_cls._meta.parents.keys():
|
|
50
|
+
if not parent._meta.abstract and parent._meta.concrete_model != self.model_cls._meta.concrete_model:
|
|
50
51
|
return True
|
|
51
52
|
return False
|
|
52
53
|
|
|
@@ -73,15 +74,12 @@ class MTIHandler:
|
|
|
73
74
|
current_model = self.model_cls
|
|
74
75
|
|
|
75
76
|
while current_model:
|
|
76
|
-
if not current_model._meta.proxy:
|
|
77
|
+
if not current_model._meta.proxy and not current_model._meta.abstract:
|
|
77
78
|
chain.append(current_model)
|
|
78
79
|
|
|
79
|
-
# Get concrete parent models
|
|
80
|
-
parents = [
|
|
81
|
-
|
|
82
|
-
for parent in current_model._meta.parents.keys()
|
|
83
|
-
if not parent._meta.proxy
|
|
84
|
-
]
|
|
80
|
+
# Get concrete parent models (not abstract, not proxy)
|
|
81
|
+
parents = [parent for parent in current_model._meta.parents.keys()
|
|
82
|
+
if not parent._meta.proxy and not parent._meta.abstract]
|
|
85
83
|
|
|
86
84
|
current_model = parents[0] if parents else None
|
|
87
85
|
|
|
@@ -127,10 +125,10 @@ class MTIHandler:
|
|
|
127
125
|
):
|
|
128
126
|
"""
|
|
129
127
|
Build an execution plan for bulk creating MTI model instances.
|
|
130
|
-
|
|
128
|
+
|
|
131
129
|
This method does NOT execute any database operations.
|
|
132
130
|
It returns a plan that the BulkExecutor will execute.
|
|
133
|
-
|
|
131
|
+
|
|
134
132
|
Args:
|
|
135
133
|
objs: List of model instances to create
|
|
136
134
|
batch_size: Number of objects per batch
|
|
@@ -139,7 +137,7 @@ class MTIHandler:
|
|
|
139
137
|
update_fields: Fields to update on conflict
|
|
140
138
|
existing_record_ids: Set of id() for objects that exist in DB (from RecordClassifier)
|
|
141
139
|
existing_pks_map: Dict mapping id(obj) -> pk for existing records (from RecordClassifier)
|
|
142
|
-
|
|
140
|
+
|
|
143
141
|
Returns:
|
|
144
142
|
MTICreatePlan object
|
|
145
143
|
"""
|
|
@@ -205,9 +203,9 @@ class MTIHandler:
|
|
|
205
203
|
):
|
|
206
204
|
"""
|
|
207
205
|
Build parent level objects for each level in the inheritance chain.
|
|
208
|
-
|
|
206
|
+
|
|
209
207
|
This is pure in-memory object creation - no DB operations.
|
|
210
|
-
|
|
208
|
+
|
|
211
209
|
Returns:
|
|
212
210
|
List of ParentLevel objects
|
|
213
211
|
"""
|
|
@@ -255,16 +253,14 @@ class MTIHandler:
|
|
|
255
253
|
# Check if this model has a matching constraint
|
|
256
254
|
if normalized_unique and self._has_matching_constraint(model_class, normalized_unique):
|
|
257
255
|
# Filter update fields
|
|
258
|
-
filtered_updates = [
|
|
259
|
-
uf for uf in (update_fields or []) if uf in model_fields_by_name
|
|
260
|
-
]
|
|
256
|
+
filtered_updates = [uf for uf in (update_fields or []) if uf in model_fields_by_name]
|
|
261
257
|
|
|
262
258
|
# If no fields to update at this level but we need upsert to prevent
|
|
263
259
|
# unique constraint violations, use one of the unique fields as a dummy
|
|
264
260
|
# update field (updating it to itself is a safe no-op)
|
|
265
261
|
if not filtered_updates and normalized_unique:
|
|
266
262
|
filtered_updates = [normalized_unique[0]]
|
|
267
|
-
|
|
263
|
+
|
|
268
264
|
# Only enable upsert if we have fields to update (real or dummy)
|
|
269
265
|
if filtered_updates:
|
|
270
266
|
level_update_conflicts = True
|
|
@@ -288,10 +284,8 @@ class MTIHandler:
|
|
|
288
284
|
"""Check if model has a unique constraint matching the given fields."""
|
|
289
285
|
try:
|
|
290
286
|
from django.db.models import UniqueConstraint
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
if isinstance(c, UniqueConstraint)
|
|
294
|
-
]
|
|
287
|
+
|
|
288
|
+
constraint_field_sets = [tuple(c.fields) for c in model_class._meta.constraints if isinstance(c, UniqueConstraint)]
|
|
295
289
|
except Exception:
|
|
296
290
|
constraint_field_sets = []
|
|
297
291
|
|
|
@@ -319,12 +313,12 @@ class MTIHandler:
|
|
|
319
313
|
def _create_parent_instance(self, source_obj, parent_model, current_parent):
|
|
320
314
|
"""
|
|
321
315
|
Create a parent instance from source object (in-memory only).
|
|
322
|
-
|
|
316
|
+
|
|
323
317
|
Args:
|
|
324
318
|
source_obj: Original object with data
|
|
325
319
|
parent_model: Parent model class to create instance of
|
|
326
320
|
current_parent: Parent instance from previous level (if any)
|
|
327
|
-
|
|
321
|
+
|
|
328
322
|
Returns:
|
|
329
323
|
Parent model instance (not saved)
|
|
330
324
|
"""
|
|
@@ -335,8 +329,7 @@ class MTIHandler:
|
|
|
335
329
|
if hasattr(source_obj, field.name):
|
|
336
330
|
value = getattr(source_obj, field.name, None)
|
|
337
331
|
if value is not None:
|
|
338
|
-
if
|
|
339
|
-
not field.one_to_many):
|
|
332
|
+
if field.is_relation and not field.many_to_many and not field.one_to_many:
|
|
340
333
|
# Handle FK fields
|
|
341
334
|
if hasattr(value, "pk") and value.pk is not None:
|
|
342
335
|
setattr(parent_obj, field.attname, value.pk)
|
|
@@ -348,8 +341,7 @@ class MTIHandler:
|
|
|
348
341
|
# Link to parent if exists
|
|
349
342
|
if current_parent is not None:
|
|
350
343
|
for field in parent_model._meta.local_fields:
|
|
351
|
-
if
|
|
352
|
-
field.remote_field.model == current_parent.__class__):
|
|
344
|
+
if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == current_parent.__class__:
|
|
353
345
|
setattr(parent_obj, field.name, current_parent)
|
|
354
346
|
break
|
|
355
347
|
|
|
@@ -373,13 +365,13 @@ class MTIHandler:
|
|
|
373
365
|
def _create_child_instance_template(self, source_obj, child_model):
|
|
374
366
|
"""
|
|
375
367
|
Create a child instance template (in-memory only, without parent links).
|
|
376
|
-
|
|
368
|
+
|
|
377
369
|
The executor will add parent links after creating parent objects.
|
|
378
|
-
|
|
370
|
+
|
|
379
371
|
Args:
|
|
380
372
|
source_obj: Original object with data
|
|
381
373
|
child_model: Child model class
|
|
382
|
-
|
|
374
|
+
|
|
383
375
|
Returns:
|
|
384
376
|
Child model instance (not saved, no parent links)
|
|
385
377
|
"""
|
|
@@ -399,8 +391,7 @@ class MTIHandler:
|
|
|
399
391
|
if hasattr(source_obj, field.name):
|
|
400
392
|
value = getattr(source_obj, field.name, None)
|
|
401
393
|
if value is not None:
|
|
402
|
-
if
|
|
403
|
-
not field.one_to_many):
|
|
394
|
+
if field.is_relation and not field.many_to_many and not field.one_to_many:
|
|
404
395
|
if hasattr(value, "pk") and value.pk is not None:
|
|
405
396
|
setattr(child_obj, field.attname, value.pk)
|
|
406
397
|
else:
|
|
@@ -430,14 +421,14 @@ class MTIHandler:
|
|
|
430
421
|
def build_update_plan(self, objs, fields, batch_size=None):
|
|
431
422
|
"""
|
|
432
423
|
Build an execution plan for bulk updating MTI model instances.
|
|
433
|
-
|
|
424
|
+
|
|
434
425
|
This method does NOT execute any database operations.
|
|
435
|
-
|
|
426
|
+
|
|
436
427
|
Args:
|
|
437
428
|
objs: List of model instances to update
|
|
438
429
|
fields: List of field names to update
|
|
439
430
|
batch_size: Number of objects per batch
|
|
440
|
-
|
|
431
|
+
|
|
441
432
|
Returns:
|
|
442
433
|
MTIUpdatePlan object
|
|
443
434
|
"""
|
|
@@ -497,11 +488,13 @@ class MTIHandler:
|
|
|
497
488
|
break
|
|
498
489
|
filter_field = parent_link.attname if parent_link else "pk"
|
|
499
490
|
|
|
500
|
-
field_groups.append(
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
491
|
+
field_groups.append(
|
|
492
|
+
ModelFieldGroup(
|
|
493
|
+
model_class=model,
|
|
494
|
+
fields=model_fields,
|
|
495
|
+
filter_field=filter_field,
|
|
496
|
+
)
|
|
497
|
+
)
|
|
505
498
|
|
|
506
499
|
return MTIUpdatePlan(
|
|
507
500
|
inheritance_chain=inheritance_chain,
|
|
@@ -14,7 +14,7 @@ from typing import Any
|
|
|
14
14
|
class ParentLevel:
|
|
15
15
|
"""
|
|
16
16
|
Represents one level in the parent hierarchy for MTI bulk create.
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
Attributes:
|
|
19
19
|
model_class: The parent model class for this level
|
|
20
20
|
objects: List of parent instances to create
|
|
@@ -23,6 +23,7 @@ class ParentLevel:
|
|
|
23
23
|
unique_fields: Fields for conflict detection (if update_conflicts=True)
|
|
24
24
|
update_fields: Fields to update on conflict (if update_conflicts=True)
|
|
25
25
|
"""
|
|
26
|
+
|
|
26
27
|
model_class: Any
|
|
27
28
|
objects: list[Any]
|
|
28
29
|
original_object_map: dict[int, int] = field(default_factory=dict)
|
|
@@ -35,10 +36,10 @@ class ParentLevel:
|
|
|
35
36
|
class MTICreatePlan:
|
|
36
37
|
"""
|
|
37
38
|
Plan for executing bulk_create on an MTI model.
|
|
38
|
-
|
|
39
|
+
|
|
39
40
|
This plan describes WHAT to create, not HOW to create it.
|
|
40
41
|
The executor is responsible for executing this plan.
|
|
41
|
-
|
|
42
|
+
|
|
42
43
|
Attributes:
|
|
43
44
|
inheritance_chain: List of model classes from root to child
|
|
44
45
|
parent_levels: List of ParentLevel objects, one per parent model
|
|
@@ -51,6 +52,7 @@ class MTICreatePlan:
|
|
|
51
52
|
unique_fields: Fields used for conflict detection
|
|
52
53
|
update_fields: Fields to update on conflict
|
|
53
54
|
"""
|
|
55
|
+
|
|
54
56
|
inheritance_chain: list[Any]
|
|
55
57
|
parent_levels: list[ParentLevel]
|
|
56
58
|
child_objects: list[Any]
|
|
@@ -67,12 +69,13 @@ class MTICreatePlan:
|
|
|
67
69
|
class ModelFieldGroup:
|
|
68
70
|
"""
|
|
69
71
|
Represents fields to update for one model in the inheritance chain.
|
|
70
|
-
|
|
72
|
+
|
|
71
73
|
Attributes:
|
|
72
74
|
model_class: The model class
|
|
73
75
|
fields: List of field names to update on this model
|
|
74
76
|
filter_field: Field to use for filtering (e.g., 'pk' or parent link attname)
|
|
75
77
|
"""
|
|
78
|
+
|
|
76
79
|
model_class: Any
|
|
77
80
|
fields: list[str]
|
|
78
81
|
filter_field: str = "pk"
|
|
@@ -82,15 +85,15 @@ class ModelFieldGroup:
|
|
|
82
85
|
class MTIUpdatePlan:
|
|
83
86
|
"""
|
|
84
87
|
Plan for executing bulk_update on an MTI model.
|
|
85
|
-
|
|
88
|
+
|
|
86
89
|
Attributes:
|
|
87
90
|
inheritance_chain: List of model classes from root to child
|
|
88
91
|
field_groups: List of ModelFieldGroup objects
|
|
89
92
|
objects: Objects to update
|
|
90
93
|
batch_size: Batch size for operations
|
|
91
94
|
"""
|
|
95
|
+
|
|
92
96
|
inheritance_chain: list[Any]
|
|
93
97
|
field_groups: list[ModelFieldGroup]
|
|
94
98
|
objects: list[Any]
|
|
95
99
|
batch_size: int = None
|
|
96
|
-
|
|
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|
|
17
17
|
class RecordClassifier:
|
|
18
18
|
"""
|
|
19
19
|
Service for classifying and fetching records via database queries.
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
This is the SINGLE point of truth for record classification queries.
|
|
22
22
|
Keeps database access logic separate from business/planning logic.
|
|
23
23
|
"""
|
|
@@ -34,14 +34,14 @@ class RecordClassifier:
|
|
|
34
34
|
def classify_for_upsert(self, objs, unique_fields):
|
|
35
35
|
"""
|
|
36
36
|
Classify records as new or existing based on unique_fields.
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
Queries the database to check which records already exist based on the
|
|
39
39
|
unique_fields constraint.
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
Args:
|
|
42
42
|
objs: List of model instances
|
|
43
43
|
unique_fields: List of field names that form the unique constraint
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
Returns:
|
|
46
46
|
Tuple of (existing_record_ids, existing_pks_map)
|
|
47
47
|
- existing_record_ids: Set of id() for objects that exist in DB
|
|
@@ -77,9 +77,12 @@ class RecordClassifier:
|
|
|
77
77
|
for q in queries[1:]:
|
|
78
78
|
combined_query |= q
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
)
|
|
80
|
+
logger.info(f"Classifying for upsert: model={self.model_cls.__name__}, query={combined_query}, unique_fields={unique_fields}")
|
|
81
|
+
queryset = self.model_cls.objects.filter(combined_query)
|
|
82
|
+
logger.info(f"Queryset SQL: {queryset.query}")
|
|
83
|
+
logger.info(f"All records in table: {self.model_cls.objects.all().count()}")
|
|
84
|
+
existing_records = list(queryset.values("pk", *unique_fields))
|
|
85
|
+
logger.info(f"Found {len(existing_records)} existing records: {existing_records}")
|
|
83
86
|
|
|
84
87
|
# Map existing records back to original objects
|
|
85
88
|
existing_record_ids = set()
|
|
@@ -94,8 +97,7 @@ class RecordClassifier:
|
|
|
94
97
|
existing_pks_map[obj_id] = record["pk"]
|
|
95
98
|
|
|
96
99
|
logger.info(
|
|
97
|
-
f"Classified {len(existing_record_ids)} existing and "
|
|
98
|
-
f"{len(objs) - len(existing_record_ids)} new records for upsert",
|
|
100
|
+
f"Classified {len(existing_record_ids)} existing and {len(objs) - len(existing_record_ids)} new records for upsert",
|
|
99
101
|
)
|
|
100
102
|
|
|
101
103
|
return existing_record_ids, existing_pks_map
|
|
@@ -103,12 +105,12 @@ class RecordClassifier:
|
|
|
103
105
|
def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
|
|
104
106
|
"""
|
|
105
107
|
Fetch records by primary keys with optional relationship loading.
|
|
106
|
-
|
|
108
|
+
|
|
107
109
|
Args:
|
|
108
110
|
pks: List of primary key values
|
|
109
111
|
select_related: Optional list of fields to select_related
|
|
110
112
|
prefetch_related: Optional list of fields to prefetch_related
|
|
111
|
-
|
|
113
|
+
|
|
112
114
|
Returns:
|
|
113
115
|
Dict[pk, instance] for O(1) lookups
|
|
114
116
|
"""
|
|
@@ -128,10 +130,10 @@ class RecordClassifier:
|
|
|
128
130
|
def fetch_by_unique_constraint(self, field_values_map):
|
|
129
131
|
"""
|
|
130
132
|
Fetch records matching a unique constraint.
|
|
131
|
-
|
|
133
|
+
|
|
132
134
|
Args:
|
|
133
135
|
field_values_map: Dict of {field_name: value} for unique constraint
|
|
134
|
-
|
|
136
|
+
|
|
135
137
|
Returns:
|
|
136
138
|
Model instance if found, None otherwise
|
|
137
139
|
"""
|
|
@@ -141,18 +143,17 @@ class RecordClassifier:
|
|
|
141
143
|
return None
|
|
142
144
|
except self.model_cls.MultipleObjectsReturned:
|
|
143
145
|
logger.warning(
|
|
144
|
-
f"Multiple {self.model_cls.__name__} records found for "
|
|
145
|
-
f"unique constraint {field_values_map}",
|
|
146
|
+
f"Multiple {self.model_cls.__name__} records found for unique constraint {field_values_map}",
|
|
146
147
|
)
|
|
147
148
|
return self.model_cls.objects.filter(**field_values_map).first()
|
|
148
149
|
|
|
149
150
|
def exists_by_pks(self, pks):
|
|
150
151
|
"""
|
|
151
152
|
Check if records exist by primary keys without fetching them.
|
|
152
|
-
|
|
153
|
+
|
|
153
154
|
Args:
|
|
154
155
|
pks: List of primary key values
|
|
155
|
-
|
|
156
|
+
|
|
156
157
|
Returns:
|
|
157
158
|
Set of PKs that exist in the database
|
|
158
159
|
"""
|
|
@@ -168,13 +169,13 @@ class RecordClassifier:
|
|
|
168
169
|
def count_by_unique_fields(self, objs, unique_fields):
|
|
169
170
|
"""
|
|
170
171
|
Count how many objects already exist based on unique fields.
|
|
171
|
-
|
|
172
|
+
|
|
172
173
|
Useful for validation or reporting before upsert operations.
|
|
173
|
-
|
|
174
|
+
|
|
174
175
|
Args:
|
|
175
176
|
objs: List of model instances
|
|
176
177
|
unique_fields: List of field names that form the unique constraint
|
|
177
|
-
|
|
178
|
+
|
|
178
179
|
Returns:
|
|
179
180
|
Tuple of (existing_count, new_count)
|
|
180
181
|
"""
|
|
@@ -12,15 +12,15 @@ django_bulk_hooks/helpers.py,sha256=Nw8eXryLUUquW7AgiuKp0PQT3Pq6HAHsdP-xAtqhmjA,
|
|
|
12
12
|
django_bulk_hooks/manager.py,sha256=3mFzB0ZzHHeXWdKGObZD_H0NlskHJc8uYBF69KKdAXU,4068
|
|
13
13
|
django_bulk_hooks/models.py,sha256=4Vvi2LiGP0g4j08a5liqBROfsO8Wd_ermBoyjKwfrPU,2512
|
|
14
14
|
django_bulk_hooks/operations/__init__.py,sha256=BtJYjmRhe_sScivLsniDaZmBkm0ZLvcmzXFKL7QY2Xg,550
|
|
15
|
-
django_bulk_hooks/operations/analyzer.py,sha256=
|
|
16
|
-
django_bulk_hooks/operations/bulk_executor.py,sha256=
|
|
17
|
-
django_bulk_hooks/operations/coordinator.py,sha256=
|
|
18
|
-
django_bulk_hooks/operations/mti_handler.py,sha256=
|
|
19
|
-
django_bulk_hooks/operations/mti_plans.py,sha256=
|
|
20
|
-
django_bulk_hooks/operations/record_classifier.py,sha256=
|
|
15
|
+
django_bulk_hooks/operations/analyzer.py,sha256=wAG8sAG9NwfwNqG9z81VfGR7AANDzRmMGE_o82MWji4,10689
|
|
16
|
+
django_bulk_hooks/operations/bulk_executor.py,sha256=4gCvG1Co3rqUR4TmlA4af-McHBLvOk3ekNAEguPsIBA,22678
|
|
17
|
+
django_bulk_hooks/operations/coordinator.py,sha256=6qalGxbKqgglYwwYXVS49T8pd15uO95qexcTP_V2buk,30354
|
|
18
|
+
django_bulk_hooks/operations/mti_handler.py,sha256=qx1xuOzxT3ibz2WJcfPsKxOiG-HU-axBRSmz76neD_w,19561
|
|
19
|
+
django_bulk_hooks/operations/mti_plans.py,sha256=7STQ2oA2ZT8cEG3-t-6xciRAdf7OeSf0gRLXR_BRG-Q,3363
|
|
20
|
+
django_bulk_hooks/operations/record_classifier.py,sha256=KiMmTjEZ3QdkSImCJnG5LBu2uSmcuIYQP4rSIzofdvQ,6437
|
|
21
21
|
django_bulk_hooks/queryset.py,sha256=aQitlbexcVnmeAdc0jtO3hci39p4QEu4srQPEzozy5s,5546
|
|
22
22
|
django_bulk_hooks/registry.py,sha256=uum5jhGI3TPaoiXuA1MdBdu4gbE3rQGGwQ5YDjiMcjk,7949
|
|
23
|
-
django_bulk_hooks-0.2.
|
|
24
|
-
django_bulk_hooks-0.2.
|
|
25
|
-
django_bulk_hooks-0.2.
|
|
26
|
-
django_bulk_hooks-0.2.
|
|
23
|
+
django_bulk_hooks-0.2.46.dist-info/LICENSE,sha256=dguKIcbDGeZD-vXWdLyErPUALYOvtX_fO4Zjhq481uk,1088
|
|
24
|
+
django_bulk_hooks-0.2.46.dist-info/METADATA,sha256=mvL_yi_iUtU2JrP7pZ8lDfVUFAGBBK_dmQ0-VjhhYZE,9265
|
|
25
|
+
django_bulk_hooks-0.2.46.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
26
|
+
django_bulk_hooks-0.2.46.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|