django-bulk-hooks 0.2.14.tar.gz → 0.2.16.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of django-bulk-hooks might be problematic. See the registry's page for this release for more details.
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/PKG-INFO +1 -1
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/decorators.py +7 -1
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/dispatcher.py +10 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/bulk_executor.py +159 -76
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/coordinator.py +223 -92
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_handler.py +30 -1
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_plans.py +8 -0
- django_bulk_hooks-0.2.16/django_bulk_hooks/operations/record_classifier.py +183 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/registry.py +15 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/pyproject.toml +1 -1
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/LICENSE +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/README.md +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/__init__.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/changeset.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/conditions.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/constants.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/context.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/enums.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/factory.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/handler.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/helpers.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/manager.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/models.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/__init__.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/analyzer.py +0 -0
- {django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/queryset.py +0 -0
|
@@ -290,7 +290,13 @@ def bulk_hook(model_cls, event, when=None, priority=None):
|
|
|
290
290
|
return self.func(changeset, new_records, old_records, **kwargs)
|
|
291
291
|
else:
|
|
292
292
|
# Old signature without changeset
|
|
293
|
-
|
|
293
|
+
# Only pass changeset in kwargs if the function accepts **kwargs
|
|
294
|
+
if 'kwargs' in params or any(param.startswith('**') for param in sig.parameters):
|
|
295
|
+
kwargs['changeset'] = changeset
|
|
296
|
+
return self.func(new_records, old_records, **kwargs)
|
|
297
|
+
else:
|
|
298
|
+
# Function doesn't accept **kwargs, just call with positional args
|
|
299
|
+
return self.func(new_records, old_records)
|
|
294
300
|
|
|
295
301
|
# Register the hook using the registry
|
|
296
302
|
register_hook(
|
|
@@ -244,3 +244,13 @@ def get_dispatcher():
|
|
|
244
244
|
# Create dispatcher with the registry instance
|
|
245
245
|
_dispatcher = HookDispatcher(get_registry())
|
|
246
246
|
return _dispatcher
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def reset_dispatcher():
|
|
250
|
+
"""
|
|
251
|
+
Reset the global dispatcher instance.
|
|
252
|
+
|
|
253
|
+
Useful for testing to ensure clean state between tests.
|
|
254
|
+
"""
|
|
255
|
+
global _dispatcher
|
|
256
|
+
_dispatcher = None
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/bulk_executor.py
RENAMED
|
@@ -21,7 +21,7 @@ class BulkExecutor:
|
|
|
21
21
|
Dependencies are explicitly injected via constructor.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
def __init__(self, queryset, analyzer, mti_handler):
|
|
24
|
+
def __init__(self, queryset, analyzer, mti_handler, record_classifier):
|
|
25
25
|
"""
|
|
26
26
|
Initialize bulk executor with explicit dependencies.
|
|
27
27
|
|
|
@@ -29,10 +29,12 @@ class BulkExecutor:
|
|
|
29
29
|
queryset: Django QuerySet instance
|
|
30
30
|
analyzer: ModelAnalyzer instance (replaces validator + field_tracker)
|
|
31
31
|
mti_handler: MTIHandler instance
|
|
32
|
+
record_classifier: RecordClassifier instance
|
|
32
33
|
"""
|
|
33
34
|
self.queryset = queryset
|
|
34
35
|
self.analyzer = analyzer
|
|
35
36
|
self.mti_handler = mti_handler
|
|
37
|
+
self.record_classifier = record_classifier
|
|
36
38
|
self.model_cls = queryset.model
|
|
37
39
|
|
|
38
40
|
def bulk_create(
|
|
@@ -69,13 +71,24 @@ class BulkExecutor:
|
|
|
69
71
|
# Check if this is an MTI model and route accordingly
|
|
70
72
|
if self.mti_handler.is_mti_model():
|
|
71
73
|
logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
|
|
72
|
-
|
|
74
|
+
|
|
75
|
+
# Classify records using the classifier service
|
|
76
|
+
existing_record_ids = set()
|
|
77
|
+
existing_pks_map = {}
|
|
78
|
+
if update_conflicts and unique_fields:
|
|
79
|
+
existing_record_ids, existing_pks_map = (
|
|
80
|
+
self.record_classifier.classify_for_upsert(objs, unique_fields)
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Build execution plan with classification results
|
|
73
84
|
plan = self.mti_handler.build_create_plan(
|
|
74
85
|
objs,
|
|
75
86
|
batch_size=batch_size,
|
|
76
87
|
update_conflicts=update_conflicts,
|
|
77
88
|
update_fields=update_fields,
|
|
78
89
|
unique_fields=unique_fields,
|
|
90
|
+
existing_record_ids=existing_record_ids,
|
|
91
|
+
existing_pks_map=existing_pks_map,
|
|
79
92
|
)
|
|
80
93
|
# Execute the plan
|
|
81
94
|
return self._execute_mti_create_plan(plan)
|
|
@@ -161,12 +174,13 @@ class BulkExecutor:
|
|
|
161
174
|
Execute an MTI create plan.
|
|
162
175
|
|
|
163
176
|
This is where ALL database operations happen for MTI bulk_create.
|
|
177
|
+
Handles both new records (INSERT) and existing records (UPDATE) for upsert.
|
|
164
178
|
|
|
165
179
|
Args:
|
|
166
180
|
plan: MTICreatePlan object from MTIHandler
|
|
167
181
|
|
|
168
182
|
Returns:
|
|
169
|
-
List of created objects with PKs assigned
|
|
183
|
+
List of created/updated objects with PKs assigned
|
|
170
184
|
"""
|
|
171
185
|
from django.db import transaction
|
|
172
186
|
from django.db.models import QuerySet as BaseQuerySet
|
|
@@ -175,31 +189,63 @@ class BulkExecutor:
|
|
|
175
189
|
return []
|
|
176
190
|
|
|
177
191
|
with transaction.atomic(using=self.queryset.db, savepoint=False):
|
|
178
|
-
# Step 1: Create all parent objects level by level
|
|
192
|
+
# Step 1: Create/Update all parent objects level by level
|
|
179
193
|
parent_instances_map = {} # Maps original obj id() -> {model: parent_instance}
|
|
180
194
|
|
|
181
195
|
for parent_level in plan.parent_levels:
|
|
182
|
-
#
|
|
183
|
-
|
|
196
|
+
# Separate new and existing parent objects
|
|
197
|
+
new_parents = []
|
|
198
|
+
existing_parents = []
|
|
184
199
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
200
|
+
for parent_obj in parent_level.objects:
|
|
201
|
+
orig_obj_id = parent_level.original_object_map[id(parent_obj)]
|
|
202
|
+
if orig_obj_id in plan.existing_record_ids:
|
|
203
|
+
existing_parents.append(parent_obj)
|
|
204
|
+
else:
|
|
205
|
+
new_parents.append(parent_obj)
|
|
189
206
|
|
|
190
|
-
#
|
|
191
|
-
|
|
192
|
-
|
|
207
|
+
# Bulk create new parents
|
|
208
|
+
if new_parents:
|
|
209
|
+
bulk_kwargs = {"batch_size": len(new_parents)}
|
|
210
|
+
|
|
211
|
+
if parent_level.update_conflicts:
|
|
212
|
+
bulk_kwargs["update_conflicts"] = True
|
|
213
|
+
bulk_kwargs["unique_fields"] = parent_level.unique_fields
|
|
214
|
+
bulk_kwargs["update_fields"] = parent_level.update_fields
|
|
215
|
+
|
|
216
|
+
# Use base QuerySet to avoid recursion
|
|
217
|
+
base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
|
|
218
|
+
created_parents = base_qs.bulk_create(new_parents, **bulk_kwargs)
|
|
219
|
+
|
|
220
|
+
# Copy generated fields back to parent objects
|
|
221
|
+
for created_parent, parent_obj in zip(created_parents, new_parents):
|
|
222
|
+
for field in parent_level.model_class._meta.local_fields:
|
|
223
|
+
created_value = getattr(created_parent, field.name, None)
|
|
224
|
+
if created_value is not None:
|
|
225
|
+
setattr(parent_obj, field.name, created_value)
|
|
226
|
+
|
|
227
|
+
parent_obj._state.adding = False
|
|
228
|
+
parent_obj._state.db = self.queryset.db
|
|
193
229
|
|
|
194
|
-
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
230
|
+
# Update existing parents
|
|
231
|
+
if existing_parents and parent_level.update_fields:
|
|
232
|
+
# Filter update fields to only those that exist in this parent model
|
|
233
|
+
parent_model_fields = {
|
|
234
|
+
field.name for field in parent_level.model_class._meta.local_fields
|
|
235
|
+
}
|
|
236
|
+
filtered_update_fields = [
|
|
237
|
+
field for field in parent_level.update_fields
|
|
238
|
+
if field in parent_model_fields
|
|
239
|
+
]
|
|
240
|
+
|
|
241
|
+
if filtered_update_fields:
|
|
242
|
+
base_qs = BaseQuerySet(model=parent_level.model_class, using=self.queryset.db)
|
|
243
|
+
base_qs.bulk_update(existing_parents, filtered_update_fields)
|
|
200
244
|
|
|
201
|
-
|
|
202
|
-
parent_obj
|
|
245
|
+
# Mark as not adding
|
|
246
|
+
for parent_obj in existing_parents:
|
|
247
|
+
parent_obj._state.adding = False
|
|
248
|
+
parent_obj._state.db = self.queryset.db
|
|
203
249
|
|
|
204
250
|
# Map parents back to original objects
|
|
205
251
|
for parent_obj in parent_level.objects:
|
|
@@ -208,75 +254,112 @@ class BulkExecutor:
|
|
|
208
254
|
parent_instances_map[orig_obj_id] = {}
|
|
209
255
|
parent_instances_map[orig_obj_id][parent_level.model_class] = parent_obj
|
|
210
256
|
|
|
211
|
-
# Step 2: Add parent links to child objects
|
|
257
|
+
# Step 2: Add parent links to child objects and separate new/existing
|
|
258
|
+
new_child_objects = []
|
|
259
|
+
existing_child_objects = []
|
|
260
|
+
|
|
212
261
|
for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
|
|
213
262
|
parent_instances = parent_instances_map.get(id(orig_obj), {})
|
|
214
263
|
|
|
264
|
+
# Set parent links
|
|
215
265
|
for parent_model, parent_instance in parent_instances.items():
|
|
216
266
|
parent_link = plan.child_model._meta.get_ancestor_link(parent_model)
|
|
217
267
|
if parent_link:
|
|
218
268
|
setattr(child_obj, parent_link.attname, parent_instance.pk)
|
|
219
269
|
setattr(child_obj, parent_link.name, parent_instance)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
objs_with_pk.append(obj)
|
|
270
|
+
|
|
271
|
+
# Classify as new or existing
|
|
272
|
+
if id(orig_obj) in plan.existing_record_ids:
|
|
273
|
+
# For existing records, set the PK on child object
|
|
274
|
+
pk_value = getattr(orig_obj, 'pk', None)
|
|
275
|
+
if pk_value:
|
|
276
|
+
setattr(child_obj, 'pk', pk_value)
|
|
277
|
+
setattr(child_obj, 'id', pk_value)
|
|
278
|
+
existing_child_objects.append(child_obj)
|
|
230
279
|
else:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
# Get fields for insert
|
|
234
|
-
opts = plan.child_model._meta
|
|
235
|
-
fields = [f for f in opts.local_fields if not f.generated]
|
|
280
|
+
new_child_objects.append(child_obj)
|
|
236
281
|
|
|
237
|
-
#
|
|
238
|
-
if
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
282
|
+
# Step 3: Bulk create new child objects using _batched_insert (to bypass MTI check)
|
|
283
|
+
if new_child_objects:
|
|
284
|
+
base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
|
|
285
|
+
base_qs._prepare_for_bulk_create(new_child_objects)
|
|
286
|
+
|
|
287
|
+
# Partition objects by PK status
|
|
288
|
+
objs_without_pk, objs_with_pk = [], []
|
|
289
|
+
for obj in new_child_objects:
|
|
290
|
+
if obj._is_pk_set():
|
|
291
|
+
objs_with_pk.append(obj)
|
|
292
|
+
else:
|
|
293
|
+
objs_without_pk.append(obj)
|
|
294
|
+
|
|
295
|
+
# Get fields for insert
|
|
296
|
+
opts = plan.child_model._meta
|
|
297
|
+
fields = [f for f in opts.local_fields if not f.generated]
|
|
298
|
+
|
|
299
|
+
# Execute bulk insert
|
|
300
|
+
if objs_with_pk:
|
|
301
|
+
returned_columns = base_qs._batched_insert(
|
|
302
|
+
objs_with_pk,
|
|
303
|
+
fields,
|
|
304
|
+
batch_size=len(objs_with_pk),
|
|
305
|
+
)
|
|
306
|
+
if returned_columns:
|
|
307
|
+
for obj, results in zip(objs_with_pk, returned_columns):
|
|
308
|
+
if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
|
|
309
|
+
for result, field in zip(results, opts.db_returning_fields):
|
|
310
|
+
if field != opts.pk:
|
|
311
|
+
setattr(obj, field.attname, result)
|
|
312
|
+
obj._state.adding = False
|
|
313
|
+
obj._state.db = self.queryset.db
|
|
314
|
+
else:
|
|
315
|
+
for obj in objs_with_pk:
|
|
316
|
+
obj._state.adding = False
|
|
317
|
+
obj._state.db = self.queryset.db
|
|
318
|
+
|
|
319
|
+
if objs_without_pk:
|
|
320
|
+
filtered_fields = [
|
|
321
|
+
f for f in fields
|
|
322
|
+
if not isinstance(f, AutoField) and not f.primary_key
|
|
323
|
+
]
|
|
324
|
+
returned_columns = base_qs._batched_insert(
|
|
325
|
+
objs_without_pk,
|
|
326
|
+
filtered_fields,
|
|
327
|
+
batch_size=len(objs_without_pk),
|
|
328
|
+
)
|
|
329
|
+
if returned_columns:
|
|
330
|
+
for obj, results in zip(objs_without_pk, returned_columns):
|
|
331
|
+
if hasattr(opts, "db_returning_fields"):
|
|
332
|
+
for result, field in zip(results, opts.db_returning_fields):
|
|
249
333
|
setattr(obj, field.attname, result)
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
334
|
+
obj._state.adding = False
|
|
335
|
+
obj._state.db = self.queryset.db
|
|
336
|
+
else:
|
|
337
|
+
for obj in objs_without_pk:
|
|
338
|
+
obj._state.adding = False
|
|
339
|
+
obj._state.db = self.queryset.db
|
|
256
340
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
341
|
+
# Step 3.5: Update existing child objects
|
|
342
|
+
if existing_child_objects and plan.update_fields:
|
|
343
|
+
# Filter update fields to only those that exist in the child model
|
|
344
|
+
child_model_fields = {
|
|
345
|
+
field.name for field in plan.child_model._meta.local_fields
|
|
346
|
+
}
|
|
347
|
+
filtered_child_update_fields = [
|
|
348
|
+
field for field in plan.update_fields
|
|
349
|
+
if field in child_model_fields
|
|
261
350
|
]
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
setattr(obj, field.attname, result)
|
|
272
|
-
obj._state.adding = False
|
|
273
|
-
obj._state.db = self.queryset.db
|
|
274
|
-
else:
|
|
275
|
-
for obj in objs_without_pk:
|
|
276
|
-
obj._state.adding = False
|
|
277
|
-
obj._state.db = self.queryset.db
|
|
351
|
+
|
|
352
|
+
if filtered_child_update_fields:
|
|
353
|
+
base_qs = BaseQuerySet(model=plan.child_model, using=self.queryset.db)
|
|
354
|
+
base_qs.bulk_update(existing_child_objects, filtered_child_update_fields)
|
|
355
|
+
|
|
356
|
+
# Mark as not adding
|
|
357
|
+
for child_obj in existing_child_objects:
|
|
358
|
+
child_obj._state.adding = False
|
|
359
|
+
child_obj._state.db = self.queryset.db
|
|
278
360
|
|
|
279
|
-
|
|
361
|
+
# Combine all children for final processing
|
|
362
|
+
created_children = new_child_objects + existing_child_objects
|
|
280
363
|
|
|
281
364
|
# Step 4: Copy PKs and auto-generated fields back to original objects
|
|
282
365
|
pk_field_name = plan.child_model._meta.pk.name
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/coordinator.py
RENAMED
|
@@ -7,7 +7,8 @@ a clean, simple API for the QuerySet to use.
|
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
9
|
from django.db import transaction
|
|
10
|
-
from django.db.models import QuerySet
|
|
10
|
+
from django.db.models import QuerySet
|
|
11
|
+
from django.core.exceptions import FieldDoesNotExist
|
|
11
12
|
|
|
12
13
|
from django_bulk_hooks.helpers import (
|
|
13
14
|
build_changeset_for_create,
|
|
@@ -29,6 +30,7 @@ class BulkOperationCoordinator:
|
|
|
29
30
|
Services are created lazily and cached.
|
|
30
31
|
"""
|
|
31
32
|
|
|
33
|
+
|
|
32
34
|
def __init__(self, queryset):
|
|
33
35
|
"""
|
|
34
36
|
Initialize coordinator for a queryset.
|
|
@@ -42,6 +44,7 @@ class BulkOperationCoordinator:
|
|
|
42
44
|
# Lazy initialization
|
|
43
45
|
self._analyzer = None
|
|
44
46
|
self._mti_handler = None
|
|
47
|
+
self._record_classifier = None
|
|
45
48
|
self._executor = None
|
|
46
49
|
self._dispatcher = None
|
|
47
50
|
|
|
@@ -63,6 +66,15 @@ class BulkOperationCoordinator:
|
|
|
63
66
|
self._mti_handler = MTIHandler(self.model_cls)
|
|
64
67
|
return self._mti_handler
|
|
65
68
|
|
|
69
|
+
@property
|
|
70
|
+
def record_classifier(self):
|
|
71
|
+
"""Get or create RecordClassifier"""
|
|
72
|
+
if self._record_classifier is None:
|
|
73
|
+
from django_bulk_hooks.operations.record_classifier import RecordClassifier
|
|
74
|
+
|
|
75
|
+
self._record_classifier = RecordClassifier(self.model_cls)
|
|
76
|
+
return self._record_classifier
|
|
77
|
+
|
|
66
78
|
@property
|
|
67
79
|
def executor(self):
|
|
68
80
|
"""Get or create BulkExecutor"""
|
|
@@ -73,6 +85,7 @@ class BulkOperationCoordinator:
|
|
|
73
85
|
queryset=self.queryset,
|
|
74
86
|
analyzer=self.analyzer,
|
|
75
87
|
mti_handler=self.mti_handler,
|
|
88
|
+
record_classifier=self.record_classifier,
|
|
76
89
|
)
|
|
77
90
|
return self._executor
|
|
78
91
|
|
|
@@ -212,40 +225,52 @@ class BulkOperationCoordinator:
|
|
|
212
225
|
self, update_kwargs, bypass_hooks=False, bypass_validation=False
|
|
213
226
|
):
|
|
214
227
|
"""
|
|
215
|
-
Execute queryset
|
|
216
|
-
|
|
217
|
-
ARCHITECTURE
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
1. Fetch old state (
|
|
222
|
-
2. Execute
|
|
223
|
-
3. Fetch new state (
|
|
224
|
-
4. Run
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
228
|
+
Execute queryset.update() with full hook support.
|
|
229
|
+
|
|
230
|
+
ARCHITECTURE & PERFORMANCE TRADE-OFFS
|
|
231
|
+
======================================
|
|
232
|
+
|
|
233
|
+
To support hooks with queryset.update(), we must:
|
|
234
|
+
1. Fetch old state (SELECT all matching rows)
|
|
235
|
+
2. Execute database update (UPDATE in SQL)
|
|
236
|
+
3. Fetch new state (SELECT all rows again)
|
|
237
|
+
4. Run VALIDATE_UPDATE hooks (validation only)
|
|
238
|
+
5. Run BEFORE_UPDATE hooks (CAN modify instances)
|
|
239
|
+
6. Persist BEFORE_UPDATE modifications (bulk_update)
|
|
240
|
+
7. Run AFTER_UPDATE hooks (read-only side effects)
|
|
241
|
+
|
|
242
|
+
Performance Cost:
|
|
243
|
+
- 2 SELECT queries (before/after)
|
|
244
|
+
- 1 UPDATE query (actual update)
|
|
245
|
+
- 1 bulk_update (if hooks modify data)
|
|
246
|
+
|
|
247
|
+
Trade-off: Hooks require loading data into Python. If you need
|
|
248
|
+
maximum performance and don't need hooks, use bypass_hooks=True.
|
|
249
|
+
|
|
250
|
+
Hook Semantics:
|
|
251
|
+
- BEFORE_UPDATE hooks run after the DB update and CAN modify instances
|
|
252
|
+
- Modifications are auto-persisted (framework handles complexity)
|
|
253
|
+
- AFTER_UPDATE hooks run after BEFORE_UPDATE and are read-only
|
|
254
|
+
- This enables cascade logic and computed fields based on DB values
|
|
255
|
+
- User expectation: BEFORE_UPDATE hooks can modify data
|
|
256
|
+
|
|
257
|
+
Why this approach works well:
|
|
258
|
+
- Allows hooks to see Subquery/F() computed values
|
|
259
|
+
- Enables HasChanged conditions on complex expressions
|
|
260
|
+
- Maintains SQL performance (Subquery stays in database)
|
|
261
|
+
- Meets user expectations: BEFORE_UPDATE can modify instances
|
|
262
|
+
- Clean separation: BEFORE for modifications, AFTER for side effects
|
|
263
|
+
|
|
264
|
+
For true "prevent write" semantics, intercept at a higher level
|
|
265
|
+
or use bulk_update() directly (which has true before semantics).
|
|
239
266
|
"""
|
|
240
|
-
# Check bypass early
|
|
241
267
|
from django_bulk_hooks.context import get_bypass_hooks
|
|
242
|
-
should_bypass = bypass_hooks or get_bypass_hooks()
|
|
243
268
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
return
|
|
269
|
+
# Fast path: no hooks at all
|
|
270
|
+
if bypass_hooks or get_bypass_hooks():
|
|
271
|
+
return QuerySet.update(self.queryset, **update_kwargs)
|
|
247
272
|
|
|
248
|
-
#
|
|
273
|
+
# Full hook lifecycle path
|
|
249
274
|
return self._execute_queryset_update_with_hooks(
|
|
250
275
|
update_kwargs=update_kwargs,
|
|
251
276
|
bypass_validation=bypass_validation,
|
|
@@ -255,34 +280,40 @@ class BulkOperationCoordinator:
|
|
|
255
280
|
self, update_kwargs, bypass_validation=False
|
|
256
281
|
):
|
|
257
282
|
"""
|
|
258
|
-
Execute queryset update with
|
|
283
|
+
Execute queryset update with full hook lifecycle support.
|
|
259
284
|
|
|
260
|
-
This method
|
|
261
|
-
|
|
285
|
+
This method implements the fetch-update-fetch pattern required
|
|
286
|
+
to support hooks with queryset.update(). BEFORE_UPDATE hooks can
|
|
287
|
+
modify instances and modifications are auto-persisted.
|
|
262
288
|
|
|
263
289
|
Args:
|
|
264
290
|
update_kwargs: Dict of fields to update
|
|
265
291
|
bypass_validation: Skip validation hooks if True
|
|
266
292
|
|
|
267
293
|
Returns:
|
|
268
|
-
Number of
|
|
294
|
+
Number of rows updated
|
|
269
295
|
"""
|
|
270
|
-
# 1
|
|
296
|
+
# Step 1: Fetch old state (before database update)
|
|
271
297
|
old_instances = list(self.queryset)
|
|
272
298
|
if not old_instances:
|
|
273
299
|
return 0
|
|
300
|
+
|
|
274
301
|
old_records_map = {inst.pk: inst for inst in old_instances}
|
|
275
302
|
|
|
276
|
-
# 2
|
|
277
|
-
|
|
303
|
+
# Step 2: Execute native Django update
|
|
304
|
+
# Use stored reference to parent class method - clean and simple
|
|
305
|
+
update_count = QuerySet.update(self.queryset, **update_kwargs)
|
|
278
306
|
|
|
279
|
-
if
|
|
307
|
+
if update_count == 0:
|
|
280
308
|
return 0
|
|
281
309
|
|
|
282
|
-
# 3
|
|
283
|
-
|
|
310
|
+
# Step 3: Fetch new state (after database update)
|
|
311
|
+
# This captures any Subquery/F() computed values
|
|
312
|
+
# Use primary keys to fetch updated instances since queryset filters may no longer match
|
|
313
|
+
pks = [inst.pk for inst in old_instances]
|
|
314
|
+
new_instances = list(self.model_cls.objects.filter(pk__in=pks))
|
|
284
315
|
|
|
285
|
-
# 4
|
|
316
|
+
# Step 4: Build changeset
|
|
286
317
|
changeset = build_changeset_for_update(
|
|
287
318
|
self.model_cls,
|
|
288
319
|
new_instances,
|
|
@@ -290,65 +321,165 @@ class BulkOperationCoordinator:
|
|
|
290
321
|
old_records_map=old_records_map,
|
|
291
322
|
)
|
|
292
323
|
|
|
293
|
-
# Mark
|
|
324
|
+
# Mark as queryset update for potential hook inspection
|
|
294
325
|
changeset.operation_meta['is_queryset_update'] = True
|
|
295
|
-
changeset.operation_meta['
|
|
326
|
+
changeset.operation_meta['allows_modifications'] = True
|
|
296
327
|
|
|
297
|
-
# 5
|
|
328
|
+
# Step 5: Get MTI inheritance chain
|
|
298
329
|
models_in_chain = [self.model_cls]
|
|
299
330
|
if self.mti_handler.is_mti_model():
|
|
300
331
|
models_in_chain.extend(self.mti_handler.get_parent_models())
|
|
301
332
|
|
|
302
|
-
# 6
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
333
|
+
# Step 6: Run VALIDATE hooks (if not bypassed)
|
|
334
|
+
if not bypass_validation:
|
|
335
|
+
for model_cls in models_in_chain:
|
|
336
|
+
model_changeset = self._build_changeset_for_model(changeset, model_cls)
|
|
337
|
+
self.dispatcher.dispatch(
|
|
338
|
+
model_changeset,
|
|
339
|
+
"validate_update",
|
|
340
|
+
bypass_hooks=False
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
# Step 7: Run BEFORE_UPDATE hooks with modification tracking
|
|
344
|
+
modified_fields = self._run_before_update_hooks_with_tracking(
|
|
345
|
+
new_instances,
|
|
346
|
+
models_in_chain,
|
|
347
|
+
changeset
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
# Step 8: Auto-persist BEFORE_UPDATE modifications
|
|
351
|
+
if modified_fields:
|
|
352
|
+
self._persist_hook_modifications(new_instances, modified_fields)
|
|
353
|
+
|
|
354
|
+
# Step 9: Take snapshot before AFTER_UPDATE hooks
|
|
355
|
+
pre_after_hook_state = self._snapshot_instance_state(new_instances)
|
|
356
|
+
|
|
357
|
+
# Step 10: Run AFTER_UPDATE hooks (read-only side effects)
|
|
316
358
|
for model_cls in models_in_chain:
|
|
317
359
|
model_changeset = self._build_changeset_for_model(changeset, model_cls)
|
|
318
|
-
self.dispatcher.dispatch(
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
for instance in new_instances:
|
|
323
|
-
if instance.pk in pre_hook_state:
|
|
324
|
-
for field_name, pre_value in pre_hook_state[instance.pk].items():
|
|
325
|
-
try:
|
|
326
|
-
current_value = getattr(instance, field_name, None)
|
|
327
|
-
except Exception:
|
|
328
|
-
current_value = None
|
|
329
|
-
|
|
330
|
-
if current_value != pre_value:
|
|
331
|
-
hook_modified_fields.add(field_name)
|
|
332
|
-
|
|
333
|
-
# Auto-persist hook modifications
|
|
334
|
-
if hook_modified_fields:
|
|
335
|
-
logger.info(
|
|
336
|
-
f"BEFORE_UPDATE hooks modified {len(hook_modified_fields)} fields: {hook_modified_fields}"
|
|
360
|
+
self.dispatcher.dispatch(
|
|
361
|
+
model_changeset,
|
|
362
|
+
"after_update",
|
|
363
|
+
bypass_hooks=False
|
|
337
364
|
)
|
|
338
|
-
logger.info("Auto-persisting modifications with bulk_update")
|
|
339
|
-
|
|
340
|
-
# Use bulk_update to persist changes
|
|
341
|
-
# This will trigger another hook cycle (Salesforce-style cascading)
|
|
342
|
-
from django.db.models import QuerySet as BaseQuerySet
|
|
343
|
-
base_qs = BaseQuerySet(model=self.model_cls, using=self.queryset.db)
|
|
344
|
-
base_qs.bulk_update(new_instances, list(hook_modified_fields))
|
|
345
365
|
|
|
346
|
-
#
|
|
366
|
+
# Step 11: Auto-persist AFTER_UPDATE modifications (if any)
|
|
367
|
+
after_modified_fields = self._detect_modifications(new_instances, pre_after_hook_state)
|
|
368
|
+
if after_modified_fields:
|
|
369
|
+
self._persist_hook_modifications(new_instances, after_modified_fields)
|
|
370
|
+
|
|
371
|
+
return update_count
|
|
372
|
+
|
|
373
|
+
def _run_before_update_hooks_with_tracking(self, instances, models_in_chain, changeset):
|
|
374
|
+
"""
|
|
375
|
+
Run BEFORE_UPDATE hooks and detect modifications.
|
|
376
|
+
|
|
377
|
+
This is what users expect - BEFORE_UPDATE hooks can modify instances
|
|
378
|
+
and those modifications will be automatically persisted. The framework
|
|
379
|
+
handles the complexity internally.
|
|
380
|
+
|
|
381
|
+
Returns:
|
|
382
|
+
Set of field names that were modified by hooks
|
|
383
|
+
"""
|
|
384
|
+
# Snapshot current state
|
|
385
|
+
pre_hook_state = self._snapshot_instance_state(instances)
|
|
386
|
+
|
|
387
|
+
# Run BEFORE_UPDATE hooks
|
|
347
388
|
for model_cls in models_in_chain:
|
|
348
389
|
model_changeset = self._build_changeset_for_model(changeset, model_cls)
|
|
349
|
-
self.dispatcher.dispatch(
|
|
390
|
+
self.dispatcher.dispatch(
|
|
391
|
+
model_changeset,
|
|
392
|
+
"before_update",
|
|
393
|
+
bypass_hooks=False
|
|
394
|
+
)
|
|
350
395
|
|
|
351
|
-
|
|
396
|
+
# Detect modifications
|
|
397
|
+
return self._detect_modifications(instances, pre_hook_state)
|
|
398
|
+
|
|
399
|
+
def _snapshot_instance_state(self, instances):
|
|
400
|
+
"""
|
|
401
|
+
Create a snapshot of current instance field values.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
instances: List of model instances
|
|
405
|
+
|
|
406
|
+
Returns:
|
|
407
|
+
Dict mapping pk -> {field_name: value}
|
|
408
|
+
"""
|
|
409
|
+
snapshot = {}
|
|
410
|
+
|
|
411
|
+
for instance in instances:
|
|
412
|
+
if instance.pk is None:
|
|
413
|
+
continue
|
|
414
|
+
|
|
415
|
+
field_values = {}
|
|
416
|
+
for field in self.model_cls._meta.get_fields():
|
|
417
|
+
# Skip relations that aren't concrete fields
|
|
418
|
+
if field.many_to_many or field.one_to_many:
|
|
419
|
+
continue
|
|
420
|
+
|
|
421
|
+
field_name = field.name
|
|
422
|
+
try:
|
|
423
|
+
field_values[field_name] = getattr(instance, field_name)
|
|
424
|
+
except (AttributeError, FieldDoesNotExist):
|
|
425
|
+
# Field not accessible (e.g., deferred field)
|
|
426
|
+
field_values[field_name] = None
|
|
427
|
+
|
|
428
|
+
snapshot[instance.pk] = field_values
|
|
429
|
+
|
|
430
|
+
return snapshot
|
|
431
|
+
|
|
432
|
+
def _detect_modifications(self, instances, pre_hook_state):
|
|
433
|
+
"""
|
|
434
|
+
Detect which fields were modified by comparing to snapshot.
|
|
435
|
+
|
|
436
|
+
Args:
|
|
437
|
+
instances: List of model instances
|
|
438
|
+
pre_hook_state: Previous state snapshot from _snapshot_instance_state
|
|
439
|
+
|
|
440
|
+
Returns:
|
|
441
|
+
Set of field names that were modified
|
|
442
|
+
"""
|
|
443
|
+
modified_fields = set()
|
|
444
|
+
|
|
445
|
+
for instance in instances:
|
|
446
|
+
if instance.pk not in pre_hook_state:
|
|
447
|
+
continue
|
|
448
|
+
|
|
449
|
+
old_values = pre_hook_state[instance.pk]
|
|
450
|
+
|
|
451
|
+
for field_name, old_value in old_values.items():
|
|
452
|
+
try:
|
|
453
|
+
current_value = getattr(instance, field_name)
|
|
454
|
+
except (AttributeError, FieldDoesNotExist):
|
|
455
|
+
current_value = None
|
|
456
|
+
|
|
457
|
+
# Compare values
|
|
458
|
+
if current_value != old_value:
|
|
459
|
+
modified_fields.add(field_name)
|
|
460
|
+
|
|
461
|
+
return modified_fields
|
|
462
|
+
|
|
463
|
+
def _persist_hook_modifications(self, instances, modified_fields):
|
|
464
|
+
"""
|
|
465
|
+
Persist modifications made by hooks using bulk_update.
|
|
466
|
+
|
|
467
|
+
This creates a "cascade" effect similar to Salesforce workflows.
|
|
468
|
+
|
|
469
|
+
Args:
|
|
470
|
+
instances: List of modified instances
|
|
471
|
+
modified_fields: Set of field names that were modified
|
|
472
|
+
"""
|
|
473
|
+
logger.info(
|
|
474
|
+
f"Hooks modified {len(modified_fields)} field(s): "
|
|
475
|
+
f"{', '.join(sorted(modified_fields))}"
|
|
476
|
+
)
|
|
477
|
+
logger.info("Auto-persisting modifications via bulk_update")
|
|
478
|
+
|
|
479
|
+
# Use Django's bulk_update directly (not our hook version)
|
|
480
|
+
# Create a fresh QuerySet to avoid recursion
|
|
481
|
+
fresh_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
|
|
482
|
+
QuerySet.bulk_update(fresh_qs, instances, list(modified_fields))
|
|
352
483
|
|
|
353
484
|
@transaction.atomic
|
|
354
485
|
def delete(self, bypass_hooks=False, bypass_validation=False):
|
|
@@ -375,8 +506,8 @@ class BulkOperationCoordinator:
|
|
|
375
506
|
|
|
376
507
|
# Execute with hook lifecycle
|
|
377
508
|
def operation():
|
|
378
|
-
#
|
|
379
|
-
return
|
|
509
|
+
# Use stored reference to parent method - clean and simple
|
|
510
|
+
return QuerySet.delete(self.queryset)
|
|
380
511
|
|
|
381
512
|
return self._execute_with_mti_hooks(
|
|
382
513
|
changeset=changeset,
|
|
@@ -532,8 +663,8 @@ class BulkOperationCoordinator:
|
|
|
532
663
|
# This is a FK field being updated by its attname (e.g., business_id)
|
|
533
664
|
# Add the relationship name (e.g., 'business') to skip list
|
|
534
665
|
fk_relationships.add(field.name)
|
|
535
|
-
except
|
|
666
|
+
except FieldDoesNotExist:
|
|
536
667
|
# If field lookup fails, skip it
|
|
537
668
|
continue
|
|
538
669
|
|
|
539
|
-
return fk_relationships
|
|
670
|
+
return fk_relationships
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_handler.py
RENAMED
|
@@ -121,6 +121,8 @@ class MTIHandler:
|
|
|
121
121
|
update_conflicts=False,
|
|
122
122
|
unique_fields=None,
|
|
123
123
|
update_fields=None,
|
|
124
|
+
existing_record_ids=None,
|
|
125
|
+
existing_pks_map=None,
|
|
124
126
|
):
|
|
125
127
|
"""
|
|
126
128
|
Build an execution plan for bulk creating MTI model instances.
|
|
@@ -134,6 +136,8 @@ class MTIHandler:
|
|
|
134
136
|
update_conflicts: Enable UPSERT on conflict
|
|
135
137
|
unique_fields: Fields for conflict detection
|
|
136
138
|
update_fields: Fields to update on conflict
|
|
139
|
+
existing_record_ids: Set of id() for objects that exist in DB (from RecordClassifier)
|
|
140
|
+
existing_pks_map: Dict mapping id(obj) -> pk for existing records (from RecordClassifier)
|
|
137
141
|
|
|
138
142
|
Returns:
|
|
139
143
|
MTICreatePlan object
|
|
@@ -149,6 +153,19 @@ class MTIHandler:
|
|
|
149
153
|
|
|
150
154
|
batch_size = batch_size or len(objs)
|
|
151
155
|
|
|
156
|
+
# Use provided classification (no more DB query here!)
|
|
157
|
+
if existing_record_ids is None:
|
|
158
|
+
existing_record_ids = set()
|
|
159
|
+
if existing_pks_map is None:
|
|
160
|
+
existing_pks_map = {}
|
|
161
|
+
|
|
162
|
+
# Set PKs on existing objects so they can be updated
|
|
163
|
+
if existing_pks_map:
|
|
164
|
+
for obj in objs:
|
|
165
|
+
if id(obj) in existing_pks_map:
|
|
166
|
+
setattr(obj, 'pk', existing_pks_map[id(obj)])
|
|
167
|
+
setattr(obj, 'id', existing_pks_map[id(obj)])
|
|
168
|
+
|
|
152
169
|
# Build parent levels
|
|
153
170
|
parent_levels = self._build_parent_levels(
|
|
154
171
|
objs,
|
|
@@ -171,6 +188,10 @@ class MTIHandler:
|
|
|
171
188
|
child_model=inheritance_chain[-1],
|
|
172
189
|
original_objects=objs,
|
|
173
190
|
batch_size=batch_size,
|
|
191
|
+
existing_record_ids=existing_record_ids,
|
|
192
|
+
update_conflicts=update_conflicts,
|
|
193
|
+
unique_fields=unique_fields or [],
|
|
194
|
+
update_fields=update_fields or [],
|
|
174
195
|
)
|
|
175
196
|
|
|
176
197
|
def _build_parent_levels(
|
|
@@ -272,9 +293,17 @@ class MTIHandler:
|
|
|
272
293
|
ut = (ut,)
|
|
273
294
|
ut_field_sets = [tuple(group) for group in ut]
|
|
274
295
|
|
|
296
|
+
# Check individual field uniqueness
|
|
297
|
+
unique_field_sets = []
|
|
298
|
+
for field in model_class._meta.local_fields:
|
|
299
|
+
if field.unique and not field.primary_key:
|
|
300
|
+
unique_field_sets.append((field.name,))
|
|
301
|
+
|
|
275
302
|
# Compare as sets
|
|
276
303
|
provided_set = set(normalized_unique)
|
|
277
|
-
|
|
304
|
+
all_constraint_sets = constraint_field_sets + ut_field_sets + unique_field_sets
|
|
305
|
+
|
|
306
|
+
for group in all_constraint_sets:
|
|
278
307
|
if provided_set == set(group):
|
|
279
308
|
return True
|
|
280
309
|
return False
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/mti_plans.py
RENAMED
|
@@ -45,6 +45,10 @@ class MTICreatePlan:
|
|
|
45
45
|
child_model: The child model class
|
|
46
46
|
original_objects: Original objects provided by user
|
|
47
47
|
batch_size: Batch size for operations
|
|
48
|
+
existing_record_ids: Set of id() of original objects that represent existing DB records
|
|
49
|
+
update_conflicts: Whether this is an upsert operation
|
|
50
|
+
unique_fields: Fields used for conflict detection
|
|
51
|
+
update_fields: Fields to update on conflict
|
|
48
52
|
"""
|
|
49
53
|
inheritance_chain: List[Any]
|
|
50
54
|
parent_levels: List[ParentLevel]
|
|
@@ -52,6 +56,10 @@ class MTICreatePlan:
|
|
|
52
56
|
child_model: Any
|
|
53
57
|
original_objects: List[Any]
|
|
54
58
|
batch_size: int = None
|
|
59
|
+
existing_record_ids: set = field(default_factory=set)
|
|
60
|
+
update_conflicts: bool = False
|
|
61
|
+
unique_fields: List[str] = field(default_factory=list)
|
|
62
|
+
update_fields: List[str] = field(default_factory=list)
|
|
55
63
|
|
|
56
64
|
|
|
57
65
|
@dataclass
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Record classification service for database queries.
|
|
3
|
+
|
|
4
|
+
This service handles all database queries related to classifying and fetching
|
|
5
|
+
records based on various criteria (PKs, unique fields, etc.).
|
|
6
|
+
|
|
7
|
+
Separates data access concerns from business logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from django.db.models import Q
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RecordClassifier:
    """
    Service for classifying and fetching records via database queries.

    This is the SINGLE point of truth for record classification queries.
    Keeps database access logic separate from business/planning logic.

    Queries are issued through ``_default_manager`` / ``_base_manager``
    instead of a hard-coded ``objects`` attribute, so models that declare
    their managers under a different name are supported as well.
    """

    def __init__(self, model_cls):
        """
        Initialize classifier for a specific model.

        Args:
            model_cls: The Django model class
        """
        self.model_cls = model_cls

    def classify_for_upsert(self, objs, unique_fields):
        """
        Classify records as new or existing based on unique_fields.

        Queries the database to check which records already exist based on the
        unique_fields constraint.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint

        Returns:
            Tuple of (existing_record_ids, existing_pks_map)
            - existing_record_ids: Set of id() for objects that exist in DB
            - existing_pks_map: Dict mapping id(obj) -> pk for existing records
        """
        if not unique_fields or not objs:
            return set(), {}

        # One Q per object whose unique fields are all populated.
        queries = []
        obj_to_unique_values = {}

        for obj in objs:
            lookup = {}
            for field_name in unique_fields:
                # NOTE(review): for a ForeignKey named by its relation (not
                # its attname) this is presumably the related instance, while
                # .values() below yields the raw column value, so such
                # objects would never match. Confirm callers pass attnames.
                value = getattr(obj, field_name, None)
                if value is None:
                    # Can't match on None values; the object counts as new.
                    break
                lookup[field_name] = value
            else:
                queries.append(Q(**lookup))
                # Built in unique_fields order so it lines up with the
                # per-row tuple even if a field name is listed twice.
                obj_to_unique_values[id(obj)] = tuple(
                    lookup[field_name] for field_name in unique_fields
                )

        if not queries:
            return set(), {}

        # OR the per-object lookups into a single query.
        combined_query = queries[0]
        for q in queries[1:]:
            combined_query |= q

        existing_records = list(
            self.model_cls._default_manager.filter(combined_query).values(
                'pk', *unique_fields
            )
        )

        existing_record_ids, existing_pks_map = self._match_records_to_objects(
            existing_records, unique_fields, obj_to_unique_values
        )

        logger.info(
            "Classified %d existing and %d new records for upsert",
            len(existing_record_ids),
            len(objs) - len(existing_record_ids),
        )

        return existing_record_ids, existing_pks_map

    @staticmethod
    def _match_records_to_objects(existing_records, unique_fields, obj_to_unique_values):
        """
        Map fetched rows back to the caller's objects.

        Args:
            existing_records: Iterable of dicts holding 'pk' plus each unique field
            unique_fields: Field names, in the order used to build the value tuples
            obj_to_unique_values: Dict mapping id(obj) -> tuple of unique values

        Returns:
            Tuple of (existing_record_ids, existing_pks_map)
        """
        # Index objects by value tuple once so every row is a hash lookup
        # instead of a scan over all objects.
        ids_by_values = {}
        try:
            for obj_id, obj_values in obj_to_unique_values.items():
                ids_by_values.setdefault(obj_values, []).append(obj_id)
        except TypeError:
            # Some value is unhashable (e.g. a list); compare by equality.
            ids_by_values = None

        existing_pks_map = {}
        for record in existing_records:
            record_values = tuple(record[field_name] for field_name in unique_fields)

            matched_ids = None
            if ids_by_values is not None:
                try:
                    matched_ids = ids_by_values.get(record_values, ())
                except TypeError:
                    # The row itself carries an unhashable value.
                    matched_ids = None
            if matched_ids is None:
                matched_ids = [
                    obj_id
                    for obj_id, obj_values in obj_to_unique_values.items()
                    if obj_values == record_values
                ]

            for obj_id in matched_ids:
                existing_pks_map[obj_id] = record['pk']

        return set(existing_pks_map), existing_pks_map

    def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
        """
        Fetch records by primary keys with optional relationship loading.

        Args:
            pks: List of primary key values
            select_related: Optional list of fields to select_related
            prefetch_related: Optional list of fields to prefetch_related

        Returns:
            Dict[pk, instance] for O(1) lookups
        """
        if not pks:
            return {}

        queryset = self.model_cls._base_manager.filter(pk__in=pks)

        if select_related:
            queryset = queryset.select_related(*select_related)

        if prefetch_related:
            queryset = queryset.prefetch_related(*prefetch_related)

        return {obj.pk: obj for obj in queryset}

    def fetch_by_unique_constraint(self, field_values_map):
        """
        Fetch records matching a unique constraint.

        Args:
            field_values_map: Dict of {field_name: value} for unique constraint

        Returns:
            Model instance if found, None otherwise. If several rows match,
            a warning is logged and the first one is returned.
        """
        manager = self.model_cls._default_manager
        try:
            return manager.get(**field_values_map)
        except self.model_cls.DoesNotExist:
            return None
        except self.model_cls.MultipleObjectsReturned:
            logger.warning(
                "Multiple %s records found for unique constraint %s",
                self.model_cls.__name__,
                field_values_map,
            )
            return manager.filter(**field_values_map).first()

    def exists_by_pks(self, pks):
        """
        Check if records exist by primary keys without fetching them.

        Args:
            pks: List of primary key values

        Returns:
            Set of PKs that exist in the database
        """
        if not pks:
            return set()

        existing_pks = self.model_cls._default_manager.filter(
            pk__in=pks
        ).values_list('pk', flat=True)

        return set(existing_pks)

    def count_by_unique_fields(self, objs, unique_fields):
        """
        Count how many objects already exist based on unique fields.

        Useful for validation or reporting before upsert operations.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint

        Returns:
            Tuple of (existing_count, new_count)
        """
        existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
        existing_count = len(existing_ids)
        new_count = len(objs) - existing_count
        return existing_count, new_count
|
|
@@ -165,6 +165,16 @@ class HookRegistry:
|
|
|
165
165
|
with self._lock:
|
|
166
166
|
return dict(self._hooks)
|
|
167
167
|
|
|
168
|
+
@property
def hooks(self) -> Dict[Tuple[Type, str], List[HookInfo]]:
    """
    Return the registry's internal hooks dictionary (intended for tests).

    The object handed back is the internal ``_hooks`` mapping itself, not
    a copy, so test code can inspect or clear registry state in place
    between test runs.
    """
    live_mapping = self._hooks
    return live_mapping
|
|
177
|
+
|
|
168
178
|
def count_hooks(
|
|
169
179
|
self, model: Optional[Type] = None, event: Optional[str] = None
|
|
170
180
|
) -> int:
|
|
@@ -286,3 +296,8 @@ def list_all_hooks() -> Dict[Tuple[Type, str], List[HookInfo]]:
|
|
|
286
296
|
"""
|
|
287
297
|
registry = get_registry()
|
|
288
298
|
return registry.list_all()
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Module-level alias of the registry's hooks dictionary, kept for tests.
# Older tests access ``_hooks`` on this module directly; this keeps them working.
_hooks = get_registry().hooks
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/__init__.py
RENAMED
|
File without changes
|
{django_bulk_hooks-0.2.14 → django_bulk_hooks-0.2.16}/django_bulk_hooks/operations/analyzer.py
RENAMED
|
File without changes
|
|
File without changes
|