django-bulk-hooks 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of django-bulk-hooks might be problematic. Click here for more details.
- django_bulk_hooks/__init__.py +20 -24
- django_bulk_hooks/changeset.py +1 -1
- django_bulk_hooks/conditions.py +8 -12
- django_bulk_hooks/decorators.py +15 -11
- django_bulk_hooks/dispatcher.py +19 -10
- django_bulk_hooks/factory.py +36 -38
- django_bulk_hooks/handler.py +5 -6
- django_bulk_hooks/helpers.py +4 -3
- django_bulk_hooks/models.py +12 -13
- django_bulk_hooks/operations/__init__.py +5 -5
- django_bulk_hooks/operations/analyzer.py +14 -14
- django_bulk_hooks/operations/bulk_executor.py +220 -129
- django_bulk_hooks/operations/coordinator.py +82 -61
- django_bulk_hooks/operations/mti_handler.py +91 -60
- django_bulk_hooks/operations/mti_plans.py +23 -14
- django_bulk_hooks/operations/record_classifier.py +184 -0
- django_bulk_hooks/queryset.py +5 -3
- django_bulk_hooks/registry.py +53 -43
- {django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/METADATA +1 -1
- django_bulk_hooks-0.2.17.dist-info/RECORD +26 -0
- django_bulk_hooks-0.2.15.dist-info/RECORD +0 -25
- {django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/LICENSE +0 -0
- {django_bulk_hooks-0.2.15.dist-info → django_bulk_hooks-0.2.17.dist-info}/WHEEL +0 -0
django_bulk_hooks/operations/mti_handler.py
CHANGED
|
@@ -8,6 +8,7 @@ It returns plans (data structures) that the BulkExecutor executes.
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import logging
|
|
11
|
+
|
|
11
12
|
from django.db.models import AutoField
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
@@ -121,6 +122,8 @@ class MTIHandler:
|
|
|
121
122
|
update_conflicts=False,
|
|
122
123
|
unique_fields=None,
|
|
123
124
|
update_fields=None,
|
|
125
|
+
existing_record_ids=None,
|
|
126
|
+
existing_pks_map=None,
|
|
124
127
|
):
|
|
125
128
|
"""
|
|
126
129
|
Build an execution plan for bulk creating MTI model instances.
|
|
@@ -134,21 +137,36 @@ class MTIHandler:
|
|
|
134
137
|
update_conflicts: Enable UPSERT on conflict
|
|
135
138
|
unique_fields: Fields for conflict detection
|
|
136
139
|
update_fields: Fields to update on conflict
|
|
140
|
+
existing_record_ids: Set of id() for objects that exist in DB (from RecordClassifier)
|
|
141
|
+
existing_pks_map: Dict mapping id(obj) -> pk for existing records (from RecordClassifier)
|
|
137
142
|
|
|
138
143
|
Returns:
|
|
139
144
|
MTICreatePlan object
|
|
140
145
|
"""
|
|
141
|
-
from django_bulk_hooks.operations.mti_plans import MTICreatePlan
|
|
142
|
-
|
|
146
|
+
from django_bulk_hooks.operations.mti_plans import MTICreatePlan
|
|
147
|
+
|
|
143
148
|
if not objs:
|
|
144
149
|
return None
|
|
145
|
-
|
|
150
|
+
|
|
146
151
|
inheritance_chain = self.get_inheritance_chain()
|
|
147
152
|
if len(inheritance_chain) <= 1:
|
|
148
153
|
raise ValueError("build_create_plan called on non-MTI model")
|
|
149
|
-
|
|
154
|
+
|
|
150
155
|
batch_size = batch_size or len(objs)
|
|
151
|
-
|
|
156
|
+
|
|
157
|
+
# Use provided classification (no more DB query here!)
|
|
158
|
+
if existing_record_ids is None:
|
|
159
|
+
existing_record_ids = set()
|
|
160
|
+
if existing_pks_map is None:
|
|
161
|
+
existing_pks_map = {}
|
|
162
|
+
|
|
163
|
+
# Set PKs on existing objects so they can be updated
|
|
164
|
+
if existing_pks_map:
|
|
165
|
+
for obj in objs:
|
|
166
|
+
if id(obj) in existing_pks_map:
|
|
167
|
+
obj.pk = existing_pks_map[id(obj)]
|
|
168
|
+
obj.id = existing_pks_map[id(obj)]
|
|
169
|
+
|
|
152
170
|
# Build parent levels
|
|
153
171
|
parent_levels = self._build_parent_levels(
|
|
154
172
|
objs,
|
|
@@ -157,13 +175,13 @@ class MTIHandler:
|
|
|
157
175
|
unique_fields=unique_fields,
|
|
158
176
|
update_fields=update_fields,
|
|
159
177
|
)
|
|
160
|
-
|
|
178
|
+
|
|
161
179
|
# Build child object templates (without parent links - executor adds them)
|
|
162
180
|
child_objects = []
|
|
163
181
|
for obj in objs:
|
|
164
182
|
child_obj = self._create_child_instance_template(obj, inheritance_chain[-1])
|
|
165
183
|
child_objects.append(child_obj)
|
|
166
|
-
|
|
184
|
+
|
|
167
185
|
return MTICreatePlan(
|
|
168
186
|
inheritance_chain=inheritance_chain,
|
|
169
187
|
parent_levels=parent_levels,
|
|
@@ -171,6 +189,10 @@ class MTIHandler:
|
|
|
171
189
|
child_model=inheritance_chain[-1],
|
|
172
190
|
original_objects=objs,
|
|
173
191
|
batch_size=batch_size,
|
|
192
|
+
existing_record_ids=existing_record_ids,
|
|
193
|
+
update_conflicts=update_conflicts,
|
|
194
|
+
unique_fields=unique_fields or [],
|
|
195
|
+
update_fields=update_fields or [],
|
|
174
196
|
)
|
|
175
197
|
|
|
176
198
|
def _build_parent_levels(
|
|
@@ -190,38 +212,38 @@ class MTIHandler:
|
|
|
190
212
|
List of ParentLevel objects
|
|
191
213
|
"""
|
|
192
214
|
from django_bulk_hooks.operations.mti_plans import ParentLevel
|
|
193
|
-
|
|
215
|
+
|
|
194
216
|
parent_levels = []
|
|
195
217
|
parent_instances_map = {} # Maps obj id() -> {model_class: parent_instance}
|
|
196
|
-
|
|
218
|
+
|
|
197
219
|
for level_idx, model_class in enumerate(inheritance_chain[:-1]):
|
|
198
220
|
parent_objs_for_level = []
|
|
199
|
-
|
|
221
|
+
|
|
200
222
|
for obj in objs:
|
|
201
223
|
# Get current parent from previous level
|
|
202
224
|
current_parent = None
|
|
203
225
|
if level_idx > 0:
|
|
204
226
|
prev_parents = parent_instances_map.get(id(obj), {})
|
|
205
227
|
current_parent = prev_parents.get(inheritance_chain[level_idx - 1])
|
|
206
|
-
|
|
228
|
+
|
|
207
229
|
# Create parent instance
|
|
208
230
|
parent_obj = self._create_parent_instance(obj, model_class, current_parent)
|
|
209
231
|
parent_objs_for_level.append(parent_obj)
|
|
210
|
-
|
|
232
|
+
|
|
211
233
|
# Store in map
|
|
212
234
|
if id(obj) not in parent_instances_map:
|
|
213
235
|
parent_instances_map[id(obj)] = {}
|
|
214
236
|
parent_instances_map[id(obj)][model_class] = parent_obj
|
|
215
|
-
|
|
237
|
+
|
|
216
238
|
# Determine upsert parameters for this level
|
|
217
239
|
level_update_conflicts = False
|
|
218
240
|
level_unique_fields = []
|
|
219
241
|
level_update_fields = []
|
|
220
|
-
|
|
242
|
+
|
|
221
243
|
if update_conflicts and unique_fields:
|
|
222
244
|
# Filter unique_fields and update_fields to only those in this model
|
|
223
245
|
model_fields_by_name = {f.name: f for f in model_class._meta.local_fields}
|
|
224
|
-
|
|
246
|
+
|
|
225
247
|
# Normalize unique fields
|
|
226
248
|
normalized_unique = []
|
|
227
249
|
for uf in unique_fields or []:
|
|
@@ -229,19 +251,19 @@ class MTIHandler:
|
|
|
229
251
|
normalized_unique.append(uf)
|
|
230
252
|
elif uf.endswith("_id") and uf[:-3] in model_fields_by_name:
|
|
231
253
|
normalized_unique.append(uf[:-3])
|
|
232
|
-
|
|
254
|
+
|
|
233
255
|
# Check if this model has a matching constraint
|
|
234
256
|
if normalized_unique and self._has_matching_constraint(model_class, normalized_unique):
|
|
235
257
|
# Filter update fields
|
|
236
258
|
filtered_updates = [
|
|
237
259
|
uf for uf in (update_fields or []) if uf in model_fields_by_name
|
|
238
260
|
]
|
|
239
|
-
|
|
261
|
+
|
|
240
262
|
if filtered_updates:
|
|
241
263
|
level_update_conflicts = True
|
|
242
264
|
level_unique_fields = normalized_unique
|
|
243
265
|
level_update_fields = filtered_updates
|
|
244
|
-
|
|
266
|
+
|
|
245
267
|
# Create parent level
|
|
246
268
|
parent_level = ParentLevel(
|
|
247
269
|
model_class=model_class,
|
|
@@ -252,7 +274,7 @@ class MTIHandler:
|
|
|
252
274
|
update_fields=level_update_fields,
|
|
253
275
|
)
|
|
254
276
|
parent_levels.append(parent_level)
|
|
255
|
-
|
|
277
|
+
|
|
256
278
|
return parent_levels
|
|
257
279
|
|
|
258
280
|
def _has_matching_constraint(self, model_class, normalized_unique):
|
|
@@ -260,21 +282,29 @@ class MTIHandler:
|
|
|
260
282
|
try:
|
|
261
283
|
from django.db.models import UniqueConstraint
|
|
262
284
|
constraint_field_sets = [
|
|
263
|
-
tuple(c.fields) for c in model_class._meta.constraints
|
|
285
|
+
tuple(c.fields) for c in model_class._meta.constraints
|
|
264
286
|
if isinstance(c, UniqueConstraint)
|
|
265
287
|
]
|
|
266
288
|
except Exception:
|
|
267
289
|
constraint_field_sets = []
|
|
268
|
-
|
|
290
|
+
|
|
269
291
|
# Check unique_together
|
|
270
292
|
ut = getattr(model_class._meta, "unique_together", ()) or ()
|
|
271
293
|
if isinstance(ut, tuple) and ut and not isinstance(ut[0], (list, tuple)):
|
|
272
294
|
ut = (ut,)
|
|
273
295
|
ut_field_sets = [tuple(group) for group in ut]
|
|
274
|
-
|
|
296
|
+
|
|
297
|
+
# Check individual field uniqueness
|
|
298
|
+
unique_field_sets = []
|
|
299
|
+
for field in model_class._meta.local_fields:
|
|
300
|
+
if field.unique and not field.primary_key:
|
|
301
|
+
unique_field_sets.append((field.name,))
|
|
302
|
+
|
|
275
303
|
# Compare as sets
|
|
276
304
|
provided_set = set(normalized_unique)
|
|
277
|
-
|
|
305
|
+
all_constraint_sets = constraint_field_sets + ut_field_sets + unique_field_sets
|
|
306
|
+
|
|
307
|
+
for group in all_constraint_sets:
|
|
278
308
|
if provided_set == set(group):
|
|
279
309
|
return True
|
|
280
310
|
return False
|
|
@@ -292,13 +322,13 @@ class MTIHandler:
|
|
|
292
322
|
Parent model instance (not saved)
|
|
293
323
|
"""
|
|
294
324
|
parent_obj = parent_model()
|
|
295
|
-
|
|
325
|
+
|
|
296
326
|
# Copy field values from source
|
|
297
327
|
for field in parent_model._meta.local_fields:
|
|
298
328
|
if hasattr(source_obj, field.name):
|
|
299
329
|
value = getattr(source_obj, field.name, None)
|
|
300
330
|
if value is not None:
|
|
301
|
-
if (field.is_relation and not field.many_to_many and
|
|
331
|
+
if (field.is_relation and not field.many_to_many and
|
|
302
332
|
not field.one_to_many):
|
|
303
333
|
# Handle FK fields
|
|
304
334
|
if hasattr(value, "pk") and value.pk is not None:
|
|
@@ -307,7 +337,7 @@ class MTIHandler:
|
|
|
307
337
|
setattr(parent_obj, field.attname, value)
|
|
308
338
|
else:
|
|
309
339
|
setattr(parent_obj, field.name, value)
|
|
310
|
-
|
|
340
|
+
|
|
311
341
|
# Link to parent if exists
|
|
312
342
|
if current_parent is not None:
|
|
313
343
|
for field in parent_model._meta.local_fields:
|
|
@@ -315,22 +345,22 @@ class MTIHandler:
|
|
|
315
345
|
field.remote_field.model == current_parent.__class__):
|
|
316
346
|
setattr(parent_obj, field.name, current_parent)
|
|
317
347
|
break
|
|
318
|
-
|
|
348
|
+
|
|
319
349
|
# Copy object state
|
|
320
|
-
if hasattr(source_obj,
|
|
350
|
+
if hasattr(source_obj, "_state") and hasattr(parent_obj, "_state"):
|
|
321
351
|
parent_obj._state.adding = source_obj._state.adding
|
|
322
|
-
if hasattr(source_obj._state,
|
|
352
|
+
if hasattr(source_obj._state, "db"):
|
|
323
353
|
parent_obj._state.db = source_obj._state.db
|
|
324
|
-
|
|
354
|
+
|
|
325
355
|
# Handle auto_now_add and auto_now fields
|
|
326
356
|
for field in parent_model._meta.local_fields:
|
|
327
|
-
if getattr(field,
|
|
357
|
+
if getattr(field, "auto_now_add", False):
|
|
328
358
|
if getattr(parent_obj, field.name) is None:
|
|
329
359
|
field.pre_save(parent_obj, add=True)
|
|
330
360
|
setattr(parent_obj, field.attname, field.value_from_object(parent_obj))
|
|
331
|
-
elif getattr(field,
|
|
361
|
+
elif getattr(field, "auto_now", False):
|
|
332
362
|
field.pre_save(parent_obj, add=True)
|
|
333
|
-
|
|
363
|
+
|
|
334
364
|
return parent_obj
|
|
335
365
|
|
|
336
366
|
def _create_child_instance_template(self, source_obj, child_model):
|
|
@@ -347,22 +377,22 @@ class MTIHandler:
|
|
|
347
377
|
Child model instance (not saved, no parent links)
|
|
348
378
|
"""
|
|
349
379
|
child_obj = child_model()
|
|
350
|
-
|
|
380
|
+
|
|
351
381
|
# Copy field values (excluding AutoField and parent links)
|
|
352
382
|
for field in child_model._meta.local_fields:
|
|
353
383
|
if isinstance(field, AutoField):
|
|
354
384
|
continue
|
|
355
|
-
|
|
385
|
+
|
|
356
386
|
# Skip parent link fields - executor will set these
|
|
357
|
-
if field.is_relation and hasattr(field,
|
|
387
|
+
if field.is_relation and hasattr(field, "related_model"):
|
|
358
388
|
# Check if this field is a parent link
|
|
359
389
|
if child_model._meta.get_ancestor_link(field.related_model) == field:
|
|
360
390
|
continue
|
|
361
|
-
|
|
391
|
+
|
|
362
392
|
if hasattr(source_obj, field.name):
|
|
363
393
|
value = getattr(source_obj, field.name, None)
|
|
364
394
|
if value is not None:
|
|
365
|
-
if (field.is_relation and not field.many_to_many and
|
|
395
|
+
if (field.is_relation and not field.many_to_many and
|
|
366
396
|
not field.one_to_many):
|
|
367
397
|
if hasattr(value, "pk") and value.pk is not None:
|
|
368
398
|
setattr(child_obj, field.attname, value.pk)
|
|
@@ -370,22 +400,22 @@ class MTIHandler:
|
|
|
370
400
|
setattr(child_obj, field.attname, value)
|
|
371
401
|
else:
|
|
372
402
|
setattr(child_obj, field.name, value)
|
|
373
|
-
|
|
403
|
+
|
|
374
404
|
# Copy object state
|
|
375
|
-
if hasattr(source_obj,
|
|
405
|
+
if hasattr(source_obj, "_state") and hasattr(child_obj, "_state"):
|
|
376
406
|
child_obj._state.adding = source_obj._state.adding
|
|
377
|
-
if hasattr(source_obj._state,
|
|
407
|
+
if hasattr(source_obj._state, "db"):
|
|
378
408
|
child_obj._state.db = source_obj._state.db
|
|
379
|
-
|
|
409
|
+
|
|
380
410
|
# Handle auto_now_add and auto_now fields
|
|
381
411
|
for field in child_model._meta.local_fields:
|
|
382
|
-
if getattr(field,
|
|
412
|
+
if getattr(field, "auto_now_add", False):
|
|
383
413
|
if getattr(child_obj, field.name) is None:
|
|
384
414
|
field.pre_save(child_obj, add=True)
|
|
385
415
|
setattr(child_obj, field.attname, field.value_from_object(child_obj))
|
|
386
|
-
elif getattr(field,
|
|
416
|
+
elif getattr(field, "auto_now", False):
|
|
387
417
|
field.pre_save(child_obj, add=True)
|
|
388
|
-
|
|
418
|
+
|
|
389
419
|
return child_obj
|
|
390
420
|
|
|
391
421
|
# ==================== MTI BULK UPDATE PLANNING ====================
|
|
@@ -404,48 +434,49 @@ class MTIHandler:
|
|
|
404
434
|
Returns:
|
|
405
435
|
MTIUpdatePlan object
|
|
406
436
|
"""
|
|
407
|
-
from django_bulk_hooks.operations.mti_plans import
|
|
408
|
-
|
|
437
|
+
from django_bulk_hooks.operations.mti_plans import ModelFieldGroup
|
|
438
|
+
from django_bulk_hooks.operations.mti_plans import MTIUpdatePlan
|
|
439
|
+
|
|
409
440
|
if not objs:
|
|
410
441
|
return None
|
|
411
|
-
|
|
442
|
+
|
|
412
443
|
inheritance_chain = self.get_inheritance_chain()
|
|
413
444
|
if len(inheritance_chain) <= 1:
|
|
414
445
|
raise ValueError("build_update_plan called on non-MTI model")
|
|
415
|
-
|
|
446
|
+
|
|
416
447
|
batch_size = batch_size or len(objs)
|
|
417
|
-
|
|
448
|
+
|
|
418
449
|
# Handle auto_now fields
|
|
419
450
|
for obj in objs:
|
|
420
451
|
for model in inheritance_chain:
|
|
421
452
|
for field in model._meta.local_fields:
|
|
422
|
-
if getattr(field,
|
|
453
|
+
if getattr(field, "auto_now", False):
|
|
423
454
|
field.pre_save(obj, add=False)
|
|
424
|
-
|
|
455
|
+
|
|
425
456
|
# Add auto_now fields to update list
|
|
426
457
|
auto_now_fields = set()
|
|
427
458
|
for model in inheritance_chain:
|
|
428
459
|
for field in model._meta.local_fields:
|
|
429
|
-
if getattr(field,
|
|
460
|
+
if getattr(field, "auto_now", False):
|
|
430
461
|
auto_now_fields.add(field.name)
|
|
431
|
-
|
|
462
|
+
|
|
432
463
|
all_fields = list(fields) + list(auto_now_fields)
|
|
433
|
-
|
|
464
|
+
|
|
434
465
|
# Group fields by model
|
|
435
466
|
field_groups = []
|
|
436
467
|
for model_idx, model in enumerate(inheritance_chain):
|
|
437
468
|
model_fields = []
|
|
438
|
-
|
|
469
|
+
|
|
439
470
|
for field_name in all_fields:
|
|
440
471
|
try:
|
|
441
472
|
field = self.model_cls._meta.get_field(field_name)
|
|
442
473
|
if field in model._meta.local_fields:
|
|
443
474
|
# Skip auto_now_add fields for updates
|
|
444
|
-
if not getattr(field,
|
|
475
|
+
if not getattr(field, "auto_now_add", False):
|
|
445
476
|
model_fields.append(field_name)
|
|
446
477
|
except Exception:
|
|
447
478
|
continue
|
|
448
|
-
|
|
479
|
+
|
|
449
480
|
if model_fields:
|
|
450
481
|
# Determine filter field
|
|
451
482
|
if model_idx == 0:
|
|
@@ -458,13 +489,13 @@ class MTIHandler:
|
|
|
458
489
|
parent_link = model._meta.parents[parent_model]
|
|
459
490
|
break
|
|
460
491
|
filter_field = parent_link.attname if parent_link else "pk"
|
|
461
|
-
|
|
492
|
+
|
|
462
493
|
field_groups.append(ModelFieldGroup(
|
|
463
494
|
model_class=model,
|
|
464
495
|
fields=model_fields,
|
|
465
496
|
filter_field=filter_field,
|
|
466
497
|
))
|
|
467
|
-
|
|
498
|
+
|
|
468
499
|
return MTIUpdatePlan(
|
|
469
500
|
inheritance_chain=inheritance_chain,
|
|
470
501
|
field_groups=field_groups,
|
django_bulk_hooks/operations/mti_plans.py
CHANGED
|
@@ -5,8 +5,9 @@ These are pure data structures returned by MTIHandler to be executed by BulkExec
|
|
|
5
5
|
This separates planning (logic) from execution (database operations).
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from dataclasses import dataclass
|
|
9
|
-
from
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from dataclasses import field
|
|
10
|
+
from typing import Any
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
@dataclass
|
|
@@ -23,11 +24,11 @@ class ParentLevel:
|
|
|
23
24
|
update_fields: Fields to update on conflict (if update_conflicts=True)
|
|
24
25
|
"""
|
|
25
26
|
model_class: Any
|
|
26
|
-
objects:
|
|
27
|
-
original_object_map:
|
|
27
|
+
objects: list[Any]
|
|
28
|
+
original_object_map: dict[int, int] = field(default_factory=dict)
|
|
28
29
|
update_conflicts: bool = False
|
|
29
|
-
unique_fields:
|
|
30
|
-
update_fields:
|
|
30
|
+
unique_fields: list[str] = field(default_factory=list)
|
|
31
|
+
update_fields: list[str] = field(default_factory=list)
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
@dataclass
|
|
@@ -45,13 +46,21 @@ class MTICreatePlan:
|
|
|
45
46
|
child_model: The child model class
|
|
46
47
|
original_objects: Original objects provided by user
|
|
47
48
|
batch_size: Batch size for operations
|
|
49
|
+
existing_record_ids: Set of id() of original objects that represent existing DB records
|
|
50
|
+
update_conflicts: Whether this is an upsert operation
|
|
51
|
+
unique_fields: Fields used for conflict detection
|
|
52
|
+
update_fields: Fields to update on conflict
|
|
48
53
|
"""
|
|
49
|
-
inheritance_chain:
|
|
50
|
-
parent_levels:
|
|
51
|
-
child_objects:
|
|
54
|
+
inheritance_chain: list[Any]
|
|
55
|
+
parent_levels: list[ParentLevel]
|
|
56
|
+
child_objects: list[Any]
|
|
52
57
|
child_model: Any
|
|
53
|
-
original_objects:
|
|
58
|
+
original_objects: list[Any]
|
|
54
59
|
batch_size: int = None
|
|
60
|
+
existing_record_ids: set = field(default_factory=set)
|
|
61
|
+
update_conflicts: bool = False
|
|
62
|
+
unique_fields: list[str] = field(default_factory=list)
|
|
63
|
+
update_fields: list[str] = field(default_factory=list)
|
|
55
64
|
|
|
56
65
|
|
|
57
66
|
@dataclass
|
|
@@ -65,7 +74,7 @@ class ModelFieldGroup:
|
|
|
65
74
|
filter_field: Field to use for filtering (e.g., 'pk' or parent link attname)
|
|
66
75
|
"""
|
|
67
76
|
model_class: Any
|
|
68
|
-
fields:
|
|
77
|
+
fields: list[str]
|
|
69
78
|
filter_field: str = "pk"
|
|
70
79
|
|
|
71
80
|
|
|
@@ -80,8 +89,8 @@ class MTIUpdatePlan:
|
|
|
80
89
|
objects: Objects to update
|
|
81
90
|
batch_size: Batch size for operations
|
|
82
91
|
"""
|
|
83
|
-
inheritance_chain:
|
|
84
|
-
field_groups:
|
|
85
|
-
objects:
|
|
92
|
+
inheritance_chain: list[Any]
|
|
93
|
+
field_groups: list[ModelFieldGroup]
|
|
94
|
+
objects: list[Any]
|
|
86
95
|
batch_size: int = None
|
|
87
96
|
|
django_bulk_hooks/operations/record_classifier.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Record classification service for database queries.
|
|
3
|
+
|
|
4
|
+
This service handles all database queries related to classifying and fetching
|
|
5
|
+
records based on various criteria (PKs, unique fields, etc.).
|
|
6
|
+
|
|
7
|
+
Separates data access concerns from business logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from django.db.models import Q
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RecordClassifier:
|
|
18
|
+
"""
|
|
19
|
+
Service for classifying and fetching records via database queries.
|
|
20
|
+
|
|
21
|
+
This is the SINGLE point of truth for record classification queries.
|
|
22
|
+
Keeps database access logic separate from business/planning logic.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def __init__(self, model_cls):
|
|
26
|
+
"""
|
|
27
|
+
Initialize classifier for a specific model.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
model_cls: The Django model class
|
|
31
|
+
"""
|
|
32
|
+
self.model_cls = model_cls
|
|
33
|
+
|
|
34
|
+
def classify_for_upsert(self, objs, unique_fields):
|
|
35
|
+
"""
|
|
36
|
+
Classify records as new or existing based on unique_fields.
|
|
37
|
+
|
|
38
|
+
Queries the database to check which records already exist based on the
|
|
39
|
+
unique_fields constraint.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
objs: List of model instances
|
|
43
|
+
unique_fields: List of field names that form the unique constraint
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
Tuple of (existing_record_ids, existing_pks_map)
|
|
47
|
+
- existing_record_ids: Set of id() for objects that exist in DB
|
|
48
|
+
- existing_pks_map: Dict mapping id(obj) -> pk for existing records
|
|
49
|
+
"""
|
|
50
|
+
if not unique_fields or not objs:
|
|
51
|
+
return set(), {}
|
|
52
|
+
|
|
53
|
+
# Build a query to find existing records
|
|
54
|
+
queries = []
|
|
55
|
+
obj_to_unique_values = {}
|
|
56
|
+
|
|
57
|
+
for obj in objs:
|
|
58
|
+
# Build lookup dict for this object's unique fields
|
|
59
|
+
lookup = {}
|
|
60
|
+
for field_name in unique_fields:
|
|
61
|
+
value = getattr(obj, field_name, None)
|
|
62
|
+
if value is None:
|
|
63
|
+
# Can't match on None values
|
|
64
|
+
break
|
|
65
|
+
lookup[field_name] = value
|
|
66
|
+
else:
|
|
67
|
+
# All unique fields have values, add to query
|
|
68
|
+
if lookup:
|
|
69
|
+
queries.append(Q(**lookup))
|
|
70
|
+
obj_to_unique_values[id(obj)] = tuple(lookup.values())
|
|
71
|
+
|
|
72
|
+
if not queries:
|
|
73
|
+
return set(), {}
|
|
74
|
+
|
|
75
|
+
# Query for existing records
|
|
76
|
+
combined_query = queries[0]
|
|
77
|
+
for q in queries[1:]:
|
|
78
|
+
combined_query |= q
|
|
79
|
+
|
|
80
|
+
existing_records = list(
|
|
81
|
+
self.model_cls.objects.filter(combined_query).values("pk", *unique_fields),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Map existing records back to original objects
|
|
85
|
+
existing_record_ids = set()
|
|
86
|
+
existing_pks_map = {}
|
|
87
|
+
|
|
88
|
+
for record in existing_records:
|
|
89
|
+
record_values = tuple(record[field] for field in unique_fields)
|
|
90
|
+
# Find which object(s) match these values
|
|
91
|
+
for obj_id, obj_values in obj_to_unique_values.items():
|
|
92
|
+
if obj_values == record_values:
|
|
93
|
+
existing_record_ids.add(obj_id)
|
|
94
|
+
existing_pks_map[obj_id] = record["pk"]
|
|
95
|
+
|
|
96
|
+
logger.info(
|
|
97
|
+
f"Classified {len(existing_record_ids)} existing and "
|
|
98
|
+
f"{len(objs) - len(existing_record_ids)} new records for upsert",
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
return existing_record_ids, existing_pks_map
|
|
102
|
+
|
|
103
|
+
def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
|
|
104
|
+
"""
|
|
105
|
+
Fetch records by primary keys with optional relationship loading.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
pks: List of primary key values
|
|
109
|
+
select_related: Optional list of fields to select_related
|
|
110
|
+
prefetch_related: Optional list of fields to prefetch_related
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
Dict[pk, instance] for O(1) lookups
|
|
114
|
+
"""
|
|
115
|
+
if not pks:
|
|
116
|
+
return {}
|
|
117
|
+
|
|
118
|
+
queryset = self.model_cls._base_manager.filter(pk__in=pks)
|
|
119
|
+
|
|
120
|
+
if select_related:
|
|
121
|
+
queryset = queryset.select_related(*select_related)
|
|
122
|
+
|
|
123
|
+
if prefetch_related:
|
|
124
|
+
queryset = queryset.prefetch_related(*prefetch_related)
|
|
125
|
+
|
|
126
|
+
return {obj.pk: obj for obj in queryset}
|
|
127
|
+
|
|
128
|
+
def fetch_by_unique_constraint(self, field_values_map):
|
|
129
|
+
"""
|
|
130
|
+
Fetch records matching a unique constraint.
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
field_values_map: Dict of {field_name: value} for unique constraint
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
Model instance if found, None otherwise
|
|
137
|
+
"""
|
|
138
|
+
try:
|
|
139
|
+
return self.model_cls.objects.get(**field_values_map)
|
|
140
|
+
except self.model_cls.DoesNotExist:
|
|
141
|
+
return None
|
|
142
|
+
except self.model_cls.MultipleObjectsReturned:
|
|
143
|
+
logger.warning(
|
|
144
|
+
f"Multiple {self.model_cls.__name__} records found for "
|
|
145
|
+
f"unique constraint {field_values_map}",
|
|
146
|
+
)
|
|
147
|
+
return self.model_cls.objects.filter(**field_values_map).first()
|
|
148
|
+
|
|
149
|
+
def exists_by_pks(self, pks):
|
|
150
|
+
"""
|
|
151
|
+
Check if records exist by primary keys without fetching them.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
pks: List of primary key values
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
Set of PKs that exist in the database
|
|
158
|
+
"""
|
|
159
|
+
if not pks:
|
|
160
|
+
return set()
|
|
161
|
+
|
|
162
|
+
existing_pks = self.model_cls.objects.filter(
|
|
163
|
+
pk__in=pks,
|
|
164
|
+
).values_list("pk", flat=True)
|
|
165
|
+
|
|
166
|
+
return set(existing_pks)
|
|
167
|
+
|
|
168
|
+
def count_by_unique_fields(self, objs, unique_fields):
|
|
169
|
+
"""
|
|
170
|
+
Count how many objects already exist based on unique fields.
|
|
171
|
+
|
|
172
|
+
Useful for validation or reporting before upsert operations.
|
|
173
|
+
|
|
174
|
+
Args:
|
|
175
|
+
objs: List of model instances
|
|
176
|
+
unique_fields: List of field names that form the unique constraint
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
Tuple of (existing_count, new_count)
|
|
180
|
+
"""
|
|
181
|
+
existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
|
|
182
|
+
existing_count = len(existing_ids)
|
|
183
|
+
new_count = len(objs) - existing_count
|
|
184
|
+
return existing_count, new_count
|
django_bulk_hooks/queryset.py
CHANGED
|
@@ -7,7 +7,9 @@ complex coordination required for bulk operations with hooks.
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import logging
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
from django.db import models
|
|
12
|
+
from django.db import transaction
|
|
11
13
|
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
@@ -98,7 +100,7 @@ class HookQuerySet(models.QuerySet):
|
|
|
98
100
|
fields = self.coordinator.analyzer.detect_changed_fields(objs)
|
|
99
101
|
if not fields:
|
|
100
102
|
logger.debug(
|
|
101
|
-
f"bulk_update: No fields changed for {len(objs)} {self.model.__name__} objects"
|
|
103
|
+
f"bulk_update: No fields changed for {len(objs)} {self.model.__name__} objects",
|
|
102
104
|
)
|
|
103
105
|
return 0
|
|
104
106
|
|
|
@@ -133,7 +135,7 @@ class HookQuerySet(models.QuerySet):
|
|
|
133
135
|
|
|
134
136
|
@transaction.atomic
|
|
135
137
|
def bulk_delete(
|
|
136
|
-
self, objs, bypass_hooks=False, bypass_validation=False, **kwargs
|
|
138
|
+
self, objs, bypass_hooks=False, bypass_validation=False, **kwargs,
|
|
137
139
|
):
|
|
138
140
|
"""
|
|
139
141
|
Delete multiple objects with hook support.
|