django-bulk-hooks 0.1.281__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of django-bulk-hooks might be problematic. Click here for more details.

@@ -0,0 +1,208 @@
1
+ """
2
+ Model analyzer service - Combines validation and field tracking.
3
+
4
+ This service handles all model analysis needs:
5
+ - Input validation
6
+ - Field change detection
7
+ - Field comparison
8
+ """
9
+
10
+ import logging
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ModelAnalyzer:
    """
    Analyzes models and validates operations.

    This service combines the responsibilities of validation and field tracking
    since they're closely related and often used together.
    """

    def __init__(self, model_cls):
        """
        Initialize analyzer for a specific model.

        Args:
            model_cls: The Django model class
        """
        self.model_cls = model_cls

    # ========== Validation Methods ==========

    def validate_for_create(self, objs):
        """
        Validate objects for bulk_create operation.

        Args:
            objs: List of model instances

        Returns:
            True when validation passes

        Raises:
            TypeError: If objects are not instances of model_cls
        """
        self._check_types(objs, operation="bulk_create")
        return True

    def validate_for_update(self, objs):
        """
        Validate objects for bulk_update operation.

        Args:
            objs: List of model instances

        Returns:
            True when validation passes

        Raises:
            TypeError: If objects are not instances of model_cls
            ValueError: If objects don't have primary keys
        """
        self._check_types(objs, operation="bulk_update")
        self._check_has_pks(objs, operation="bulk_update")
        return True

    def validate_for_delete(self, objs):
        """
        Validate objects for delete operation.

        Args:
            objs: List of model instances

        Returns:
            True when validation passes

        Raises:
            TypeError: If objects are not instances of model_cls
        """
        self._check_types(objs, operation="delete")
        return True

    def _check_types(self, objs, operation="operation"):
        """
        Check that all objects are instances of the model class.

        Args:
            objs: Iterable of candidate instances; an empty/falsy value passes
            operation: Operation name used as the prefix of the error message

        Raises:
            TypeError: If any object is not an instance of model_cls
        """
        if not objs:
            return

        # A set collapses repeated offenders so each bad type is reported once.
        invalid_types = {
            type(obj).__name__ for obj in objs if not isinstance(obj, self.model_cls)
        }

        if invalid_types:
            raise TypeError(
                f"{operation} expected instances of {self.model_cls.__name__}, "
                f"but got {invalid_types}"
            )

    def _check_has_pks(self, objs, operation="operation"):
        """
        Check that all objects have primary keys.

        Args:
            objs: Iterable of model instances
            operation: Operation name used as the prefix of the error message

        Raises:
            ValueError: If any object's ``pk`` is None
        """
        missing_pks = [obj for obj in objs if obj.pk is None]

        if missing_pks:
            raise ValueError(
                f"{operation} cannot operate on unsaved {self.model_cls.__name__} instances. "
                f"{len(missing_pks)} object(s) have no primary key."
            )

    # ========== Data Fetching Methods ==========

    def fetch_old_records_map(self, instances):
        """
        Fetch old records for instances in a single bulk query.

        This is the SINGLE point of truth for fetching old records.
        All other methods should delegate to this.

        Args:
            instances: List of model instances

        Returns:
            Dict[pk, instance] for O(1) lookups; empty when no instance has a pk
        """
        pks = [obj.pk for obj in instances if obj.pk is not None]
        if not pks:
            return {}

        # _base_manager is used rather than the default manager, so the lookup
        # does not go through any custom default-manager filtering.
        return {obj.pk: obj for obj in self.model_cls._base_manager.filter(pk__in=pks)}

    # ========== Field Introspection Methods ==========

    def get_auto_now_fields(self):
        """
        Get fields that have auto_now or auto_now_add set.

        Returns:
            list: Field names with auto_now behavior
        """
        return [
            field.name
            for field in self.model_cls._meta.fields
            if getattr(field, "auto_now", False) or getattr(field, "auto_now_add", False)
        ]

    def get_fk_fields(self):
        """
        Get all foreign key fields for the model.

        Returns:
            list: Names of concrete relation fields that are not many-to-many
        """
        return [
            field.name
            for field in self.model_cls._meta.concrete_fields
            if field.is_relation and not field.many_to_many
        ]

    def detect_changed_fields(self, objs):
        """
        Detect which fields have changed across a set of objects.

        This method fetches old records from the database in a SINGLE bulk query
        and compares them with the new objects to determine changed fields.

        Values are read through ``field.attname`` (the raw column attribute,
        e.g. ``author_id`` for an ``author`` relation) so that comparing a
        relation never goes through the related-object accessor, which could
        otherwise load the related row for every object.

        Args:
            objs: List of model instances to check

        Returns:
            Sorted list of field names that changed across any object
        """
        if not objs:
            return []

        # Fetch old records using the single source of truth
        old_records_map = self.fetch_old_records_map(objs)
        if not old_records_map:
            return []

        # The set of comparable fields is the same for every object, so it is
        # computed once: primary keys and auto-created fields are never reported.
        comparable_fields = [
            field
            for field in self.model_cls._meta.fields
            if not (field.primary_key or field.auto_created)
        ]

        # Track which fields changed across ALL objects
        changed_fields_set = set()

        for obj in objs:
            if obj.pk is None:
                continue

            old_obj = old_records_map.get(obj.pk)
            if old_obj is None:
                # Object doesn't exist in DB, skip
                continue

            for field in comparable_fields:
                # One difference is enough to report a field; no need to
                # compare it again for the remaining objects.
                if field.name in changed_fields_set:
                    continue

                old_val = getattr(old_obj, field.attname, None)
                new_val = getattr(obj, field.attname, None)

                # Use field's get_prep_value for proper comparison
                try:
                    differs = field.get_prep_value(old_val) != field.get_prep_value(new_val)
                except (TypeError, ValueError):
                    # Fallback to direct comparison
                    differs = old_val != new_val

                if differs:
                    changed_fields_set.add(field.name)

        # Return as sorted list for deterministic behavior
        return sorted(changed_fields_set)
@@ -0,0 +1,430 @@
1
+ """
2
+ Bulk executor service for database operations.
3
+
4
+ This service coordinates bulk database operations with validation and MTI handling.
5
+ """
6
+
7
+ import logging
8
+ from django.db import transaction
9
+ from django.db.models import AutoField
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class BulkExecutor:
    """
    Executes bulk database operations.

    This service coordinates validation, MTI handling, and actual database
    operations. It's the only service that directly calls Django ORM methods.

    Dependencies are explicitly injected via constructor.
    """

    def __init__(self, queryset, analyzer, mti_handler):
        """
        Initialize bulk executor with explicit dependencies.

        Args:
            queryset: Django QuerySet instance
            analyzer: ModelAnalyzer instance (replaces validator + field_tracker)
            mti_handler: MTIHandler instance
        """
        self.queryset = queryset
        self.analyzer = analyzer
        self.mti_handler = mti_handler
        self.model_cls = queryset.model

    def bulk_create(
        self,
        objs,
        batch_size=None,
        ignore_conflicts=False,
        update_conflicts=False,
        update_fields=None,
        unique_fields=None,
        **kwargs,
    ):
        """
        Execute bulk create operation.

        NOTE: Coordinator is responsible for validation before calling this method.
        This executor trusts that inputs have already been validated.

        Args:
            objs: List of model instances to create (pre-validated)
            batch_size: Number of objects to create per batch
            ignore_conflicts: Whether to ignore conflicts (only used on the
                non-MTI path; it is not passed to the MTI plan builder)
            update_conflicts: Whether to update on conflict
            update_fields: Fields to update on conflict
            unique_fields: Fields to use for conflict detection
            **kwargs: Additional arguments (only passed to the non-MTI path)

        Returns:
            List of created objects; ``objs`` itself when it is empty
        """
        if not objs:
            return objs

        # Check if this is an MTI model and route accordingly
        if self.mti_handler.is_mti_model():
            logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk create")
            # Build execution plan
            plan = self.mti_handler.build_create_plan(
                objs,
                batch_size=batch_size,
                update_conflicts=update_conflicts,
                update_fields=update_fields,
                unique_fields=unique_fields,
            )
            # Execute the plan
            return self._execute_mti_create_plan(plan)

        # Non-MTI model - use Django's native bulk_create
        return self._execute_bulk_create(
            objs,
            batch_size,
            ignore_conflicts,
            update_conflicts,
            update_fields,
            unique_fields,
            **kwargs,
        )

    def _execute_bulk_create(
        self,
        objs,
        batch_size=None,
        ignore_conflicts=False,
        update_conflicts=False,
        update_fields=None,
        unique_fields=None,
        **kwargs,
    ):
        """
        Execute the actual Django bulk_create.

        We must call the base Django QuerySet to avoid recursion.

        Args:
            objs: List of model instances to create
            batch_size: Number of objects to create per batch
            ignore_conflicts: Whether to ignore conflicts
            update_conflicts: Whether to update on conflict
            update_fields: Fields to update on conflict
            unique_fields: Fields to use for conflict detection
            **kwargs: Accepted for signature compatibility; NOT forwarded to
                Django's bulk_create

        Returns:
            Whatever Django's ``QuerySet.bulk_create`` returns
        """
        from django.db.models import QuerySet

        # Create a base Django queryset (not our HookQuerySet)
        base_qs = QuerySet(model=self.model_cls, using=self.queryset.db)

        return base_qs.bulk_create(
            objs,
            batch_size=batch_size,
            ignore_conflicts=ignore_conflicts,
            update_conflicts=update_conflicts,
            update_fields=update_fields,
            unique_fields=unique_fields,
        )

    def bulk_update(self, objs, fields, batch_size=None):
        """
        Execute bulk update operation.

        NOTE: Coordinator is responsible for validation before calling this method.
        This executor trusts that inputs have already been validated.

        Args:
            objs: List of model instances to update (pre-validated)
            fields: List of field names to update
            batch_size: Number of objects to update per batch

        Returns:
            Number of objects updated (0 when ``objs`` is empty)
        """
        if not objs:
            return 0

        # Check if this is an MTI model and route accordingly
        if self.mti_handler.is_mti_model():
            logger.info(f"Detected MTI model {self.model_cls.__name__}, using MTI bulk update")
            # Build execution plan
            plan = self.mti_handler.build_update_plan(objs, fields, batch_size=batch_size)
            # Execute the plan
            return self._execute_mti_update_plan(plan)

        # Non-MTI model - use Django's native bulk_update
        # Validation already done by coordinator
        from django.db.models import QuerySet

        base_qs = QuerySet(model=self.model_cls, using=self.queryset.db)
        return base_qs.bulk_update(objs, fields, batch_size=batch_size)

    # ==================== MTI PLAN EXECUTION ====================

    @staticmethod
    def _mark_as_saved(obj, db_alias):
        """Flag ``obj`` as persisted on ``db_alias`` (no longer 'adding')."""
        obj._state.adding = False
        obj._state.db = db_alias

    @staticmethod
    def _pair_with_pks(objs):
        """
        Return ``(pk, obj)`` pairs for every object that has a usable key.

        The key is ``obj.pk``, falling back to ``obj.id``; objects whose key is
        missing or falsy are left out. Building the pairs in one pass keeps each
        pk attached to the object it came from.
        """
        pairs = []
        for obj in objs:
            obj_pk = getattr(obj, "pk", None) or getattr(obj, "id", None)
            if obj_pk:
                pairs.append((obj_pk, obj))
        return pairs

    def _execute_mti_create_plan(self, plan):
        """
        Execute an MTI create plan.

        This is where ALL database operations happen for MTI bulk_create:
        parents are bulk-created level by level, the child rows are linked to
        their parents and inserted, and generated values are copied back onto
        the caller's original objects.

        Args:
            plan: MTICreatePlan object from MTIHandler

        Returns:
            List of created objects with PKs assigned (``plan.original_objects``),
            or an empty list when ``plan`` is falsy
        """
        if not plan:
            return []

        from django.db import transaction
        from django.db.models import QuerySet as BaseQuerySet

        # Resolve the alias once so the transaction and every statement issued
        # inside it use the same database.
        db_alias = self.queryset.db

        with transaction.atomic(using=db_alias, savepoint=False):
            # Step 1: Create all parent objects level by level
            parent_instances_map = {}  # Maps original obj id() -> {model: parent_instance}

            for parent_level in plan.parent_levels:
                # Bulk create parents for this level (one batch for the whole level)
                bulk_kwargs = {"batch_size": len(parent_level.objects)}

                if parent_level.update_conflicts:
                    bulk_kwargs["update_conflicts"] = True
                    bulk_kwargs["unique_fields"] = parent_level.unique_fields
                    bulk_kwargs["update_fields"] = parent_level.update_fields

                # Use base QuerySet to avoid recursion
                base_qs = BaseQuerySet(model=parent_level.model_class, using=db_alias)
                created_parents = base_qs.bulk_create(parent_level.objects, **bulk_kwargs)

                # Copy generated fields back to parent objects
                for created_parent, parent_obj in zip(created_parents, parent_level.objects):
                    for field in parent_level.model_class._meta.local_fields:
                        created_value = getattr(created_parent, field.name, None)
                        if created_value is not None:
                            setattr(parent_obj, field.name, created_value)

                    self._mark_as_saved(parent_obj, db_alias)

                # Map parents back to original objects
                for parent_obj in parent_level.objects:
                    orig_obj_id = parent_level.original_object_map[id(parent_obj)]
                    parents_of_obj = parent_instances_map.setdefault(orig_obj_id, {})
                    parents_of_obj[parent_level.model_class] = parent_obj

            # Step 2: Add parent links to child objects
            for child_obj, orig_obj in zip(plan.child_objects, plan.original_objects):
                parent_instances = parent_instances_map.get(id(orig_obj), {})

                for parent_model, parent_instance in parent_instances.items():
                    parent_link = plan.child_model._meta.get_ancestor_link(parent_model)
                    if parent_link:
                        setattr(child_obj, parent_link.attname, parent_instance.pk)
                        setattr(child_obj, parent_link.name, parent_instance)

            # Step 3: Bulk create child objects using _batched_insert (to bypass MTI check)
            base_qs = BaseQuerySet(model=plan.child_model, using=db_alias)
            base_qs._prepare_for_bulk_create(plan.child_objects)

            # Partition objects by PK status
            objs_without_pk, objs_with_pk = [], []
            for obj in plan.child_objects:
                if obj._is_pk_set():
                    objs_with_pk.append(obj)
                else:
                    objs_without_pk.append(obj)

            # Get fields for insert
            opts = plan.child_model._meta
            fields = [f for f in opts.local_fields if not f.generated]

            # Execute bulk insert
            if objs_with_pk:
                returned_columns = base_qs._batched_insert(
                    objs_with_pk,
                    fields,
                    batch_size=len(objs_with_pk),
                )
                if returned_columns:
                    for obj, results in zip(objs_with_pk, returned_columns):
                        if hasattr(opts, "db_returning_fields") and hasattr(opts, "pk"):
                            for result, field in zip(results, opts.db_returning_fields):
                                # The pk was supplied by the caller; keep it as is.
                                if field != opts.pk:
                                    setattr(obj, field.attname, result)
                        self._mark_as_saved(obj, db_alias)
                else:
                    for obj in objs_with_pk:
                        self._mark_as_saved(obj, db_alias)

            if objs_without_pk:
                # Auto/primary-key columns are left out of the insert for
                # objects that have no pk yet.
                filtered_fields = [
                    f for f in fields
                    if not isinstance(f, AutoField) and not f.primary_key
                ]
                returned_columns = base_qs._batched_insert(
                    objs_without_pk,
                    filtered_fields,
                    batch_size=len(objs_without_pk),
                )
                if returned_columns:
                    for obj, results in zip(objs_without_pk, returned_columns):
                        if hasattr(opts, "db_returning_fields"):
                            for result, field in zip(results, opts.db_returning_fields):
                                setattr(obj, field.attname, result)
                        self._mark_as_saved(obj, db_alias)
                else:
                    for obj in objs_without_pk:
                        self._mark_as_saved(obj, db_alias)

            created_children = plan.child_objects

            # Step 4: Copy PKs and auto-generated fields back to original objects
            pk_field_name = plan.child_model._meta.pk.name

            for orig_obj, child_obj in zip(plan.original_objects, created_children):
                # Copy PK
                child_pk = getattr(child_obj, pk_field_name)
                setattr(orig_obj, pk_field_name, child_pk)

                # Copy auto-generated fields from all levels
                parent_instances = parent_instances_map.get(id(orig_obj), {})

                for model_class in plan.inheritance_chain:
                    # Get source object for this level
                    if model_class in parent_instances:
                        source_obj = parent_instances[model_class]
                    elif model_class == plan.child_model:
                        source_obj = child_obj
                    else:
                        continue

                    # Copy auto-generated field values
                    for field in model_class._meta.local_fields:
                        if field.name == pk_field_name:
                            continue

                        # Skip parent link fields
                        if hasattr(field, 'remote_field') and field.remote_field:
                            parent_link = plan.child_model._meta.get_ancestor_link(model_class)
                            if parent_link and field.name == parent_link.name:
                                continue

                        # Copy auto_now_add, auto_now, and db_returning fields
                        if (getattr(field, 'auto_now_add', False) or
                                getattr(field, 'auto_now', False) or
                                getattr(field, 'db_returning', False)):
                            source_value = getattr(source_obj, field.name, None)
                            if source_value is not None:
                                setattr(orig_obj, field.name, source_value)

                # Update object state
                self._mark_as_saved(orig_obj, db_alias)

        return plan.original_objects

    def _execute_mti_update_plan(self, plan):
        """
        Execute an MTI update plan.

        Updates each table in the inheritance chain using CASE/WHEN for bulk updates.

        Args:
            plan: MTIUpdatePlan object from MTIHandler

        Returns:
            Sum of the row counts reported by each table's UPDATE; 0 when the
            plan is falsy or no object has a usable primary key

        Raises:
            Exception: Any error raised by an UPDATE is logged and re-raised so
                the surrounding transaction is rolled back instead of reporting
                a partial update as success
        """
        if not plan:
            return 0

        # Pair each pk with the object it belongs to. Objects without a key are
        # dropped here, so the pk list and the object list cannot drift apart.
        pk_object_pairs = self._pair_with_pks(plan.objects)
        if not pk_object_pairs:
            return 0

        from django.db import transaction
        from django.db.models import Case, Value, When, QuerySet as BaseQuerySet

        root_pks = [pk for pk, _ in pk_object_pairs]
        total_updated = 0

        # Resolve the alias once so the transaction and every statement issued
        # inside it use the same database.
        db_alias = self.queryset.db

        with transaction.atomic(using=db_alias, savepoint=False):
            # Update each table in the chain
            for field_group in plan.field_groups:
                if not field_group.fields:
                    continue

                base_qs = BaseQuerySet(model=field_group.model_class, using=db_alias)
                pk_filter = {f"{field_group.filter_field}__in": root_pks}

                # Skip tables that hold none of the targeted rows
                if not base_qs.filter(**pk_filter).exists():
                    continue

                # Build CASE statements for bulk update
                case_statements = {}
                for field_name in field_group.fields:
                    field = field_group.model_class._meta.get_field(field_name)

                    # Use column name for FK fields
                    if getattr(field, 'is_relation', False) and hasattr(field, 'attname'):
                        db_field_name = field.attname
                        target_field = field.target_field
                    else:
                        db_field_name = field_name
                        target_field = field

                    when_statements = [
                        When(
                            **{field_group.filter_field: obj_pk},
                            then=Value(getattr(obj, db_field_name), output_field=target_field),
                        )
                        for obj_pk, obj in pk_object_pairs
                    ]

                    if when_statements:
                        case_statements[db_field_name] = Case(
                            *when_statements, output_field=target_field
                        )

                # Execute bulk update
                if case_statements:
                    try:
                        updated_count = base_qs.filter(**pk_filter).update(**case_statements)
                    except Exception as e:
                        logger.error(f"MTI bulk update failed for {field_group.model_class.__name__}: {e}")
                        # Swallowing an error inside atomic() would hide the
                        # failure from the caller; propagate it.
                        raise
                    total_updated += updated_count

        return total_updated

    def delete_queryset(self):
        """
        Execute delete on the queryset.

        NOTE: Coordinator is responsible for validation before calling this method.
        This executor trusts that inputs have already been validated.

        Returns:
            Tuple of (count, details dict); ``(0, {})`` when the queryset is falsy
        """
        # NOTE(review): truth-testing a Django QuerySet evaluates it (runs the
        # query and caches the rows) - confirm that cost is acceptable here.
        if not self.queryset:
            return 0, {}

        # Execute delete via QuerySet
        # Validation already done by coordinator
        from django.db.models import QuerySet

        # Call the base implementation directly rather than self.queryset.delete()
        return QuerySet.delete(self.queryset)