django-bulk-hooks 0.1.83__py3-none-any.whl → 0.2.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of django-bulk-hooks might be problematic. Click here for more details.

@@ -0,0 +1,103 @@
1
+ """
2
+ MTI operation plans - Data structures for multi-table inheritance operations.
3
+
4
+ These are pure data structures returned by MTIHandler to be executed by BulkExecutor.
5
+ This separates planning (logic) from execution (database operations).
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from dataclasses import field
10
+ from typing import Any
11
+
12
+
13
@dataclass
class ParentLevel:
    """
    One parent-model level of an MTI bulk create.

    Pure data: the parent instances to create for a single model in the
    parent hierarchy, plus the upsert settings that apply to that level.
    """

    # Parent model class handled at this level.
    model_class: Any
    # Parent instances to create.
    objects: list[Any]
    # Maps id(parent instance) -> id(original object).
    original_object_map: dict[int, int] = field(default_factory=dict)
    # When True, UPSERT is enabled for this level.
    update_conflicts: bool = False
    # Fields for conflict detection (used when update_conflicts is True).
    unique_fields: list[str] = field(default_factory=list)
    # Fields to update on conflict (used when update_conflicts is True).
    update_fields: list[str] = field(default_factory=list)
33
+
34
+
35
@dataclass
class MTICreatePlan:
    """
    Description of a bulk_create on an MTI model.

    The plan states WHAT has to be created; HOW it is created is left to
    the executor that consumes the plan.
    """

    # Model classes ordered from root to child.
    inheritance_chain: list[Any]
    # One ParentLevel per parent model.
    parent_levels: list[ParentLevel]
    # Child instances to create; parent links are not yet set on them.
    child_objects: list[Any]
    # The child model class.
    child_model: Any
    # The objects originally supplied by the user.
    original_objects: list[Any]
    # Batch size for operations; defaults to None.
    batch_size: int = None
    # id() of each original object that represents an existing DB record.
    existing_record_ids: set = field(default_factory=set)
    # True when this is an upsert operation.
    update_conflicts: bool = False
    # Conflict-detection fields as originally given (unfiltered).
    unique_fields: list[str] = field(default_factory=list)
    # Fields to update on conflict as originally given (unfiltered).
    update_fields: list[str] = field(default_factory=list)
    # Pre-filtered field objects for child table conflict detection.
    child_unique_fields: list = field(default_factory=list)
    # Pre-filtered field objects for child table updates.
    child_update_fields: list = field(default_factory=list)
70
+
71
+
72
@dataclass
class ModelFieldGroup:
    """
    The fields to update on a single model of the inheritance chain.
    """

    # The model class the fields belong to.
    model_class: Any
    # Names of the fields to update on this model.
    fields: list[str]
    # Field used for filtering, e.g. 'pk' or the parent link attname.
    filter_field: str = "pk"
86
+
87
+
88
@dataclass
class MTIUpdatePlan:
    """
    Description of a bulk_update on an MTI model.
    """

    # Model classes ordered from root to child.
    inheritance_chain: list[Any]
    # ModelFieldGroup entries describing the fields to update per model.
    field_groups: list[ModelFieldGroup]
    # The objects to update.
    objects: list[Any]
    # Batch size for operations; defaults to None.
    batch_size: int = None
@@ -0,0 +1,196 @@
1
+ """
2
+ Record classification service for database queries.
3
+
4
+ This service handles all database queries related to classifying and fetching
5
+ records based on various criteria (PKs, unique fields, etc.).
6
+
7
+ Separates data access concerns from business logic.
8
+ """
9
+
10
+ import logging
11
+
12
+ from django.db.models import Q
13
+
14
+ from django_bulk_hooks.operations.field_utils import get_field_value_for_db
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class RecordClassifier:
    """
    Service for classifying and fetching records via database queries.

    This is the SINGLE point of truth for record classification queries.
    Keeps database access logic separate from business/planning logic.
    """

    def __init__(self, model_cls):
        """
        Initialize classifier for a specific model.

        Args:
            model_cls: The Django model class
        """
        self.model_cls = model_cls

    def classify_for_upsert(self, objs, unique_fields, query_model=None):
        """
        Classify records as new or existing based on unique_fields.

        Issues one query that ORs together a lookup per object and then
        matches the returned rows back to the objects by their unique values.
        Objects with a None value in any unique field are never queried and
        are therefore never reported as existing.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint
            query_model: Optional model class to query (for MTI, may be
                different from self.model_cls)

        Returns:
            Tuple of (existing_record_ids, existing_pks_map)
            - existing_record_ids: Set of id() for objects that exist in DB
            - existing_pks_map: Dict mapping id(obj) -> pk for existing records
        """
        if not unique_fields or not objs:
            return set(), {}

        # Use query_model if provided (for MTI scenarios), otherwise self.model_cls
        query_model = query_model or self.model_cls

        queries = []
        obj_to_unique_values = {}

        for obj in objs:
            lookup = {}
            normalized_values = []

            for field_name in unique_fields:
                # Centralized field value extraction for consistent FK handling
                value = get_field_value_for_db(obj, field_name, query_model)
                if value is None:
                    # Can't match on None values; skip this object entirely
                    break
                lookup[field_name] = value
                normalized_values.append(value)
            else:
                # Reached only when no unique field was None
                if lookup:
                    queries.append(Q(**lookup))
                # Keep the values so DB rows can be matched back to this object
                obj_to_unique_values[id(obj)] = tuple(normalized_values)

        if not queries:
            return set(), {}

        combined_query = queries[0]
        for q in queries[1:]:
            combined_query |= q

        # Diagnostics use lazy %-args at DEBUG level so that the Q object and
        # the compiled SQL are only rendered when debug logging is enabled.
        # The former "All records in table" line was dropped: it executed a
        # full-table COUNT(*) on every call purely to produce a log message.
        logger.debug(
            "Classifying for upsert: model=%s, query=%s, unique_fields=%s",
            query_model.__name__,
            combined_query,
            unique_fields,
        )
        queryset = query_model.objects.filter(combined_query)
        logger.debug("Queryset SQL: %s", queryset.query)
        existing_records = list(queryset.values("pk", *unique_fields))
        logger.debug(
            "Found %d existing records: %s",
            len(existing_records),
            existing_records,
        )

        # Map existing records back to original objects
        existing_record_ids = set()
        existing_pks_map = {}

        for record in existing_records:
            record_values = tuple(record[field] for field in unique_fields)
            # Several objects may carry the same unique values; mark them all
            for obj_id, obj_values in obj_to_unique_values.items():
                if obj_values == record_values:
                    existing_record_ids.add(obj_id)
                    existing_pks_map[obj_id] = record["pk"]

        logger.info(
            "Classified %d existing and %d new records for upsert",
            len(existing_record_ids),
            len(objs) - len(existing_record_ids),
        )

        return existing_record_ids, existing_pks_map

    def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
        """
        Fetch records by primary keys with optional relationship loading.

        Queries through the model's ``_base_manager``.

        Args:
            pks: List of primary key values
            select_related: Optional list of fields to select_related
            prefetch_related: Optional list of fields to prefetch_related

        Returns:
            Dict[pk, instance] for O(1) lookups; empty dict when pks is empty
        """
        if not pks:
            return {}

        queryset = self.model_cls._base_manager.filter(pk__in=pks)

        if select_related:
            queryset = queryset.select_related(*select_related)

        if prefetch_related:
            queryset = queryset.prefetch_related(*prefetch_related)

        return {obj.pk: obj for obj in queryset}

    def fetch_by_unique_constraint(self, field_values_map):
        """
        Fetch the record matching a unique constraint.

        Args:
            field_values_map: Dict of {field_name: value} for unique constraint

        Returns:
            Model instance if found, None otherwise. When several rows match,
            a warning is logged and the first row of a second, filtered query
            is returned.
        """
        try:
            return self.model_cls.objects.get(**field_values_map)
        except self.model_cls.DoesNotExist:
            return None
        except self.model_cls.MultipleObjectsReturned:
            logger.warning(
                "Multiple %s records found for unique constraint %s",
                self.model_cls.__name__,
                field_values_map,
            )
            return self.model_cls.objects.filter(**field_values_map).first()

    def exists_by_pks(self, pks):
        """
        Check which records exist by primary key without fetching them.

        Args:
            pks: List of primary key values

        Returns:
            Set of PKs that exist in the database; empty set when pks is empty
        """
        if not pks:
            return set()

        existing_pks = self.model_cls.objects.filter(
            pk__in=pks,
        ).values_list("pk", flat=True)

        return set(existing_pks)

    def count_by_unique_fields(self, objs, unique_fields):
        """
        Count how many objects already exist based on unique fields.

        Useful for validation or reporting before upsert operations.
        Delegates to classify_for_upsert against self.model_cls.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint

        Returns:
            Tuple of (existing_count, new_count)
        """
        existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
        existing_count = len(existing_ids)
        new_count = len(objs) - existing_count
        return existing_count, new_count
@@ -1,43 +1,233 @@
1
- from django.db import models, transaction
2
-
3
-
4
- class HookQuerySet(models.QuerySet):
5
- @transaction.atomic
6
- def delete(self):
7
- objs = list(self)
8
- if not objs:
9
- return 0
10
- return self.model.objects.bulk_delete(objs)
11
-
12
- @transaction.atomic
13
- def update(self, **kwargs):
14
- instances = list(self)
15
- if not instances:
16
- return 0
17
-
18
- model_cls = self.model
19
- pks = [obj.pk for obj in instances]
20
-
21
- # Load originals for hook comparison
22
- originals = list(model_cls.objects.filter(pk__in=pks))
23
-
24
- # Apply field updates to instances
25
- for obj in instances:
26
- for field, value in kwargs.items():
27
- setattr(obj, field, value)
28
-
29
- # Run BEFORE_UPDATE hooks
30
- from django_bulk_hooks import engine
31
- from django_bulk_hooks.context import HookContext
32
-
33
- ctx = HookContext(model_cls)
34
- engine.run(model_cls, "before_update", instances, originals, ctx=ctx)
35
-
36
- # Use Django's built-in update logic directly
37
- queryset = self.model.objects.filter(pk__in=pks)
38
- update_count = queryset.update(**kwargs)
39
-
40
- # Run AFTER_UPDATE hooks
41
- engine.run(model_cls, "after_update", instances, originals, ctx=ctx)
42
-
43
- return update_count
1
+ """
2
+ HookQuerySet - Django QuerySet with hook support.
3
+
4
+ This is a thin coordinator that delegates all complex logic to services.
5
+ It follows the Facade pattern, providing a simple interface over the
6
+ complex coordination required for bulk operations with hooks.
7
+ """
8
+
9
+ import logging
10
+
11
+ from django.db import models
12
+ from django.db import transaction
13
+
14
+ from django_bulk_hooks.helpers import extract_pks
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class HookQuerySet(models.QuerySet):
    """
    QuerySet with hook support.

    This is a thin facade over BulkOperationCoordinator. It provides
    backward-compatible API for Django's QuerySet while integrating
    the full hook lifecycle.

    Key design principles:
    - Minimal logic (< 10 lines per method)
    - No business logic (delegate to coordinator)
    - No conditionals (let services handle it)
    - Transaction boundaries only
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Created on first access by the `coordinator` property.
        self._coordinator = None

    @classmethod
    def with_hooks(cls, queryset):
        """
        Apply hook functionality to any queryset.

        This enables hooks to work with any manager by applying hook
        capabilities at the queryset level rather than through inheritance.

        Args:
            queryset: Any Django QuerySet instance

        Returns:
            ``queryset`` itself when it already is an instance of this class,
            otherwise a new instance built from its model, query, database
            alias and hints.
        """
        if isinstance(queryset, cls):
            return queryset  # Already has hooks

        # NOTE(review): only model/query/using/hints are carried over here;
        # other private QuerySet state is not copied - confirm that is intended.
        hook_qs = cls(
            model=queryset.model,
            query=queryset.query,
            using=queryset._db,
            hints=getattr(queryset, "_hints", {}),
        )

        # Carry over public data attributes added by other queryset
        # enhancements so the two can be composed.
        cls._preserve_queryset_attributes(hook_qs, queryset)

        return hook_qs

    @classmethod
    def _preserve_queryset_attributes(cls, hook_qs, original_qs):
        """
        Copy public, non-callable attributes from the original queryset.

        Only names that do not start with an underscore and that this class
        does not already define are considered. Attributes that cannot be
        read or assigned are skipped.

        Args:
            hook_qs: The queryset receiving the attributes
            original_qs: The queryset the attributes are read from
        """
        for attr_name in dir(original_qs):
            if attr_name.startswith("_") or hasattr(cls, attr_name):
                continue
            try:
                # Read the attribute exactly once: property getters may be
                # expensive, and a getter that raises must not abort the copy.
                value = getattr(original_qs, attr_name)
                if callable(value):
                    continue
                setattr(hook_qs, attr_name, value)
            except (AttributeError, TypeError):
                # Skip attributes that can't be copied
                continue

    @property
    def coordinator(self):
        """Lazy initialization of coordinator"""
        if self._coordinator is None:
            # Imported here rather than at module level, as in the original.
            from django_bulk_hooks.operations import BulkOperationCoordinator

            self._coordinator = BulkOperationCoordinator(self)
        return self._coordinator

    @transaction.atomic
    def bulk_create(
        self,
        objs,
        batch_size=None,
        ignore_conflicts=False,
        update_conflicts=False,
        update_fields=None,
        unique_fields=None,
        bypass_hooks=False,
    ):
        """
        Create multiple objects with hook support.

        This is the public API - delegates to coordinator.

        Args:
            objs: Model instances to create
            batch_size: Number of objects per batch
            ignore_conflicts: Forwarded unchanged to the coordinator
            update_conflicts: Forwarded unchanged to the coordinator
            update_fields: Forwarded unchanged to the coordinator
            unique_fields: Forwarded unchanged to the coordinator
            bypass_hooks: Skip all hooks if True

        Returns:
            Whatever ``coordinator.create`` returns
        """
        return self.coordinator.create(
            objs=objs,
            batch_size=batch_size,
            ignore_conflicts=ignore_conflicts,
            update_conflicts=update_conflicts,
            update_fields=update_fields,
            unique_fields=unique_fields,
            bypass_hooks=bypass_hooks,
        )

    @transaction.atomic
    def bulk_update(
        self,
        objs,
        fields=None,
        batch_size=None,
        bypass_hooks=False,
        **kwargs,
    ):
        """
        Update multiple objects with hook support.

        This is the public API - delegates to coordinator.

        Args:
            objs: List of model instances to update
            fields: List of field names to update (optional, will auto-detect if None)
            batch_size: Number of objects per batch
            bypass_hooks: Skip all hooks if True
            **kwargs: Accepted but not used by this method

        Returns:
            Number of objects updated; 0 when fields is None and no changed
            fields are detected
        """
        # If fields is None, auto-detect changed fields using analyzer
        if fields is None:
            fields = self.coordinator.analyzer.detect_changed_fields(objs)
            if not fields:
                return 0

        return self.coordinator.update(
            objs=objs,
            fields=fields,
            batch_size=batch_size,
            bypass_hooks=bypass_hooks,
        )

    @transaction.atomic
    def update(self, bypass_hooks=False, **kwargs):
        """
        Update QuerySet with hook support.

        This is the public API - delegates to coordinator.

        Args:
            bypass_hooks: Skip all hooks if True
            **kwargs: Fields to update

        Returns:
            Number of objects updated
        """
        return self.coordinator.update_queryset(
            update_kwargs=kwargs,
            bypass_hooks=bypass_hooks,
        )

    @transaction.atomic
    def bulk_delete(
        self,
        objs,
        bypass_hooks=False,
        **kwargs,
    ):
        """
        Delete multiple objects with hook support.

        This is the public API - delegates to coordinator.

        Args:
            objs: List of objects to delete
            bypass_hooks: Skip all hooks if True
            **kwargs: Accepted but not used by this method

        Returns:
            The count reported by the coordinator. Unlike ``delete``, the
            details dict is discarded. Returns 0 when no primary keys can be
            extracted from ``objs``.
        """
        pks = extract_pks(objs)
        if not pks:
            return 0

        # Restrict the delete to the given objects. The filtered queryset
        # builds its own coordinator through the `coordinator` property.
        filtered_qs = self.filter(pk__in=pks)

        count, _details = filtered_qs.coordinator.delete(
            bypass_hooks=bypass_hooks,
        )

        return count

    @transaction.atomic
    def delete(self, bypass_hooks=False):
        """
        Delete QuerySet with hook support.

        This is the public API - delegates to coordinator.

        Args:
            bypass_hooks: Skip all hooks if True

        Returns:
            Tuple of (count, details dict)
        """
        return self.coordinator.delete(
            bypass_hooks=bypass_hooks,
        )