django-bulk-hooks 0.2.9__py3-none-any.whl → 0.2.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,87 +1,103 @@
1
- """
2
- MTI operation plans - Data structures for multi-table inheritance operations.
3
-
4
- These are pure data structures returned by MTIHandler to be executed by BulkExecutor.
5
- This separates planning (logic) from execution (database operations).
6
- """
7
-
8
- from dataclasses import dataclass, field
9
- from typing import Dict, List, Any
10
-
11
-
12
- @dataclass
13
- class ParentLevel:
14
- """
15
- Represents one level in the parent hierarchy for MTI bulk create.
16
-
17
- Attributes:
18
- model_class: The parent model class for this level
19
- objects: List of parent instances to create
20
- original_object_map: Maps parent instance id() -> original object id()
21
- update_conflicts: Whether to enable UPSERT for this level
22
- unique_fields: Fields for conflict detection (if update_conflicts=True)
23
- update_fields: Fields to update on conflict (if update_conflicts=True)
24
- """
25
- model_class: Any
26
- objects: List[Any]
27
- original_object_map: Dict[int, int] = field(default_factory=dict)
28
- update_conflicts: bool = False
29
- unique_fields: List[str] = field(default_factory=list)
30
- update_fields: List[str] = field(default_factory=list)
31
-
32
-
33
- @dataclass
34
- class MTICreatePlan:
35
- """
36
- Plan for executing bulk_create on an MTI model.
37
-
38
- This plan describes WHAT to create, not HOW to create it.
39
- The executor is responsible for executing this plan.
40
-
41
- Attributes:
42
- inheritance_chain: List of model classes from root to child
43
- parent_levels: List of ParentLevel objects, one per parent model
44
- child_objects: List of child instances to create (not yet with parent links)
45
- child_model: The child model class
46
- original_objects: Original objects provided by user
47
- batch_size: Batch size for operations
48
- """
49
- inheritance_chain: List[Any]
50
- parent_levels: List[ParentLevel]
51
- child_objects: List[Any]
52
- child_model: Any
53
- original_objects: List[Any]
54
- batch_size: int = None
55
-
56
-
57
- @dataclass
58
- class ModelFieldGroup:
59
- """
60
- Represents fields to update for one model in the inheritance chain.
61
-
62
- Attributes:
63
- model_class: The model class
64
- fields: List of field names to update on this model
65
- filter_field: Field to use for filtering (e.g., 'pk' or parent link attname)
66
- """
67
- model_class: Any
68
- fields: List[str]
69
- filter_field: str = "pk"
70
-
71
-
72
- @dataclass
73
- class MTIUpdatePlan:
74
- """
75
- Plan for executing bulk_update on an MTI model.
76
-
77
- Attributes:
78
- inheritance_chain: List of model classes from root to child
79
- field_groups: List of ModelFieldGroup objects
80
- objects: Objects to update
81
- batch_size: Batch size for operations
82
- """
83
- inheritance_chain: List[Any]
84
- field_groups: List[ModelFieldGroup]
85
- objects: List[Any]
86
- batch_size: int = None
87
-
1
+ """
2
+ MTI operation plans - Data structures for multi-table inheritance operations.
3
+
4
+ These are pure data structures returned by MTIHandler to be executed by BulkExecutor.
5
+ This separates planning (logic) from execution (database operations).
6
+ """
7
+
8
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Optional
11
+
12
+
13
@dataclass
class ParentLevel:
    """
    One level of the parent hierarchy in an MTI bulk create.

    Each level carries the parent instances to insert for a single parent
    model, plus the UPSERT settings that apply to that model's table.

    Attributes:
        model_class: Parent model class handled at this level.
        objects: Parent instances to create.
        original_object_map: Mapping of id(parent instance) -> id(original object).
        update_conflicts: True to enable UPSERT for this level.
        unique_fields: Conflict-detection fields (used when update_conflicts=True).
        update_fields: Fields to update on conflict (used when update_conflicts=True).
    """

    # Required, positional: the model and the instances destined for its table.
    model_class: Any
    objects: list[Any]

    # Optional; mutable defaults go through default_factory so that every
    # instance gets its own container.
    original_object_map: dict[int, int] = field(default_factory=dict)
    update_conflicts: bool = False
    unique_fields: list[str] = field(default_factory=list)
    update_fields: list[str] = field(default_factory=list)
33
+
34
+
35
@dataclass
class MTICreatePlan:
    """
    Plan for executing bulk_create on an MTI model.

    This plan describes WHAT to create, not HOW to create it.
    The executor is responsible for executing this plan.

    Attributes:
        inheritance_chain: List of model classes from root to child
        parent_levels: List of ParentLevel objects, one per parent model
        child_objects: List of child instances to create (not yet with parent links)
        child_model: The child model class
        original_objects: Original objects provided by user
        batch_size: Batch size for operations; None when no batch size was given
        existing_record_ids: Set of id() of original objects that represent existing DB records
        update_conflicts: Whether this is an upsert operation
        unique_fields: Fields used for conflict detection (original, unfiltered)
        update_fields: Fields to update on conflict (original, unfiltered)
        child_unique_fields: Pre-filtered field objects for child table conflict detection
        child_update_fields: Pre-filtered field objects for child table updates
    """

    inheritance_chain: list[Any]
    parent_levels: list[ParentLevel]
    child_objects: list[Any]
    child_model: Any
    original_objects: list[Any]
    # The default is None, so the annotation must say so explicitly.
    batch_size: Optional[int] = None
    # Holds id() values, hence ints.
    existing_record_ids: set[int] = field(default_factory=set)
    update_conflicts: bool = False
    unique_fields: list[str] = field(default_factory=list)
    update_fields: list[str] = field(default_factory=list)
    # Field objects (not names) for the child table.
    child_unique_fields: list[Any] = field(default_factory=list)
    child_update_fields: list[Any] = field(default_factory=list)
70
+
71
+
72
@dataclass
class ModelFieldGroup:
    """
    The set of fields to update on one model of the inheritance chain.

    Attributes:
        model_class: Model class this group targets.
        fields: Names of the fields to update on that model.
        filter_field: Name of the field used for filtering, e.g. 'pk' or the
            parent link attname. Defaults to 'pk'.
    """

    # Required, positional.
    model_class: Any
    fields: list[str]

    # Optional.
    filter_field: str = "pk"
86
+
87
+
88
@dataclass
class MTIUpdatePlan:
    """
    Plan for executing bulk_update on an MTI model.

    Attributes:
        inheritance_chain: List of model classes from root to child
        field_groups: List of ModelFieldGroup objects
        objects: Objects to update
        batch_size: Batch size for operations; None when no batch size was given
    """

    inheritance_chain: list[Any]
    field_groups: list[ModelFieldGroup]
    objects: list[Any]
    # The default is None, so the annotation must say so explicitly.
    batch_size: Optional[int] = None
@@ -0,0 +1,196 @@
1
+ """
2
+ Record classification service for database queries.
3
+
4
+ This service handles all database queries related to classifying and fetching
5
+ records based on various criteria (PKs, unique fields, etc.).
6
+
7
+ Separates data access concerns from business logic.
8
+ """
9
+
10
+ import logging
11
+
12
+ from django.db.models import Q
13
+
14
+ from django_bulk_hooks.operations.field_utils import get_field_value_for_db
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class RecordClassifier:
20
+ """
21
+ Service for classifying and fetching records via database queries.
22
+
23
+ This is the SINGLE point of truth for record classification queries.
24
+ Keeps database access logic separate from business/planning logic.
25
+ """
26
+
27
+ def __init__(self, model_cls):
28
+ """
29
+ Initialize classifier for a specific model.
30
+
31
+ Args:
32
+ model_cls: The Django model class
33
+ """
34
+ self.model_cls = model_cls
35
+
36
+ def classify_for_upsert(self, objs, unique_fields, query_model=None):
37
+ """
38
+ Classify records as new or existing based on unique_fields.
39
+
40
+ Queries the database to check which records already exist based on the
41
+ unique_fields constraint.
42
+
43
+ Args:
44
+ objs: List of model instances
45
+ unique_fields: List of field names that form the unique constraint
46
+ query_model: Optional model class to query (for MTI, may be different from self.model_cls)
47
+
48
+ Returns:
49
+ Tuple of (existing_record_ids, existing_pks_map)
50
+ - existing_record_ids: Set of id() for objects that exist in DB
51
+ - existing_pks_map: Dict mapping id(obj) -> pk for existing records
52
+ """
53
+ if not unique_fields or not objs:
54
+ return set(), {}
55
+
56
+ # Use query_model if provided (for MTI scenarios), otherwise use self.model_cls
57
+ query_model = query_model or self.model_cls
58
+
59
+ # Build a query to find existing records
60
+ queries = []
61
+ obj_to_unique_values = {}
62
+
63
+ for obj in objs:
64
+ # Build lookup dict for this object's unique fields
65
+ lookup = {}
66
+ normalized_values = []
67
+
68
+ for field_name in unique_fields:
69
+ # Use centralized field value extraction for consistent FK handling
70
+ value = get_field_value_for_db(obj, field_name, query_model)
71
+ if value is None:
72
+ # Can't match on None values
73
+ break
74
+ lookup[field_name] = value
75
+ normalized_values.append(value)
76
+ else:
77
+ # All unique fields have values, add to query
78
+ if lookup:
79
+ queries.append(Q(**lookup))
80
+ # Store normalized values for comparison with database results
81
+ obj_to_unique_values[id(obj)] = tuple(normalized_values)
82
+
83
+ if not queries:
84
+ return set(), {}
85
+
86
+ # Query for existing records
87
+ combined_query = queries[0]
88
+ for q in queries[1:]:
89
+ combined_query |= q
90
+
91
+ logger.info(f"Classifying for upsert: model={query_model.__name__}, query={combined_query}, unique_fields={unique_fields}")
92
+ queryset = query_model.objects.filter(combined_query)
93
+ logger.info(f"Queryset SQL: {queryset.query}")
94
+ logger.info(f"All records in table: {query_model.objects.all().count()}")
95
+ existing_records = list(queryset.values("pk", *unique_fields))
96
+ logger.info(f"Found {len(existing_records)} existing records: {existing_records}")
97
+
98
+ # Map existing records back to original objects
99
+ existing_record_ids = set()
100
+ existing_pks_map = {}
101
+
102
+ for record in existing_records:
103
+ record_values = tuple(record[field] for field in unique_fields)
104
+ # Find which object(s) match these values
105
+ for obj_id, obj_values in obj_to_unique_values.items():
106
+ if obj_values == record_values:
107
+ existing_record_ids.add(obj_id)
108
+ existing_pks_map[obj_id] = record["pk"]
109
+
110
+ logger.info(
111
+ f"Classified {len(existing_record_ids)} existing and {len(objs) - len(existing_record_ids)} new records for upsert",
112
+ )
113
+
114
+ return existing_record_ids, existing_pks_map
115
+
116
+ def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
117
+ """
118
+ Fetch records by primary keys with optional relationship loading.
119
+
120
+ Args:
121
+ pks: List of primary key values
122
+ select_related: Optional list of fields to select_related
123
+ prefetch_related: Optional list of fields to prefetch_related
124
+
125
+ Returns:
126
+ Dict[pk, instance] for O(1) lookups
127
+ """
128
+ if not pks:
129
+ return {}
130
+
131
+ queryset = self.model_cls._base_manager.filter(pk__in=pks)
132
+
133
+ if select_related:
134
+ queryset = queryset.select_related(*select_related)
135
+
136
+ if prefetch_related:
137
+ queryset = queryset.prefetch_related(*prefetch_related)
138
+
139
+ return {obj.pk: obj for obj in queryset}
140
+
141
+ def fetch_by_unique_constraint(self, field_values_map):
142
+ """
143
+ Fetch records matching a unique constraint.
144
+
145
+ Args:
146
+ field_values_map: Dict of {field_name: value} for unique constraint
147
+
148
+ Returns:
149
+ Model instance if found, None otherwise
150
+ """
151
+ try:
152
+ return self.model_cls.objects.get(**field_values_map)
153
+ except self.model_cls.DoesNotExist:
154
+ return None
155
+ except self.model_cls.MultipleObjectsReturned:
156
+ logger.warning(
157
+ f"Multiple {self.model_cls.__name__} records found for unique constraint {field_values_map}",
158
+ )
159
+ return self.model_cls.objects.filter(**field_values_map).first()
160
+
161
+ def exists_by_pks(self, pks):
162
+ """
163
+ Check if records exist by primary keys without fetching them.
164
+
165
+ Args:
166
+ pks: List of primary key values
167
+
168
+ Returns:
169
+ Set of PKs that exist in the database
170
+ """
171
+ if not pks:
172
+ return set()
173
+
174
+ existing_pks = self.model_cls.objects.filter(
175
+ pk__in=pks,
176
+ ).values_list("pk", flat=True)
177
+
178
+ return set(existing_pks)
179
+
180
+ def count_by_unique_fields(self, objs, unique_fields):
181
+ """
182
+ Count how many objects already exist based on unique fields.
183
+
184
+ Useful for validation or reporting before upsert operations.
185
+
186
+ Args:
187
+ objs: List of model instances
188
+ unique_fields: List of field names that form the unique constraint
189
+
190
+ Returns:
191
+ Tuple of (existing_count, new_count)
192
+ """
193
+ existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
194
+ existing_count = len(existing_ids)
195
+ new_count = len(objs) - existing_count
196
+ return existing_count, new_count