django-bulk-hooks 0.2.9__py3-none-any.whl → 0.2.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_bulk_hooks/__init__.py +20 -27
- django_bulk_hooks/changeset.py +214 -230
- django_bulk_hooks/conditions.py +12 -12
- django_bulk_hooks/decorators.py +68 -26
- django_bulk_hooks/dispatcher.py +369 -58
- django_bulk_hooks/factory.py +541 -565
- django_bulk_hooks/handler.py +106 -115
- django_bulk_hooks/helpers.py +258 -99
- django_bulk_hooks/manager.py +134 -130
- django_bulk_hooks/models.py +89 -76
- django_bulk_hooks/operations/__init__.py +5 -5
- django_bulk_hooks/operations/analyzer.py +299 -172
- django_bulk_hooks/operations/bulk_executor.py +742 -437
- django_bulk_hooks/operations/coordinator.py +928 -472
- django_bulk_hooks/operations/field_utils.py +335 -0
- django_bulk_hooks/operations/mti_handler.py +696 -473
- django_bulk_hooks/operations/mti_plans.py +103 -87
- django_bulk_hooks/operations/record_classifier.py +196 -0
- django_bulk_hooks/queryset.py +233 -189
- django_bulk_hooks/registry.py +276 -288
- {django_bulk_hooks-0.2.9.dist-info → django_bulk_hooks-0.2.93.dist-info}/METADATA +55 -4
- django_bulk_hooks-0.2.93.dist-info/RECORD +27 -0
- django_bulk_hooks/debug_utils.py +0 -145
- django_bulk_hooks-0.2.9.dist-info/RECORD +0 -26
- {django_bulk_hooks-0.2.9.dist-info → django_bulk_hooks-0.2.93.dist-info}/LICENSE +0 -0
- {django_bulk_hooks-0.2.9.dist-info → django_bulk_hooks-0.2.93.dist-info}/WHEEL +0 -0
|
@@ -1,87 +1,103 @@
|
|
|
1
|
-
"""
|
|
2
|
-
MTI operation plans - Data structures for multi-table inheritance operations.
|
|
3
|
-
|
|
4
|
-
These are pure data structures returned by MTIHandler to be executed by BulkExecutor.
|
|
5
|
-
This separates planning (logic) from execution (database operations).
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from dataclasses import dataclass
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
@dataclass
|
|
73
|
-
class
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
Attributes:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
1
|
+
"""
|
|
2
|
+
MTI operation plans - Data structures for multi-table inheritance operations.
|
|
3
|
+
|
|
4
|
+
These are pure data structures returned by MTIHandler to be executed by BulkExecutor.
|
|
5
|
+
This separates planning (logic) from execution (database operations).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ParentLevel:
    """
    A single tier of the parent hierarchy processed during an MTI bulk create.

    Attributes:
        model_class: Parent model class targeted by this tier.
        objects: Parent instances queued for creation at this tier.
        original_object_map: Lookup from id() of a parent instance to the
            id() of the original object it was derived from.
        update_conflicts: True when this tier should be executed as an UPSERT.
        unique_fields: Conflict-detection fields; only meaningful when
            update_conflicts is True.
        update_fields: Fields rewritten on conflict; only meaningful when
            update_conflicts is True.
    """

    # Required, positional: what to create and for which model.
    model_class: Any
    objects: list[Any]

    # Optional bookkeeping; each instance gets its own fresh container.
    original_object_map: dict[int, int] = field(default_factory=dict)

    # Optional upsert configuration.
    update_conflicts: bool = False
    unique_fields: list[str] = field(default_factory=list)
    update_fields: list[str] = field(default_factory=list)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class MTICreatePlan:
    """
    Plan for executing bulk_create on an MTI model.

    This plan describes WHAT to create, not HOW to create it.
    The executor is responsible for executing this plan.

    Attributes:
        inheritance_chain: List of model classes from root to child
        parent_levels: List of ParentLevel objects, one per parent model
        child_objects: List of child instances to create (not yet with parent links)
        child_model: The child model class
        original_objects: Original objects provided by user
        batch_size: Batch size for operations; None when no batch size was given
        existing_record_ids: Set of id() of original objects that represent existing DB records
        update_conflicts: Whether this is an upsert operation
        unique_fields: Fields used for conflict detection (original, unfiltered)
        update_fields: Fields to update on conflict (original, unfiltered)
        child_unique_fields: Pre-filtered field objects for child table conflict detection
        child_update_fields: Pre-filtered field objects for child table updates
    """

    inheritance_chain: list[Any]
    parent_levels: list[ParentLevel]
    child_objects: list[Any]
    child_model: Any
    original_objects: list[Any]
    # The default is None, so the annotation must admit None as well as int.
    batch_size: Optional[int] = None
    # Holds id() values, which are ints.
    existing_record_ids: set[int] = field(default_factory=set)
    update_conflicts: bool = False
    unique_fields: list[str] = field(default_factory=list)
    update_fields: list[str] = field(default_factory=list)
    child_unique_fields: list[Any] = field(default_factory=list)  # Field objects for child table
    child_update_fields: list[Any] = field(default_factory=list)  # Field objects for child table
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class ModelFieldGroup:
    """
    The set of fields to update on one model of the inheritance chain.

    Attributes:
        model_class: Model class the fields belong to.
        fields: Names of the fields to update on that model.
        filter_field: Field used to filter rows (for example 'pk' or the
            parent link attname).
    """

    # Required, positional.
    model_class: Any
    fields: list[str]

    # Optional; filtering is done on the primary key unless told otherwise.
    filter_field: str = "pk"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class MTIUpdatePlan:
    """
    Plan for executing bulk_update on an MTI model.

    Attributes:
        inheritance_chain: List of model classes from root to child
        field_groups: List of ModelFieldGroup objects
        objects: Objects to update
        batch_size: Batch size for operations; None when no batch size was given
    """

    inheritance_chain: list[Any]
    field_groups: list[ModelFieldGroup]
    objects: list[Any]
    # The default is None, so the annotation must admit None as well as int.
    batch_size: Optional[int] = None
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Record classification service for database queries.
|
|
3
|
+
|
|
4
|
+
This service handles all database queries related to classifying and fetching
|
|
5
|
+
records based on various criteria (PKs, unique fields, etc.).
|
|
6
|
+
|
|
7
|
+
Separates data access concerns from business logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from django.db.models import Q
|
|
13
|
+
|
|
14
|
+
from django_bulk_hooks.operations.field_utils import get_field_value_for_db
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RecordClassifier:
    """
    Service for classifying and fetching records via database queries.

    This is the SINGLE point of truth for record classification queries.
    Keeps database access logic separate from business/planning logic.
    """

    def __init__(self, model_cls):
        """
        Initialize classifier for a specific model.

        Args:
            model_cls: The Django model class
        """
        self.model_cls = model_cls

    def classify_for_upsert(self, objs, unique_fields, query_model=None):
        """
        Classify records as new or existing based on unique_fields.

        Queries the database once to check which records already exist based
        on the unique_fields constraint. Objects with a None value in any of
        the unique fields are never matched and therefore count as new.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint
            query_model: Optional model class to query (for MTI, may be different from self.model_cls)

        Returns:
            Tuple of (existing_record_ids, existing_pks_map)
            - existing_record_ids: Set of id() for objects that exist in DB
            - existing_pks_map: Dict mapping id(obj) -> pk for existing records
        """
        if not unique_fields or not objs:
            return set(), {}

        # unique_fields is iterated once per object and again per record, so
        # materialize it in case the caller passed a one-shot iterable.
        unique_fields = list(unique_fields)

        # Use query_model if provided (for MTI scenarios), otherwise use self.model_cls
        query_model = query_model or self.model_cls

        queries = []
        obj_to_unique_values = {}

        for obj in objs:
            lookup = {}
            normalized_values = []

            for field_name in unique_fields:
                # Use centralized field value extraction for consistent FK handling
                value = get_field_value_for_db(obj, field_name, query_model)
                if value is None:
                    # Can't match on None values; skip this object entirely
                    break
                lookup[field_name] = value
                normalized_values.append(value)
            else:
                # Every unique field has a value, so the object can be looked up
                queries.append(Q(**lookup))
                # Store normalized values for comparison with database results
                obj_to_unique_values[id(obj)] = tuple(normalized_values)

        if not queries:
            return set(), {}

        # OR the per-object lookups together so a single query covers all objects
        combined_query = queries[0]
        for q in queries[1:]:
            combined_query |= q

        queryset = query_model.objects.filter(combined_query)

        # Lazy %-style arguments: the Q object and the SQL are only rendered
        # when DEBUG logging is actually enabled.
        logger.debug(
            "Classifying for upsert: model=%s, query=%s, unique_fields=%s",
            query_model.__name__,
            combined_query,
            unique_fields,
        )
        logger.debug("Queryset SQL: %s", queryset.query)

        existing_records = list(queryset.values("pk", *unique_fields))
        logger.debug("Found %d existing records: %s", len(existing_records), existing_records)

        existing_record_ids, existing_pks_map = self._match_existing_records(
            obj_to_unique_values,
            existing_records,
            unique_fields,
        )

        logger.info(
            "Classified %d existing and %d new records for upsert",
            len(existing_record_ids),
            len(objs) - len(existing_record_ids),
        )

        return existing_record_ids, existing_pks_map

    @staticmethod
    def _match_existing_records(obj_to_unique_values, existing_records, unique_fields):
        """
        Map database rows back to the in-memory objects they correspond to.

        Args:
            obj_to_unique_values: Dict mapping id(obj) -> tuple of that
                object's unique-field values, ordered like unique_fields
            existing_records: Iterable of dicts holding "pk" plus every name
                in unique_fields
            unique_fields: Field names, in the order used to build the tuples

        Returns:
            Tuple of (existing_record_ids, existing_pks_map), see
            classify_for_upsert. When several rows carry the same values the
            pk of the last such row is kept.
        """
        # Index object ids by their value tuple so each row is matched with a
        # dict lookup instead of a scan over every object.
        try:
            obj_ids_by_values = {}
            for obj_id, values in obj_to_unique_values.items():
                obj_ids_by_values.setdefault(values, []).append(obj_id)
        except TypeError:
            # A value is unhashable; fall back to equality scans below.
            obj_ids_by_values = None

        existing_record_ids = set()
        existing_pks_map = {}

        for record in existing_records:
            record_values = tuple(record[name] for name in unique_fields)

            matching_ids = None
            if obj_ids_by_values is not None:
                try:
                    matching_ids = obj_ids_by_values.get(record_values, ())
                except TypeError:
                    # The row itself holds an unhashable value
                    matching_ids = None
            if matching_ids is None:
                matching_ids = [
                    obj_id
                    for obj_id, obj_values in obj_to_unique_values.items()
                    if obj_values == record_values
                ]

            for obj_id in matching_ids:
                existing_record_ids.add(obj_id)
                existing_pks_map[obj_id] = record["pk"]

        return existing_record_ids, existing_pks_map

    def fetch_by_pks(self, pks, select_related=None, prefetch_related=None):
        """
        Fetch records by primary keys with optional relationship loading.

        Args:
            pks: List of primary key values
            select_related: Optional list of fields to select_related
            prefetch_related: Optional list of fields to prefetch_related

        Returns:
            Dict[pk, instance] for O(1) lookups; empty dict when pks is empty
        """
        if not pks:
            return {}

        # NOTE(review): this method goes through _base_manager while the other
        # methods of this class use .objects - confirm the difference is intended.
        queryset = self.model_cls._base_manager.filter(pk__in=pks)

        if select_related:
            queryset = queryset.select_related(*select_related)

        if prefetch_related:
            queryset = queryset.prefetch_related(*prefetch_related)

        return {obj.pk: obj for obj in queryset}

    def fetch_by_unique_constraint(self, field_values_map):
        """
        Fetch records matching a unique constraint.

        Args:
            field_values_map: Dict of {field_name: value} for unique constraint

        Returns:
            Model instance if found, None otherwise. If more than one row
            matches, a warning is logged and the first match is returned.
        """
        try:
            return self.model_cls.objects.get(**field_values_map)
        except self.model_cls.DoesNotExist:
            return None
        except self.model_cls.MultipleObjectsReturned:
            logger.warning(
                "Multiple %s records found for unique constraint %s",
                self.model_cls.__name__,
                field_values_map,
            )
            return self.model_cls.objects.filter(**field_values_map).first()

    def exists_by_pks(self, pks):
        """
        Check if records exist by primary keys without fetching them.

        Args:
            pks: List of primary key values

        Returns:
            Set of PKs that exist in the database; empty set when pks is empty
        """
        if not pks:
            return set()

        existing_pks = self.model_cls.objects.filter(
            pk__in=pks,
        ).values_list("pk", flat=True)

        return set(existing_pks)

    def count_by_unique_fields(self, objs, unique_fields):
        """
        Count how many objects already exist based on unique fields.

        Useful for validation or reporting before upsert operations.

        Args:
            objs: List of model instances
            unique_fields: List of field names that form the unique constraint

        Returns:
            Tuple of (existing_count, new_count)
        """
        existing_ids, _ = self.classify_for_upsert(objs, unique_fields)
        existing_count = len(existing_ids)
        # classify_for_upsert accepts a missing/empty objs, so do the same here
        total = len(objs) if objs else 0
        return existing_count, total - existing_count
|