mongo-pipebuilder 0.3.0-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mongo_pipebuilder/__init__.py +1 -1
- mongo_pipebuilder/builder.py +371 -134
- {mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/METADATA +123 -5
- mongo_pipebuilder-0.4.0.dist-info/RECORD +7 -0
- {mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/WHEEL +1 -1
- {mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/top_level.txt +0 -0
- mongo_pipebuilder-0.3.0.dist-info/RECORD +0 -7
mongo_pipebuilder/__init__.py
CHANGED
mongo_pipebuilder/builder.py
CHANGED
@@ -6,15 +6,14 @@ Builder Pattern implementation for safe construction of MongoDB aggregation pipe
 
 Author: seligoroff
 """
+import copy
+import difflib
 import json
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
-# For compatibility with Python < 3.11
-try:
-    from typing import Self
-except ImportError:
-    from typing_extensions import Self
+# For compatibility with Python < 3.11 and mypy with python_version 3.8
+from typing_extensions import Self
 
 
 class PipelineBuilder:
@@ -48,6 +47,29 @@ class PipelineBuilder:
         self._stages.append({"$match": conditions})
         return self
 
+    def match_expr(self, expr: Dict[str, Any]) -> Self:
+        """
+        Add a $match stage with $expr condition (expression-based filter).
+
+        Args:
+            expr: The expression for $expr (e.g. {"$eq": ["$id", "$$teamId"]}).
+
+        Returns:
+            Self for method chaining.
+
+        Raises:
+            TypeError: If expr is None or not a dict.
+
+        Example:
+            >>> builder.match_expr({"$eq": ["$id", "$$teamId"]})
+        """
+        if expr is None:
+            raise TypeError("expr cannot be None, use empty dict {} instead")
+        if not isinstance(expr, dict):
+            raise TypeError(f"expr must be a dict, got {type(expr)}")
+        self._stages.append({"$match": {"$expr": expr}})
+        return self
+
     def lookup(
         self,
         from_collection: str,
@@ -109,19 +131,133 @@ class PipelineBuilder:
         self._stages.append({"$lookup": lookup_stage})
         return self
 
+    def lookup_let(
+        self,
+        from_collection: str,
+        let: Dict[str, Any],
+        pipeline: Union[List[Dict[str, Any]], "PipelineBuilder"],
+        as_field: str,
+    ) -> Self:
+        """
+        Add a $lookup stage with let and pipeline (join by expression, variables from document).
+
+        Args:
+            from_collection: Name of the collection to join with.
+            let: Variables for the subpipeline (available in pipeline as $$key).
+            pipeline: Subpipeline as list of stages or PipelineBuilder (will call .build()).
+            as_field: Name of the field for join results.
+
+        Returns:
+            Self for method chaining.
+
+        Raises:
+            TypeError: If from_collection, as_field are not strings; if let is not a dict;
+                if pipeline is None or not a list/PipelineBuilder; if pipeline list has non-dict stages.
+            ValueError: If from_collection or as_field are empty; if pipeline list is empty.
+
+        Example:
+            >>> builder.lookup_let(
+            ...     from_collection="teams",
+            ...     let={"teamId": "$idTeam"},
+            ...     pipeline=[{"$match": {"$expr": {"$eq": ["$_id", "$$teamId"]}}}],
+            ...     as_field="team"
+            ... )
+        """
+        if not isinstance(from_collection, str):
+            raise TypeError("from_collection must be a string")
+        if not from_collection:
+            raise ValueError("from_collection must be a non-empty string")
+        if let is None:
+            raise TypeError("let cannot be None")
+        if not isinstance(let, dict):
+            raise TypeError("let must be a dict")
+        if not isinstance(as_field, str):
+            raise TypeError("as_field must be a string")
+        if not as_field:
+            raise ValueError("as_field must be a non-empty string")
+        if pipeline is None:
+            raise TypeError("pipeline cannot be None")
+        if isinstance(pipeline, PipelineBuilder):
+            pipeline = pipeline.build()
+        if not isinstance(pipeline, list):
+            raise TypeError("pipeline must be a list or PipelineBuilder")
+        if not pipeline:
+            raise ValueError("pipeline cannot be empty")
+        if not all(isinstance(stage, dict) for stage in pipeline):
+            raise TypeError("All pipeline stages must be dictionaries")
+
+        lookup_stage: Dict[str, Any] = {
+            "from": from_collection,
+            "let": let,
+            "pipeline": pipeline,
+            "as": as_field,
+        }
+        self._stages.append({"$lookup": lookup_stage})
+        return self
+
+    def union_with(
+        self,
+        coll: str,
+        pipeline: Optional[Union[List[Dict[str, Any]], "PipelineBuilder"]] = None,
+    ) -> Self:
+        """
+        Add a $unionWith stage to combine documents from another collection.
+
+        Merges all documents from the current pipeline with documents from the
+        given collection. If pipeline is provided, it is run on the other
+        collection before merging; you can pass a list of stages or a
+        PipelineBuilder (its .build() is used internally).
+
+        Args:
+            coll: Name of the collection to union with.
+            pipeline: Optional subpipeline (list of stages or PipelineBuilder).
+                Defaults to [].
+
+        Returns:
+            Self for method chaining.
+
+        Raises:
+            TypeError: If coll is not a string; if pipeline is not None and not
+                a list or PipelineBuilder; if pipeline list contains non-dict
+                stages.
+            ValueError: If coll is empty.
+
+        Example:
+            >>> builder.union_with("other_coll")
+            >>> builder.union_with("logs", [{"$match": {"level": "error"}}])
+            >>> sub = PipelineBuilder().match({"source": "x"}).project({"n": 1})
+            >>> builder.union_with("stats", sub)
+        """
+        if not isinstance(coll, str):
+            raise TypeError("coll must be a string")
+        if not coll:
+            raise ValueError("coll must be a non-empty string")
+        pipeline_list: List[Dict[str, Any]] = []
+        if pipeline is not None:
+            if isinstance(pipeline, PipelineBuilder):
+                pipeline_list = pipeline.build()
+            elif isinstance(pipeline, list):
+                if not all(isinstance(stage, dict) for stage in pipeline):
+                    raise TypeError("All pipeline stages must be dictionaries")
+                pipeline_list = pipeline
+            else:
+                raise TypeError("pipeline must be a list or PipelineBuilder")
+        self._stages.append({"$unionWith": {"coll": coll, "pipeline": pipeline_list}})
+        return self
+
     def add_fields(self, fields: Dict[str, Any]) -> Self:
         """
         Add an $addFields stage for adding or modifying fields.
-
+
         Args:
             fields: Dictionary with new fields and their expressions
-
+
         Returns:
             Self for method chaining
-
+
         Raises:
             TypeError: If fields is not a dictionary
-
+
         Example:
             >>> builder.add_fields({
             ...     "fullName": {"$concat": ["$firstName", " ", "$lastName"]}
@@ -189,7 +325,31 @@ class PipelineBuilder:
         """
         if not isinstance(accumulators, dict):
            raise TypeError(f"accumulators must be a dict, got {type(accumulators)}")
-
+
+        # Guard against a common mistake: passing {"_id": ...} as group_by.
+        # group_by should be the expression that becomes the $group _id.
+        # If users pass {"_id": expr}, MongoDB will create nested _id and later
+        # expressions like $first: "$_id" may fail because $_id becomes an object.
+        if isinstance(group_by, dict) and set(group_by.keys()) == {"_id"}:
+            inner = group_by["_id"]
+            raise ValueError(
+                "Invalid group_by: you passed a dict wrapper {'_id': ...} to PipelineBuilder.group().\n"
+                "PipelineBuilder.group(group_by=...) expects the expression that becomes $group._id.\n"
+                "\n"
+                "Did you mean one of these?\n"
+                f"- builder.group(group_by={inner!r}, accumulators=...)\n"
+                f"- builder.group(group_by={inner!r}, accumulators={{...}}) # same, explicit\n"
+                "\n"
+                "Examples:\n"
+                "- Array _id: builder.group(group_by=['$idSeason', '$idTournament'], accumulators={...})\n"
+                "- Field path: builder.group(group_by='$category', accumulators={...})\n"
+                "- Composite key: builder.group(group_by={'category': '$category'}, accumulators={...})\n"
+                "\n"
+                "Why this matters: {'_id': expr} would create a nested _id object in MongoDB, and later\n"
+                "operators like $first/$last on '$_id' may fail with: "
+                "\"$first's argument must be an array, but is object\"."
+            )
+
         # Validate empty cases
         # group_by can be None, empty string, empty dict, etc. - all are valid in MongoDB
         # But if it's a string and empty, or dict and empty, and accumulators is also empty,
@@ -779,21 +939,22 @@ class PipelineBuilder:
             raise IndexError(
                 f"Index {index} out of range [0, {len(self._stages)}]"
             )
-        return self._stages[index]
+        # Return a deep copy so callers can safely mutate nested structures
+        return copy.deepcopy(self._stages[index])
 
     def pretty_print(self, indent: int = 2, ensure_ascii: bool = False) -> str:
         """
         Return a formatted JSON string representation of the pipeline.
-
+
         Useful for debugging and understanding pipeline structure.
-
+
         Args:
             indent: Number of spaces for indentation (default: 2)
             ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
-
+
         Returns:
             Formatted JSON string of the pipeline
-
+
         Example:
             >>> builder = PipelineBuilder()
             >>> builder.match({"status": "active"}).limit(10)
@@ -811,6 +972,36 @@ class PipelineBuilder:
         """
         return json.dumps(self._stages, indent=indent, ensure_ascii=ensure_ascii)
 
+    def pretty_print_stage(
+        self,
+        stage: Union[int, Dict[str, Any]],
+        indent: int = 2,
+        ensure_ascii: bool = False,
+    ) -> str:
+        """
+        Return a formatted JSON string representation of a single stage.
+
+        Args:
+            stage: Stage index (0-based) or a stage dict
+            indent: Number of spaces for indentation (default: 2)
+            ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
+
+        Returns:
+            Formatted JSON string of the stage
+
+        Raises:
+            TypeError: If stage is not an int or dict
+            IndexError: If stage is an int out of range
+        """
+        if isinstance(stage, int):
+            stage_dict = self.get_stage_at(stage)
+        elif isinstance(stage, dict):
+            stage_dict = copy.deepcopy(stage)
+        else:
+            raise TypeError(f"stage must be an int index or a dict, got {type(stage)}")
+
+        return json.dumps(stage_dict, indent=indent, ensure_ascii=ensure_ascii)
+
     def to_json_file(
         self,
         filepath: Union[str, Path],
@@ -845,23 +1036,69 @@ class PipelineBuilder:
         """
         filepath = Path(filepath)
         filepath.parent.mkdir(parents=True, exist_ok=True)
-
+
         output: Dict[str, Any] = {
             "pipeline": self._stages,
         }
         if metadata:
             output["metadata"] = metadata
-
+
         with open(filepath, "w", encoding="utf-8") as f:
             json.dump(output, f, indent=indent, ensure_ascii=ensure_ascii)
 
+    def compare_with(self, other: "PipelineBuilder", context_lines: int = 3) -> str:
+        """
+        Compare this pipeline with another pipeline and return a unified diff.
+
+        This is useful when migrating legacy pipelines (e.g., templates) to builder code.
+
+        Args:
+            other: Another PipelineBuilder instance to compare with
+            context_lines: Number of context lines in the unified diff (default: 3)
+
+        Returns:
+            Unified diff as a string. Returns "No differences." if pipelines are identical.
+
+        Raises:
+            TypeError: If other is not a PipelineBuilder
+            ValueError: If context_lines is negative
+
+        Example:
+            >>> legacy = PipelineBuilder().match({"a": 1})
+            >>> new = PipelineBuilder().match({"a": 2})
+            >>> print(new.compare_with(legacy))
+        """
+        if not isinstance(other, PipelineBuilder):
+            raise TypeError(f"other must be a PipelineBuilder, got {type(other)}")
+        if not isinstance(context_lines, int):
+            raise TypeError(f"context_lines must be an int, got {type(context_lines)}")
+        if context_lines < 0:
+            raise ValueError("context_lines cannot be negative")
+
+        a = json.dumps(
+            self.build(),
+            indent=2,
+            ensure_ascii=False,
+            sort_keys=True,
+        ).splitlines(keepends=True)
+        b = json.dumps(
+            other.build(),
+            indent=2,
+            ensure_ascii=False,
+            sort_keys=True,
+        ).splitlines(keepends=True)
+
+        diff = difflib.unified_diff(a, b, fromfile="new", tofile="other", n=context_lines)
+        out = "".join(diff)
+        return out if out else "No differences."
+
     def build(self) -> List[Dict[str, Any]]:
         """
         Return the completed pipeline.
-
+
         Returns:
             List of dictionaries with aggregation pipeline stages
-
+
         Example:
             >>> pipeline = builder.build()
            >>> collection.aggregate(pipeline)

[The roughly thirty remaining hunks in builder.py are whitespace-only: blank lines inside docstrings lose their trailing whitespace. The adjacent -/+ pairs of visually blank lines in the hunks above are that same change.]
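To make the net effect of the builder.py changes concrete, here is a minimal usage sketch that chains the methods added in 0.4.0 (match_expr, lookup_let, union_with, and the stricter group) together with the existing API. It is an illustration built only from the signatures and docstrings shown in the hunks above, not code shipped in the package; the collection names (teams, archive_matches) and field names (idTeam, status) are hypothetical.

```python
from mongo_pipebuilder.builder import PipelineBuilder

# Subpipeline for the expression-based join; $$teamId refers to the `let`
# variable declared in lookup_let() below.
team_sub = (
    PipelineBuilder()
    .match_expr({"$eq": ["$_id", "$$teamId"]})  # emits {"$match": {"$expr": ...}}
    .project({"name": 1, "_id": 0})
)

pipeline = (
    PipelineBuilder()
    .match({"status": "active"})
    .lookup_let(
        from_collection="teams",    # hypothetical collection
        let={"teamId": "$idTeam"},  # hypothetical local field
        pipeline=team_sub,          # a PipelineBuilder is accepted; .build() is called internally
        as_field="team",
    )
    # Append documents from a second (hypothetical) collection, filtered first.
    .union_with("archive_matches", [{"$match": {"status": "active"}}])
    # group_by is the $group _id expression itself; passing {"_id": ...}
    # is now rejected by 0.4.0 with a descriptive ValueError.
    .group(group_by="$status", accumulators={"total": {"$sum": 1}})
    .sort({"total": -1})
    .build()
)

print(pipeline[0])  # {'$match': {'status': 'active'}}
```

Passing a PipelineBuilder as the lookup_let or union_with subpipeline works because both methods call .build() on it internally, as the new docstrings state.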
{mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/METADATA
CHANGED

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: mongo-pipebuilder
-Version: 0.3.0
+Version: 0.4.0
 Summary: Type-safe, fluent MongoDB aggregation pipeline builder
 Author-email: seligoroff <seligoroff@gmail.com>
-License: MIT
+License-Expression: MIT
 Project-URL: Homepage, https://github.com/seligoroff/mongo-pipebuilder
 Project-URL: Documentation, https://github.com/seligoroff/mongo-pipebuilder#readme
 Project-URL: Repository, https://github.com/seligoroff/mongo-pipebuilder
@@ -11,7 +11,6 @@ Project-URL: Issues, https://github.com/seligoroff/mongo-pipebuilder/issues
 Keywords: mongodb,aggregation,pipeline,builder,query
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -98,6 +97,15 @@ Adds a `$match` stage to filter documents.
 .match({"status": "active", "age": {"$gte": 18}})
 ```
 
+##### `match_expr(expr: Dict[str, Any]) -> Self`
+
+Adds a `$match` stage with an `$expr` condition (expression-based filter; useful for comparing fields or using variables from `let` in subpipelines).
+
+```python
+.match_expr({"$eq": ["$id", "$$teamId"]})
+.match_expr({"$and": [{"$gte": ["$field", "$other"]}, {"$lte": ["$score", 100]}]})
+```
+
 ##### `lookup(from_collection: str, local_field: str, foreign_field: str, as_field: str, pipeline: Optional[List[Dict[str, Any]]] = None) -> Self`
 
 Adds a `$lookup` stage to join with another collection.
@@ -112,6 +120,43 @@ Adds a `$lookup` stage to join with another collection.
 )
 ```
 
+##### `lookup_let(from_collection: str, let: Dict[str, Any], pipeline: Union[List[Dict[str, Any]], PipelineBuilder], as_field: str) -> Self`
+
+Adds a `$lookup` stage with `let` and `pipeline` (join by expression; variables from the current document are available in the subpipeline as `$$var`). Use this when the join condition is an expression (e.g. `$expr`) rather than equality of two fields.
+
+```python
+# With list of stages
+.lookup_let(
+    from_collection="teams",
+    let={"teamId": "$idTeam"},
+    pipeline=[
+        {"$match": {"$expr": {"$eq": ["$_id", "$$teamId"]}}},
+        {"$project": {"name": 1, "_id": 0}}
+    ],
+    as_field="team"
+)
+
+# With PipelineBuilder for the subpipeline (optionally using match_expr)
+sub = PipelineBuilder().match_expr({"$eq": ["$_id", "$$teamId"]}).project({"name": 1, "_id": 0})
+.lookup_let("teams", {"teamId": "$idTeam"}, sub, as_field="team")
+```
+
+##### `union_with(coll: str, pipeline: Optional[Union[List[Dict[str, Any]], PipelineBuilder]] = None) -> Self`
+
+Adds a `$unionWith` stage to combine documents from the current pipeline with documents from another collection. Optionally runs a subpipeline on the other collection before merging.
+
+```python
+# Union with another collection (no subpipeline)
+.union_with("other_coll")
+
+# With subpipeline as list of stages
+.union_with("logs", [{"$match": {"level": "error"}}, {"$limit": 100}])
+
+# With PipelineBuilder for the subpipeline
+sub = PipelineBuilder().match({"source": "individual"}).project({"name": 1})
+.union_with("sso_individual_statistics", sub)
+```
+
 ##### `add_fields(fields: Dict[str, Any]) -> Self`
 
 Adds a `$addFields` stage to add or modify fields.
@@ -252,7 +297,7 @@ builder.prepend({"$match": {"deleted": False}})
 Inserts a stage at a specific position (0-based index) in the pipeline.
 
 ```python
-builder.match({"status": "active"}).group(
+builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
 builder.insert_at(1, {"$sort": {"name": 1}})
 # Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
 ```
@@ -327,6 +372,15 @@ print(builder.pretty_print())
 # ]
 ```
 
+##### `pretty_print_stage(stage: Union[int, Dict[str, Any]], indent: int = 2, ensure_ascii: bool = False) -> str`
+
+Returns a formatted JSON string representation of a single stage (by index or by dict).
+
+```python
+builder = PipelineBuilder().match({"status": "active"}).limit(10)
+print(builder.pretty_print_stage(0))  # Prints the $match stage
+```
+
 ##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
 
 Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
@@ -345,6 +399,17 @@ builder.to_json_file(
 )
 ```
 
+##### `compare_with(other: PipelineBuilder, context_lines: int = 3) -> str`
+
+Returns a unified diff between two pipelines (useful for comparing “new” builder pipelines vs legacy/template pipelines).
+
+```python
+legacy = PipelineBuilder().match({"status": "active"}).limit(10)
+new = PipelineBuilder().match({"status": "inactive"}).limit(10)
+
+print(new.compare_with(legacy))
+```
+
 ##### `build() -> List[Dict[str, Any]]`
 
 Returns the complete pipeline as a list of stage dictionaries.
@@ -391,6 +456,31 @@ pipeline = (
 )
 ```
 
+### Lookup by expression (lookup_let)
+
+When the join condition is an expression (e.g. `$expr`) rather than matching two fields, use `lookup_let`. The subpipeline can be built with `match_expr()`:
+
+```python
+sub = (
+    PipelineBuilder()
+    .match_expr({"$eq": ["$_id", "$$teamId"]})
+    .project({"name": 1, "slug": 1, "_id": 0})
+)
+pipeline = (
+    PipelineBuilder()
+    .match({"status": "active"})
+    .lookup_let(
+        from_collection="teams",
+        let={"teamId": "$idTeam"},
+        pipeline=sub,
+        as_field="team"
+    )
+    .unwind("team", preserve_null_and_empty_arrays=True)
+    .project({"title": 1, "teamName": "$team.name"})
+    .build()
+)
+```
+
 ### Aggregation with Grouping
 
 ```python
@@ -500,11 +590,39 @@ base = get_base_pipeline(user_id)
 # Create multiple queries from cached base
 recent = base.copy().sort({"createdAt": -1}).limit(10).build()
 by_category = base.copy().match({"category": "tech"}).build()
-with_stats = base.copy().group(
+with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
 
 # Base pipeline is safely cached and reused
 ```
 
+## Best Practices
+
+### Array `_id` after `$group`: prefer `$arrayElemAt` and materialize fields
+
+If you use `$group` with an array `_id` (e.g. `["_idSeason", "_idTournament"]`), avoid relying on `$_id` later in the pipeline.
+Instead, **extract elements with `$arrayElemAt` and store them into explicit fields**, then use those fields in subsequent stages.
+
+```python
+pipeline = (
+    PipelineBuilder()
+    .group(
+        group_by=["$idSeason", "$idTournament"],
+        accumulators={"idTeams": {"$addToSet": "$idTeam"}},
+    )
+    .project({
+        "idSeason": {"$arrayElemAt": ["$_id", 0]},
+        "idTournament": {"$arrayElemAt": ["$_id", 1]},
+        "idTeams": 1,
+        # Optional: preserve array _id explicitly if you really need it later
+        # "_id": "$_id",
+    })
+    .build()
+)
+```
+
+This pattern reduces surprises and helps avoid errors like:
+`$first's argument must be an array, but is object`.
+
 #### Example: Pipeline Factories
 
 ```python
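Since the new README section positions `compare_with()` as a migration aid, the sketch below shows one way it could be used to check that a hand-written legacy pipeline and its builder-based rewrite produce the same stages. This is an illustrative assumption built only from methods visible in this diff (add_stage, match, limit, compare_with); the legacy stage list itself is made up.

```python
from mongo_pipebuilder.builder import PipelineBuilder

# A legacy pipeline kept as raw stage dicts (hypothetical example data).
legacy_stages = [
    {"$match": {"status": "active"}},
    {"$limit": 10},
]

# Wrap the raw stages in a builder via add_stage() so both sides can be diffed.
legacy = PipelineBuilder()
for stage in legacy_stages:
    legacy.add_stage(stage)

# The same pipeline re-expressed with the fluent API.
rebuilt = PipelineBuilder().match({"status": "active"}).limit(10)

# compare_with() returns "No differences." when the JSON-serialized pipelines
# match, and a unified diff otherwise.
print(rebuilt.compare_with(legacy))
```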
mongo_pipebuilder-0.4.0.dist-info/RECORD
ADDED

@@ -0,0 +1,7 @@
+mongo_pipebuilder/__init__.py,sha256=3iWmQvRAT2QZHXURN9AHoMPn-7FjwH9ig8QyTUCVLh4,336
+mongo_pipebuilder/builder.py,sha256=_c-5uuNwWJigKzzIcOXXkPY9oD_UOC0lomhx03yJz9U,38834
+mongo_pipebuilder-0.4.0.dist-info/licenses/LICENSE,sha256=-ZkZpDLHDQAc-YBIojJ6eDsMwxwx5pRuQz3RHnl9Y8w,1104
+mongo_pipebuilder-0.4.0.dist-info/METADATA,sha256=IAtv0lDGEIiQ-OlFLn1LR6fDFtgC1xj_PSH3Ak31lE4,20002
+mongo_pipebuilder-0.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mongo_pipebuilder-0.4.0.dist-info/top_level.txt,sha256=wLn7H_v-qaNIws5FeBbKPZBCmYFYgFEhPaLjoCWcisc,18
+mongo_pipebuilder-0.4.0.dist-info/RECORD,,

{mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/licenses/LICENSE
File without changes

{mongo_pipebuilder-0.3.0.dist-info → mongo_pipebuilder-0.4.0.dist-info}/top_level.txt
File without changes

mongo_pipebuilder-0.3.0.dist-info/RECORD
REMOVED

@@ -1,7 +0,0 @@
-mongo_pipebuilder/__init__.py,sha256=pP27GA8G6dttP-gMq9uNCJoS66-cb3JJiVVdI340er4,336
-mongo_pipebuilder/builder.py,sha256=oQxRYL9ycjYCv2ErP_YHz-Uoo2pPRaRBaaaCLEsL5Mo,30286
-mongo_pipebuilder-0.3.0.dist-info/licenses/LICENSE,sha256=-ZkZpDLHDQAc-YBIojJ6eDsMwxwx5pRuQz3RHnl9Y8w,1104
-mongo_pipebuilder-0.3.0.dist-info/METADATA,sha256=KkgWrj5TD22yDj915Jrri_JftYMEpTz6hXSeHKEM7mk,15850
-mongo_pipebuilder-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mongo_pipebuilder-0.3.0.dist-info/top_level.txt,sha256=wLn7H_v-qaNIws5FeBbKPZBCmYFYgFEhPaLjoCWcisc,18
-mongo_pipebuilder-0.3.0.dist-info/RECORD,,