mongo-pipebuilder 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ """
2
+ mongo-pipebuilder: Type-safe, fluent MongoDB aggregation pipeline builder.
3
+
4
+ This package provides a Builder Pattern implementation for constructing
5
+ MongoDB aggregation pipelines safely and readably.
6
+
7
+ Author: seligoroff
8
+ """
9
+
10
+ from mongo_pipebuilder.builder import PipelineBuilder
11
+
12
+ __version__ = "0.2.1"
13
+ __all__ = ["PipelineBuilder"]
14
+
@@ -0,0 +1,766 @@
1
+ """
2
+ Pipeline Builder for MongoDB aggregation pipelines.
3
+
4
+ Builder Pattern implementation for safe construction of MongoDB aggregation pipelines
5
+
6
+
7
+ Author: seligoroff
8
+ """
9
+ from typing import Any, Dict, List, Optional, Union
10
+
11
+ # For compatibility with Python < 3.11
12
+ try:
13
+ from typing import Self
14
+ except ImportError:
15
+ from typing_extensions import Self
16
+
17
+
18
class PipelineBuilder:
    """Builder for MongoDB aggregation pipelines with a fluent interface.

    Stages are accumulated in an internal list in call order. Every stage
    method returns the builder itself so calls can be chained; ``build()``
    returns the accumulated list of stage dictionaries.
    """

    def __init__(self) -> None:
        """Initialize a new builder with an empty pipeline."""
        self._stages: List[Dict[str, Any]] = []

    def match(self, conditions: Dict[str, Any]) -> Self:
        """
        Add a $match stage for filtering documents.

        An empty ``conditions`` dict is a no-op: no stage is added.

        Args:
            conditions: Dictionary with filtering conditions

        Returns:
            Self for method chaining

        Raises:
            TypeError: If conditions is None or not a dictionary

        Example:
            >>> builder.match({"status": "active", "age": {"$gte": 18}})
        """
        if conditions is None:
            raise TypeError("conditions cannot be None, use empty dict {} instead")
        if not isinstance(conditions, dict):
            raise TypeError(f"conditions must be a dict, got {type(conditions)}")
        if conditions:
            self._stages.append({"$match": conditions})
        return self

    def lookup(
        self,
        from_collection: str,
        local_field: str,
        foreign_field: str,
        as_field: str,
        pipeline: Optional[List[Dict[str, Any]]] = None,
    ) -> Self:
        """
        Add a $lookup stage for joining with another collection.

        Args:
            from_collection: Name of the collection to join with
            local_field: Field in the current collection
            foreign_field: Field in the target collection
            as_field: Name of the field for join results
            pipeline: Optional nested pipeline for filtering. ``None`` and an
                empty list both mean "no nested pipeline".

        Returns:
            Self for method chaining

        Raises:
            TypeError: If pipeline is not None and not a list of dictionaries
            ValueError: If any of the four name arguments is not a
                non-empty string

        Example:
            >>> builder.lookup(
            ...     from_collection="users",
            ...     local_field="userId",
            ...     foreign_field="_id",
            ...     as_field="user"
            ... )
        """
        # All four name arguments share the same "non-empty string" contract.
        if not isinstance(from_collection, str) or not from_collection:
            raise ValueError("from_collection must be a non-empty string")
        if not isinstance(local_field, str) or not local_field:
            raise ValueError("local_field must be a non-empty string")
        if not isinstance(foreign_field, str) or not foreign_field:
            raise ValueError("foreign_field must be a non-empty string")
        if not isinstance(as_field, str) or not as_field:
            raise ValueError("as_field must be a non-empty string")

        if pipeline is not None:
            if not isinstance(pipeline, list):
                raise TypeError(f"pipeline must be a list, got {type(pipeline)}")
            if not all(isinstance(stage, dict) for stage in pipeline):
                raise TypeError("All pipeline stages must be dictionaries")

        lookup_stage: Dict[str, Any] = {
            "from": from_collection,
            "localField": local_field,
            "foreignField": foreign_field,
            "as": as_field,
        }
        # The "pipeline" key is only emitted for a non-empty nested pipeline.
        if pipeline:
            lookup_stage["pipeline"] = pipeline
        self._stages.append({"$lookup": lookup_stage})
        return self

    def add_fields(self, fields: Dict[str, Any]) -> Self:
        """
        Add an $addFields stage for adding or modifying fields.

        An empty ``fields`` dict is a no-op: no stage is added.

        Args:
            fields: Dictionary with new fields and their expressions

        Returns:
            Self for method chaining

        Raises:
            TypeError: If fields is None or not a dictionary

        Example:
            >>> builder.add_fields({
            ...     "fullName": {"$concat": ["$firstName", " ", "$lastName"]}
            ... })
        """
        if fields is None:
            raise TypeError("fields cannot be None, use empty dict {} instead")
        if not isinstance(fields, dict):
            raise TypeError(f"fields must be a dict, got {type(fields)}")
        if fields:
            self._stages.append({"$addFields": fields})
        return self

    def project(self, fields: Dict[str, Any]) -> Self:
        """
        Add a $project stage for reshaping documents.

        An empty ``fields`` dict is a no-op: no stage is added.

        Args:
            fields: Dictionary with fields to include/exclude or transform

        Returns:
            Self for method chaining

        Raises:
            TypeError: If fields is None or not a dictionary

        Example:
            >>> builder.project({"name": 1, "email": 1, "_id": 0})
        """
        if fields is None:
            raise TypeError("fields cannot be None, use empty dict {} instead")
        if not isinstance(fields, dict):
            raise TypeError(f"fields must be a dict, got {type(fields)}")
        if fields:
            self._stages.append({"$project": fields})
        return self

    def group(self, group_by: Dict[str, Any], accumulators: Dict[str, Any]) -> Self:
        """
        Add a $group stage for grouping documents.

        Args:
            group_by: Expression for grouping (becomes _id)
            accumulators: Dictionary with accumulators (sum, avg, count, etc.)

        Returns:
            Self for method chaining

        Raises:
            TypeError: If arguments are not dictionaries
            ValueError: If both group_by and accumulators are empty

        Example:
            >>> builder.group(
            ...     group_by={"category": "$category"},
            ...     accumulators={"total": {"$sum": "$amount"}}
            ... )
        """
        if not isinstance(group_by, dict):
            raise TypeError(f"group_by must be a dict, got {type(group_by)}")
        if not isinstance(accumulators, dict):
            raise TypeError(f"accumulators must be a dict, got {type(accumulators)}")

        # Empty group_by is technically valid in MongoDB (groups all into one
        # document), but if both are empty it's likely an error.
        if not group_by and not accumulators:
            raise ValueError("group_by and accumulators cannot both be empty")

        # NOTE: accumulators are unpacked after "_id", so an "_id" key inside
        # accumulators replaces group_by in the emitted stage.
        group_stage = {"_id": group_by, **accumulators}
        self._stages.append({"$group": group_stage})
        return self

    def unwind(
        self,
        path: str,
        preserve_null_and_empty_arrays: bool = False,
        include_array_index: Optional[str] = None,
    ) -> Self:
        """
        Add an $unwind stage for unwinding arrays.

        MongoDB requires the ``path`` option to be a field path prefixed with
        ``$``. The prefix is added automatically when it is missing, so both
        ``"tags"`` and ``"$tags"`` produce ``{"path": "$tags"}``.

        Args:
            path: Path to the array field, with or without the leading ``$``
            preserve_null_and_empty_arrays: Preserve documents with null/empty arrays
            include_array_index: Name of the field for array element index

        Returns:
            Self for method chaining

        Raises:
            TypeError: If path (or a provided include_array_index) is not a string
            ValueError: If path is empty, or include_array_index starts with ``$``

        Example:
            >>> builder.unwind("tags", preserve_null_and_empty_arrays=True)
            >>> builder.unwind("items", include_array_index="itemIndex")
        """
        if not isinstance(path, str):
            raise TypeError(f"path must be a string, got {type(path)}")
        if not path:
            raise ValueError("path cannot be empty")

        # The server rejects an $unwind path that lacks the "$" prefix.
        if not path.startswith("$"):
            path = f"${path}"

        unwind_stage: Dict[str, Any] = {"path": path}
        if preserve_null_and_empty_arrays:
            unwind_stage["preserveNullAndEmptyArrays"] = True
        if include_array_index:
            if not isinstance(include_array_index, str):
                raise TypeError(
                    f"include_array_index must be a string, got {type(include_array_index)}"
                )
            # Unlike path, this is a plain output field name, not a field path.
            if include_array_index.startswith("$"):
                raise ValueError("include_array_index cannot start with '$'")
            unwind_stage["includeArrayIndex"] = include_array_index
        self._stages.append({"$unwind": unwind_stage})
        return self

    def sort(self, fields: Dict[str, int]) -> Self:
        """
        Add a $sort stage for sorting documents.

        An empty ``fields`` dict is a no-op: no stage is added.

        Args:
            fields: Dictionary with fields and sort direction (1 - asc, -1 - desc)

        Returns:
            Self for method chaining

        Raises:
            TypeError: If fields is None or not a dictionary

        Example:
            >>> builder.sort({"createdAt": -1, "name": 1})
        """
        if fields is None:
            raise TypeError("fields cannot be None, use empty dict {} instead")
        if not isinstance(fields, dict):
            raise TypeError(f"fields must be a dict, got {type(fields)}")
        if fields:
            self._stages.append({"$sort": fields})
        return self

    def limit(self, limit: int) -> Self:
        """
        Add a $limit stage to limit the number of documents.

        A limit of 0 is a no-op: no stage is added.

        Args:
            limit: Maximum number of documents

        Returns:
            Self for method chaining

        Raises:
            TypeError: If limit is not an integer (booleans are rejected)
            ValueError: If limit is negative

        Example:
            >>> builder.limit(10)
        """
        # bool is a subclass of int; without the explicit check limit(True)
        # would emit {"$limit": True}.
        if isinstance(limit, bool) or not isinstance(limit, int):
            raise TypeError(f"limit must be an integer, got {type(limit)}")
        if limit < 0:
            raise ValueError("limit cannot be negative")
        if limit > 0:
            self._stages.append({"$limit": limit})
        return self

    def skip(self, skip: int) -> Self:
        """
        Add a $skip stage to skip documents.

        A skip of 0 is a no-op: no stage is added.

        Args:
            skip: Number of documents to skip

        Returns:
            Self for method chaining

        Raises:
            TypeError: If skip is not an integer (booleans are rejected)
            ValueError: If skip is negative

        Example:
            >>> builder.skip(20)
        """
        # bool is a subclass of int; reject it explicitly (see limit()).
        if isinstance(skip, bool) or not isinstance(skip, int):
            raise TypeError(f"skip must be an integer, got {type(skip)}")
        if skip < 0:
            raise ValueError("skip cannot be negative")
        if skip > 0:
            self._stages.append({"$skip": skip})
        return self

    def unset(self, fields: Union[str, List[str]]) -> Self:
        """
        Add a $unset stage to remove fields from documents.

        A single-element list is emitted as a plain string; longer lists are
        emitted as a (copied) list.

        Args:
            fields: Field name or list of field names to remove

        Returns:
            Self for method chaining

        Raises:
            TypeError: If fields is not a string or list of strings
            ValueError: If fields is empty or contains empty strings

        Example:
            >>> builder.unset("temp_field")
            >>> builder.unset(["field1", "field2", "field3"])
        """
        if fields is None:
            raise TypeError("fields cannot be None")

        if isinstance(fields, str):
            if not fields:
                raise ValueError("fields cannot be an empty string")
            self._stages.append({"$unset": fields})
        elif isinstance(fields, list):
            if not fields:
                raise ValueError("fields cannot be an empty list")
            if not all(isinstance(f, str) for f in fields):
                raise TypeError("all items in fields list must be strings")
            if not all(fields):  # Check for empty strings
                raise ValueError("fields list cannot contain empty strings")
            # Store a copy so later mutation of the caller's list cannot
            # change the already-recorded stage.
            self._stages.append(
                {"$unset": list(fields) if len(fields) > 1 else fields[0]}
            )
        else:
            raise TypeError(f"fields must be a string or list of strings, got {type(fields)}")

        return self

    def replace_root(self, new_root: Dict[str, Any]) -> Self:
        """
        Add a $replaceRoot stage to replace the root document.

        Args:
            new_root: Stage specification; must contain the 'newRoot' key

        Returns:
            Self for method chaining

        Raises:
            TypeError: If new_root is None or not a dictionary
            ValueError: If new_root is empty or missing 'newRoot' key

        Example:
            >>> builder.replace_root({"newRoot": "$embedded"})
            >>> builder.replace_root({"newRoot": {"$mergeObjects": ["$doc1", "$doc2"]}})
        """
        if new_root is None:
            # An empty dict is rejected below, so do not suggest it here.
            raise TypeError("new_root cannot be None")
        if not isinstance(new_root, dict):
            raise TypeError(f"new_root must be a dict, got {type(new_root)}")
        if not new_root:
            raise ValueError("new_root cannot be empty")
        if "newRoot" not in new_root:
            raise ValueError("new_root must contain 'newRoot' key")

        self._stages.append({"$replaceRoot": new_root})
        return self

    def replace_with(self, replacement: Any) -> Self:
        """
        Add a $replaceWith stage (alias for $replaceRoot in MongoDB 4.2+).

        Args:
            replacement: Expression for the replacement document

        Returns:
            Self for method chaining

        Raises:
            ValueError: If replacement is None

        Example:
            >>> builder.replace_with("$embedded")
            >>> builder.replace_with({"$mergeObjects": ["$doc1", "$doc2"]})
        """
        if replacement is None:
            raise ValueError("replacement cannot be None")

        self._stages.append({"$replaceWith": replacement})
        return self

    def facet(self, facets: Dict[str, List[Dict[str, Any]]]) -> Self:
        """
        Add a $facet stage for parallel execution of multiple sub-pipelines.

        Args:
            facets: Dictionary where keys are output field names and values are
                lists of pipeline stages for each sub-pipeline

        Returns:
            Self for method chaining

        Raises:
            TypeError: If facets is None or not a dictionary, or if any
                sub-pipeline is not a list of dictionaries
            ValueError: If facets is empty

        Example:
            >>> builder.facet({
            ...     "items": [{"$skip": 10}, {"$limit": 20}],
            ...     "meta": [{"$count": "total"}]
            ... })
        """
        if facets is None:
            # An empty dict is rejected below, so do not suggest it here.
            raise TypeError("facets cannot be None")
        if not isinstance(facets, dict):
            raise TypeError(f"facets must be a dict, got {type(facets)}")
        if not facets:
            raise ValueError("facets cannot be empty")

        # Every sub-pipeline must be a list of stage dictionaries.
        for key, value in facets.items():
            if not isinstance(value, list):
                raise TypeError(f"facet '{key}' must be a list, got {type(value)}")
            if not all(isinstance(stage, dict) for stage in value):
                raise TypeError(f"all stages in facet '{key}' must be dictionaries")

        self._stages.append({"$facet": facets})
        return self

    def count(self, field_name: str = "count") -> Self:
        """
        Add a $count stage to count documents.

        Args:
            field_name: Name of the field for the count result

        Returns:
            Self for method chaining

        Raises:
            TypeError: If field_name is None or not a string
            ValueError: If field_name is empty, starts with ``$`` or contains ``.``

        Example:
            >>> builder.match({"status": "active"}).count("active_count")
        """
        if field_name is None:
            raise TypeError("field_name cannot be None")
        if not isinstance(field_name, str):
            raise TypeError(f"field_name must be a string, got {type(field_name)}")
        if not field_name:
            raise ValueError("field_name cannot be empty")
        # MongoDB rejects $count output names that look like field paths.
        if field_name.startswith("$") or "." in field_name:
            raise ValueError("field_name cannot start with '$' or contain '.'")

        self._stages.append({"$count": field_name})
        return self

    def set_field(self, fields: Dict[str, Any]) -> Self:
        """
        Add a $set stage (alias for $addFields, available in MongoDB 4.2+).

        Functionally equivalent to add_fields(). An empty ``fields`` dict is a
        no-op: no stage is added.

        Args:
            fields: Dictionary with fields and their values/expressions

        Returns:
            Self for method chaining

        Raises:
            TypeError: If fields is None or not a dictionary

        Example:
            >>> builder.set_field({"status": "active", "updatedAt": "$$NOW"})
        """
        if fields is None:
            raise TypeError("fields cannot be None, use empty dict {} instead")
        if not isinstance(fields, dict):
            raise TypeError(f"fields must be a dict, got {type(fields)}")
        if fields:
            self._stages.append({"$set": fields})
        return self

    def add_stage(self, stage: Dict[str, Any]) -> Self:
        """
        Add an arbitrary pipeline stage for advanced use cases.

        ``None`` and empty values are ignored (no stage is added).

        Args:
            stage: Dictionary with an arbitrary MongoDB aggregation stage

        Returns:
            Self for method chaining

        Raises:
            TypeError: If stage is a non-empty value that is not a dictionary

        Example:
            >>> builder.add_stage({
            ...     "$facet": {
            ...         "categories": [{"$group": {"_id": "$category"}}],
            ...         "total": [{"$count": "count"}]
            ...     }
            ... })
        """
        if not stage:
            return self
        # A non-dict stage would break get_stage_types()/__repr__ later on,
        # so fail at the point of insertion instead.
        if not isinstance(stage, dict):
            raise TypeError(f"stage must be a dict, got {type(stage)}")
        self._stages.append(stage)
        return self

    def __len__(self) -> int:
        """
        Return the number of stages in the pipeline.

        Returns:
            Number of stages

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).limit(10)
            >>> len(builder)
            2
        """
        return len(self._stages)

    def __repr__(self) -> str:
        """
        Return a string representation of the builder for debugging.

        Returns:
            String showing the stage count and the types of the first three
            stages (followed by "..." when there are more)

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).limit(10)
            >>> repr(builder)
            'PipelineBuilder(stages=2, preview=[$match, $limit])'
        """
        stages_count = len(self._stages)
        if stages_count == 0:
            return "PipelineBuilder(stages=0)"

        # The stage type is the first key of each stage dictionary.
        stages_preview = ", ".join(next(iter(stage)) for stage in self._stages[:3])
        if stages_count > 3:
            stages_preview += "..."
        return f"PipelineBuilder(stages={stages_count}, preview=[{stages_preview}])"

    def clear(self) -> Self:
        """
        Clear all stages from the pipeline.

        Returns:
            Self for method chaining

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).clear()
            >>> len(builder)
            0
        """
        self._stages.clear()
        return self

    def copy(self) -> "PipelineBuilder":
        """
        Create an independent copy of the builder with the current stages.

        Stages are deep-copied, so mutating a stage of either builder does
        not affect the other.

        Returns:
            New PipelineBuilder instance with copied stages

        Example:
            >>> builder1 = PipelineBuilder().match({"status": "active"})
            >>> builder2 = builder1.copy()
            >>> builder2.limit(10)
            >>> len(builder1)  # Original unchanged
            1
            >>> len(builder2)  # Copy has new stage
            2
        """
        # Imported locally: this method shadows the module name inside the class.
        from copy import deepcopy

        new_builder = PipelineBuilder()
        new_builder._stages = deepcopy(self._stages)
        return new_builder

    def validate(self) -> bool:
        """
        Validate the pipeline before execution.

        Checks that the pipeline is not empty and enforces these MongoDB rules:
        - $out and $merge cannot both be present
        - a $out or $merge stage must be the last stage in the pipeline

        Returns:
            True if pipeline is valid

        Raises:
            ValueError: If pipeline is empty or has validation errors

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).validate()
            True
            >>> PipelineBuilder().validate()
            Traceback (most recent call last):
                ...
            ValueError: Pipeline cannot be empty
        """
        if not self._stages:
            raise ValueError("Pipeline cannot be empty")

        stage_types = self.get_stage_types()
        total = len(stage_types)

        if "$out" in stage_types and "$merge" in stage_types:
            raise ValueError(
                "Pipeline cannot contain both $out and $merge stages. "
                "Only one output stage is allowed."
            )

        # index() returns the first occurrence, so a repeated output stage is
        # reported at its earliest (non-final) position.
        for output_stage in ("$out", "$merge"):
            if output_stage not in stage_types:
                continue
            position = stage_types.index(output_stage)
            if position != total - 1:
                raise ValueError(
                    f"{output_stage} stage must be the last stage in the pipeline. "
                    f"Found at position {position + 1} of {total}."
                )

        return True

    def get_stage_types(self) -> List[str]:
        """
        Get a list of stage types in the pipeline.

        The type of a stage is the first key of its dictionary.

        Returns:
            List of stage type strings (e.g., ["$match", "$lookup", "$limit"])

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).limit(10)
            >>> builder.get_stage_types()
            ['$match', '$limit']
        """
        return [next(iter(stage)) for stage in self._stages]

    def has_stage(self, stage_type: str) -> bool:
        """
        Check if the pipeline contains a specific stage type.

        Args:
            stage_type: Type of stage to check (e.g., "$match", "$lookup")

        Returns:
            True if the stage type is a key of any stage in the pipeline

        Raises:
            TypeError: If stage_type is not a string

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).limit(10)
            >>> builder.has_stage("$match")
            True
            >>> builder.has_stage("$group")
            False
        """
        if not isinstance(stage_type, str):
            raise TypeError(f"stage_type must be a string, got {type(stage_type)}")
        return any(stage_type in stage for stage in self._stages)

    def prepend(self, stage: Dict[str, Any]) -> Self:
        """
        Add a stage at the beginning of the pipeline.

        An empty ``stage`` dict is a no-op.

        Args:
            stage: Dictionary with a MongoDB aggregation stage

        Returns:
            Self for method chaining

        Raises:
            TypeError: If stage is None or not a dictionary

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"})
            >>> builder.prepend({"$match": {"deleted": False}})
            >>> builder.build()
            [{'$match': {'deleted': False}}, {'$match': {'status': 'active'}}]
        """
        if stage is None:
            raise TypeError("stage cannot be None")
        if not isinstance(stage, dict):
            raise TypeError(f"stage must be a dict, got {type(stage)}")
        if stage:
            self._stages.insert(0, stage)
        return self

    def insert_at(self, position: int, stage: Dict[str, Any]) -> Self:
        """
        Insert a stage at a specific position in the pipeline.

        An empty ``stage`` dict is a no-op and is accepted before the
        position is range-checked.

        Args:
            position: Index where to insert (0-based); ``len(builder)`` appends
            stage: Dictionary with a MongoDB aggregation stage to insert

        Returns:
            Self for method chaining

        Raises:
            TypeError: If stage is None or not a dictionary
            IndexError: If position is out of range [0, len(stages)]

        Example:
            >>> builder = PipelineBuilder()
            >>> builder.match({"status": "active"}).group({"category": "$category"}, {})
            >>> builder.insert_at(1, {"$sort": {"name": 1}})
            >>> builder.get_stage_types()
            ['$match', '$sort', '$group']
        """
        if stage is None:
            raise TypeError("stage cannot be None")
        if not isinstance(stage, dict):
            raise TypeError(f"stage must be a dict, got {type(stage)}")
        if not stage:
            return self

        # Negative indices are rejected rather than given list.insert semantics.
        if position < 0 or position > len(self._stages):
            raise IndexError(
                f"Position {position} out of range [0, {len(self._stages)}]"
            )

        self._stages.insert(position, stage)
        return self

    def build(self) -> List[Dict[str, Any]]:
        """
        Return the completed pipeline.

        The returned list is a new list, but the stage dictionaries inside it
        are the same objects held by the builder (shallow copy).

        Returns:
            List of dictionaries with aggregation pipeline stages

        Example:
            >>> pipeline = builder.build()
            >>> collection.aggregate(pipeline)
        """
        return self._stages.copy()
766
+
@@ -0,0 +1,375 @@
1
+ Metadata-Version: 2.4
2
+ Name: mongo-pipebuilder
3
+ Version: 0.2.1
4
+ Summary: Type-safe, fluent MongoDB aggregation pipeline builder
5
+ Author-email: seligoroff <seligoroff@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/seligoroff/mongo-pipebuilder
8
+ Project-URL: Documentation, https://github.com/seligoroff/mongo-pipebuilder#readme
9
+ Project-URL: Repository, https://github.com/seligoroff/mongo-pipebuilder
10
+ Project-URL: Issues, https://github.com/seligoroff/mongo-pipebuilder/issues
11
+ Keywords: mongodb,aggregation,pipeline,builder,query
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Database
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: typing_extensions>=4.0.0; python_version < "3.11"
27
+ Dynamic: license-file
28
+
29
+ # mongo-pipebuilder
30
+
31
+ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
32
+
33
+ ## Overview
34
+
35
+ `mongo-pipebuilder` provides a clean, type-safe way to build MongoDB aggregation pipelines using the Builder Pattern with a fluent interface for maximum readability and safety.
36
+
37
+ ## Features
38
+
39
+ - ✅ **Type-safe**: Full type hints support with IDE autocomplete
40
+ - ✅ **Fluent interface**: Chain methods for readable, maintainable code
41
+ - ✅ **Minimal dependencies**: Pure Python; the only requirement is `typing_extensions`, and only on Python < 3.11
42
+ - ✅ **Extensible**: Easy to add custom stages via `add_stage()`
43
+ - ✅ **Well tested**: Comprehensive test suite with 96%+ coverage
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ pip install mongo-pipebuilder
49
+ ```
50
+
51
+ ## Quick Start
52
+
53
+ ```python
54
+ from mongo_pipebuilder import PipelineBuilder
55
+
56
+ # Build a pipeline
57
+ pipeline = (
58
+ PipelineBuilder()
59
+ .match({"status": "active"})
60
+ .lookup(
61
+ from_collection="users",
62
+ local_field="userId",
63
+ foreign_field="_id",
64
+ as_field="user"
65
+ )
66
+ .project({"name": 1, "user.email": 1})
67
+ .sort({"name": 1})
68
+ .limit(10)
69
+ .build()
70
+ )
71
+
72
+ # Use with pymongo
73
+ from pymongo import MongoClient
74
+ client = MongoClient()
75
+ collection = client.db.my_collection
76
+ results = collection.aggregate(pipeline)
77
+ ```
78
+
79
+ ## API Reference
80
+
81
+ ### PipelineBuilder
82
+
83
+ Main class for building aggregation pipelines.
84
+
85
+ #### Methods
86
+
87
+ ##### `match(conditions: Dict[str, Any]) -> Self`
88
+
89
+ Adds a `$match` stage to filter documents.
90
+
91
+ ```python
92
+ .match({"status": "active", "age": {"$gte": 18}})
93
+ ```
94
+
95
+ ##### `lookup(from_collection: str, local_field: str, foreign_field: str, as_field: str, pipeline: Optional[List[Dict[str, Any]]] = None) -> Self`
96
+
97
+ Adds a `$lookup` stage to join with another collection.
98
+
99
+ ```python
100
+ .lookup(
101
+ from_collection="users",
102
+ local_field="userId",
103
+ foreign_field="_id",
104
+ as_field="user",
105
+ pipeline=[{"$match": {"active": True}}] # Optional nested pipeline
106
+ )
107
+ ```
108
+
109
+ ##### `add_fields(fields: Dict[str, Any]) -> Self`
110
+
111
+ Adds a `$addFields` stage to add or modify fields.
112
+
113
+ ```python
114
+ .add_fields({"fullName": {"$concat": ["$firstName", " ", "$lastName"]}})
115
+ ```
116
+
117
+ ##### `project(fields: Dict[str, Any]) -> Self`
118
+
119
+ Adds a `$project` stage to reshape documents.
120
+
121
+ ```python
122
+ .project({"name": 1, "email": 1, "_id": 0})
123
+ ```
124
+
125
+ ##### `group(group_by: Dict[str, Any], accumulators: Dict[str, Any]) -> Self`
126
+
127
+ Adds a `$group` stage to group documents.
128
+
129
+ ```python
130
+ .group(
131
+ group_by={"category": "$category"},
132
+ accumulators={"total": {"$sum": "$amount"}}
133
+ )
134
+ ```
135
+
136
+ ##### `unwind(path: str, preserve_null_and_empty_arrays: bool = False, include_array_index: Optional[str] = None) -> Self`
137
+
138
+ Adds a `$unwind` stage to deconstruct arrays.
139
+
140
+ ```python
141
+ .unwind("tags", preserve_null_and_empty_arrays=True)
142
+ .unwind("items", include_array_index="itemIndex")
143
+ ```
144
+
145
+ ##### `sort(fields: Dict[str, int]) -> Self`
146
+
147
+ Adds a `$sort` stage.
148
+
149
+ ```python
150
+ .sort({"createdAt": -1, "name": 1})
151
+ ```
152
+
153
+ ##### `limit(limit: int) -> Self`
154
+
155
+ Adds a `$limit` stage.
156
+
157
+ ```python
158
+ .limit(10)
159
+ ```
160
+
161
+ ##### `skip(skip: int) -> Self`
162
+
163
+ Adds a `$skip` stage.
164
+
165
+ ```python
166
+ .skip(20)
167
+ ```
168
+
169
+ ##### `unset(fields: Union[str, List[str]]) -> Self`
170
+
171
+ Adds a `$unset` stage to remove fields from documents.
172
+
173
+ ```python
174
+ .unset("temp_field")
175
+ .unset(["field1", "field2", "field3"])
176
+ ```
177
+
178
+ ##### `replace_root(new_root: Dict[str, Any]) -> Self`
179
+
180
+ Adds a `$replaceRoot` stage to replace the root document.
181
+
182
+ ```python
183
+ .replace_root({"newRoot": "$embedded"})
184
+ .replace_root({"newRoot": {"$mergeObjects": ["$doc1", "$doc2"]}})
185
+ ```
186
+
187
+ ##### `replace_with(replacement: Any) -> Self`
188
+
189
+ Adds a `$replaceWith` stage (alias for `$replaceRoot` in MongoDB 4.2+).
190
+
191
+ ```python
192
+ .replace_with("$embedded")
193
+ .replace_with({"$mergeObjects": ["$doc1", "$doc2"]})
194
+ ```
195
+
196
+ ##### `facet(facets: Dict[str, List[Dict[str, Any]]]) -> Self`
197
+
198
+ Adds a `$facet` stage for parallel execution of multiple sub-pipelines.
199
+
200
+ ```python
201
+ .facet({
202
+ "items": [{"$skip": 10}, {"$limit": 20}],
203
+ "meta": [{"$count": "total"}]
204
+ })
205
+ ```
206
+
207
+ ##### `count(field_name: str = "count") -> Self`
208
+
209
+ Adds a `$count` stage to count documents.
210
+
211
+ ```python
212
+ .match({"status": "active"}).count("active_count")
213
+ ```
214
+
215
+ ##### `set_field(fields: Dict[str, Any]) -> Self`
216
+
217
+ Adds a `$set` stage (alias for `$addFields`, available in MongoDB 4.2+).
218
+
219
+ ```python
220
+ .set_field({"status": "active", "updatedAt": "$$NOW"})
221
+ ```
222
+
223
+ ##### `add_stage(stage: Dict[str, Any]) -> Self`
224
+
225
+ Adds a custom stage for advanced use cases.
226
+
227
+ ```python
228
+ .add_stage({"$facet": {
229
+ "categories": [{"$group": {"_id": "$category"}}],
230
+ "total": [{"$count": "count"}]
231
+ }})
232
+ ```
233
+
234
+ ##### `prepend(stage: Dict[str, Any]) -> Self`
235
+
236
+ Adds a stage at the beginning of the pipeline.
237
+
238
+ ```python
239
+ builder.match({"status": "active"})
240
+ builder.prepend({"$match": {"deleted": False}})
241
+ # Pipeline: [{"$match": {"deleted": False}}, {"$match": {"status": "active"}}]
242
+ ```
243
+
244
+ ##### `insert_at(position: int, stage: Dict[str, Any]) -> Self`
245
+
246
+ Inserts a stage at a specific position (0-based index) in the pipeline.
247
+
248
+ ```python
249
+ builder.match({"status": "active"}).group({"_id": "$category"}, {"count": {"$sum": 1}})
250
+ builder.insert_at(1, {"$sort": {"name": 1}})
251
+ # Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
252
+ ```
253
+
254
+ **Note:** For inserting before a specific stage type, combine with `get_stage_types()`:
255
+
256
+ ```python
257
+ stage_types = builder.get_stage_types()
258
+ group_index = stage_types.index("$group")
259
+ builder.insert_at(group_index, {"$addFields": {"x": 1}})
260
+ ```
261
+
262
+ ##### `validate() -> bool`
263
+
264
+ Validates the pipeline before execution. Checks that:
265
+ - Pipeline is not empty
266
+ - A `$out` or `$merge` stage, if present, is the last stage (critical MongoDB rule)
267
+ - `$out` and `$merge` are not used together
268
+
269
+ ```python
270
+ builder = PipelineBuilder()
271
+ builder.match({"status": "active"}).validate() # Returns True
272
+
273
+ # Invalid: $out not last
274
+ builder.add_stage({"$out": "output"}).match({"status": "active"})
275
+ builder.validate() # Raises ValueError: $out stage must be the last stage
276
+ ```
277
+
278
+ ##### `build() -> List[Dict[str, Any]]`
279
+
280
+ Returns the complete pipeline as a list of stage dictionaries.
281
+
282
+ ## Examples
283
+
284
+ ### Complex Pipeline with Nested Lookup
285
+
286
+ ```python
287
+ pipeline = (
288
+ PipelineBuilder()
289
+ .match({"status": "published"})
290
+ .lookup(
291
+ from_collection="authors",
292
+ local_field="authorId",
293
+ foreign_field="_id",
294
+ as_field="author"
295
+ )
296
+ .unwind("author", preserve_null_and_empty_arrays=True)
297
+ .lookup(
298
+ from_collection="categories",
299
+ local_field="categoryId",
300
+ foreign_field="_id",
301
+ as_field="category",
302
+ pipeline=[
303
+ {"$match": {"active": True}},
304
+ {"$project": {"name": 1, "slug": 1}}
305
+ ]
306
+ )
307
+ .unwind("category")
308
+ .add_fields({
309
+ "authorName": "$author.name",
310
+ "categoryName": "$category.name"
311
+ })
312
+ .project({
313
+ "title": 1,
314
+ "authorName": 1,
315
+ "categoryName": 1,
316
+ "publishedAt": 1
317
+ })
318
+ .sort({"publishedAt": -1})
319
+ .limit(20)
320
+ .build()
321
+ )
322
+ ```
323
+
324
+ ### Aggregation with Grouping
325
+
326
+ ```python
327
+ pipeline = (
328
+ PipelineBuilder()
329
+ .match({"date": {"$gte": "2024-01-01"}})
330
+ .group(
331
+ group_by={"month": {"$dateToString": {"format": "%Y-%m", "date": "$date"}}},
332
+ accumulators={
333
+ "totalSales": {"$sum": "$amount"},
334
+ "avgAmount": {"$avg": "$amount"},
335
+ "count": {"$sum": 1}
336
+ }
337
+ )
338
+ .sort({"month": 1})
339
+ .build()
340
+ )
341
+ ```
342
+
343
+ ## Development
344
+
345
+ ### Project Structure
346
+
347
+ ```
348
+ mongo-pipebuilder/
349
+ ├── src/
350
+ │ └── mongo_pipebuilder/
351
+ │ ├── __init__.py
352
+ │ └── builder.py
353
+ ├── tests/
354
+ │ └── test_builder.py
355
+ ├── examples/
356
+ │ └── examples.py
357
+ ├── pyproject.toml
358
+ ├── README.md
359
+ └── LICENSE
360
+ ```
361
+
362
+ ### Running Tests
363
+
364
+ ```bash
365
+ pytest tests/
366
+ ```
367
+
368
+ ### Contributing
369
+
370
+ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
371
+
372
+ ## License
373
+
374
+ MIT License - see [LICENSE](LICENSE) file for details.
375
+
@@ -0,0 +1,7 @@
1
+ mongo_pipebuilder/__init__.py,sha256=eOj_NuBMA9YbVFPmm13UV25RR5_QK2ctrx8QXV3yGTU,336
2
+ mongo_pipebuilder/builder.py,sha256=kurxcQ5IXErefUQjqQ5XAzkfZEv9siN-PufmWoFA0aE,26545
3
+ mongo_pipebuilder-0.2.1.dist-info/licenses/LICENSE,sha256=GLx_6hrvLsyIL34dpRYvjCSIXyYD8PzhBR09opTrixI,1088
4
+ mongo_pipebuilder-0.2.1.dist-info/METADATA,sha256=Kjav6gd0U9MREeLM3tcLz2b5RfBlCyjnJP0H9gG97XM,9126
5
+ mongo_pipebuilder-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ mongo_pipebuilder-0.2.1.dist-info/top_level.txt,sha256=wLn7H_v-qaNIws5FeBbKPZBCmYFYgFEhPaLjoCWcisc,18
7
+ mongo_pipebuilder-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 mongo-pipebuilder contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1 @@
1
+ mongo_pipebuilder