mongo-aggro 0.1.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. mongo_aggro/__init__.py +400 -0
  2. mongo_aggro/accumulators.py +30 -12
  3. mongo_aggro/base.py +49 -9
  4. mongo_aggro/expressions/__init__.py +396 -0
  5. mongo_aggro/expressions/arithmetic.py +329 -0
  6. mongo_aggro/expressions/array.py +425 -0
  7. mongo_aggro/expressions/base.py +180 -0
  8. mongo_aggro/expressions/bitwise.py +84 -0
  9. mongo_aggro/expressions/comparison.py +161 -0
  10. mongo_aggro/expressions/conditional.py +117 -0
  11. mongo_aggro/expressions/date.py +665 -0
  12. mongo_aggro/expressions/encrypted.py +116 -0
  13. mongo_aggro/expressions/logical.py +72 -0
  14. mongo_aggro/expressions/object.py +122 -0
  15. mongo_aggro/expressions/set.py +150 -0
  16. mongo_aggro/expressions/size.py +48 -0
  17. mongo_aggro/expressions/string.py +365 -0
  18. mongo_aggro/expressions/trigonometry.py +283 -0
  19. mongo_aggro/expressions/type.py +205 -0
  20. mongo_aggro/expressions/variable.py +73 -0
  21. mongo_aggro/expressions/window.py +327 -0
  22. mongo_aggro/operators/__init__.py +65 -0
  23. mongo_aggro/operators/array.py +41 -0
  24. mongo_aggro/operators/base.py +15 -0
  25. mongo_aggro/operators/bitwise.py +81 -0
  26. mongo_aggro/operators/comparison.py +82 -0
  27. mongo_aggro/operators/element.py +32 -0
  28. mongo_aggro/operators/geo.py +171 -0
  29. mongo_aggro/operators/logical.py +111 -0
  30. mongo_aggro/operators/misc.py +102 -0
  31. mongo_aggro/operators/regex.py +25 -0
  32. mongo_aggro/stages/__init__.py +110 -0
  33. mongo_aggro/stages/array.py +69 -0
  34. mongo_aggro/stages/change.py +109 -0
  35. mongo_aggro/stages/core.py +170 -0
  36. mongo_aggro/stages/geo.py +93 -0
  37. mongo_aggro/stages/group.py +154 -0
  38. mongo_aggro/stages/join.py +221 -0
  39. mongo_aggro/stages/misc.py +45 -0
  40. mongo_aggro/stages/output.py +136 -0
  41. mongo_aggro/stages/search.py +315 -0
  42. mongo_aggro/stages/session.py +111 -0
  43. mongo_aggro/stages/stats.py +152 -0
  44. mongo_aggro/stages/transform.py +136 -0
  45. mongo_aggro/stages/window.py +139 -0
  46. mongo_aggro-0.2.2.dist-info/METADATA +193 -0
  47. mongo_aggro-0.2.2.dist-info/RECORD +49 -0
  48. {mongo_aggro-0.1.0.dist-info → mongo_aggro-0.2.2.dist-info}/WHEEL +1 -1
  49. mongo_aggro/operators.py +0 -247
  50. mongo_aggro/stages.py +0 -990
  51. mongo_aggro-0.1.0.dist-info/METADATA +0 -537
  52. mongo_aggro-0.1.0.dist-info/RECORD +0 -9
  53. {mongo_aggro-0.1.0.dist-info → mongo_aggro-0.2.2.dist-info}/licenses/LICENSE +0 -0
mongo_aggro/stages.py DELETED
@@ -1,990 +0,0 @@
1
- """MongoDB aggregation pipeline stages."""
2
-
3
- from typing import Any, Literal
4
-
5
- from pydantic import BaseModel, ConfigDict, Field
6
-
7
- from mongo_aggro.base import BaseStage, Pipeline
8
-
9
-
10
- class Match(BaseModel, BaseStage):
11
- """
12
- $match stage - filters documents by specified criteria.
13
-
14
- Example:
15
- >>> Match(query={"status": "active"}).model_dump()
16
- {"$match": {"status": "active"}}
17
-
18
- >>> # With logical operators
19
- >>> Match(query={"$and": [{"status": "active"}, {"age": {"$gt": 18}}]})
20
- """
21
-
22
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
23
-
24
- query: dict[str, Any] = Field(..., description="Query filter conditions")
25
-
26
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
27
- return {"$match": self.query}
28
-
29
-
30
- class Project(BaseModel, BaseStage):
31
- """
32
- $project stage - shapes documents by including/excluding fields.
33
-
34
- Example:
35
- >>> Project(fields={"name": 1, "year": 1, "_id": 0}).model_dump()
36
- {"$project": {"name": 1, "year": 1, "_id": 0}}
37
-
38
- >>> # With expressions
39
- >>> Project(fields={"fullName": {"$concat": ["$first", " ", "$last"]}})
40
- """
41
-
42
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
43
-
44
- fields: dict[str, Any] = Field(
45
- ..., description="Field projections (1=include, 0=exclude, or expr)"
46
- )
47
-
48
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
49
- return {"$project": self.fields}
50
-
51
-
52
- class Group(BaseModel, BaseStage):
53
- """
54
- $group stage - groups documents by specified expression.
55
-
56
- Example:
57
- >>> Group(
58
- ... id="$category",
59
- ... total={"$sum": "$quantity"},
60
- ... count={"$sum": 1}
61
- ... ).model_dump()
62
- {
63
- "$group": {
64
- "_id": "$category",
65
- "total": {"$sum": "$quantity"},
66
- "count": {"$sum": 1}
67
- }
68
- }
69
- """
70
-
71
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
72
-
73
- id: Any = Field(
74
- ...,
75
- validation_alias="_id",
76
- serialization_alias="_id",
77
- description="Grouping expression",
78
- )
79
- accumulators: dict[str, Any] = Field(
80
- default_factory=dict,
81
- description="Accumulator expressions (e.g., $sum, $avg)",
82
- )
83
-
84
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
85
- result = {"_id": self.id}
86
- result.update(self.accumulators)
87
- return {"$group": result}
88
-
89
-
90
- class Sort(BaseModel, BaseStage):
91
- """
92
- $sort stage - sorts documents.
93
-
94
- Example:
95
- >>> Sort(fields={"age": -1, "name": 1}).model_dump()
96
- {"$sort": {"age": -1, "name": 1}}
97
- """
98
-
99
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
100
-
101
- fields: dict[str, Literal[-1, 1]] = Field(
102
- ..., description="Sort specification (1=asc, -1=desc)"
103
- )
104
-
105
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
106
- return {"$sort": self.fields}
107
-
108
-
109
- class Limit(BaseModel, BaseStage):
110
- """
111
- $limit stage - limits the number of documents.
112
-
113
- Example:
114
- >>> Limit(count=10).model_dump()
115
- {"$limit": 10}
116
- """
117
-
118
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
119
-
120
- count: int = Field(..., gt=0, description="Maximum number of documents")
121
-
122
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
123
- return {"$limit": self.count}
124
-
125
-
126
- class Skip(BaseModel, BaseStage):
127
- """
128
- $skip stage - skips a number of documents.
129
-
130
- Example:
131
- >>> Skip(count=5).model_dump()
132
- {"$skip": 5}
133
- """
134
-
135
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
136
-
137
- count: int = Field(..., ge=0, description="Number of documents to skip")
138
-
139
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
140
- return {"$skip": self.count}
141
-
142
-
143
- class Unwind(BaseModel, BaseStage):
144
- """
145
- $unwind stage - deconstructs an array field.
146
-
147
- Example:
148
- >>> Unwind(path="cars").model_dump()
149
- {"$unwind": "$cars"}
150
-
151
- >>> # With options
152
- >>> Unwind(
153
- ... path="items",
154
- ... include_array_index="itemIndex",
155
- ... preserve_null_and_empty=True
156
- ... ).model_dump()
157
- {"$unwind": {
158
- "path": "$items",
159
- "includeArrayIndex": "itemIndex",
160
- "preserveNullAndEmptyArrays": true
161
- }}
162
- """
163
-
164
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
165
-
166
- path: str = Field(..., description="Array field path (without $)")
167
- include_array_index: str | None = Field(
168
- default=None,
169
- validation_alias="includeArrayIndex",
170
- serialization_alias="includeArrayIndex",
171
- description="Name of index field",
172
- )
173
- preserve_null_and_empty: bool | None = Field(
174
- default=None,
175
- validation_alias="preserveNullAndEmptyArrays",
176
- serialization_alias="preserveNullAndEmptyArrays",
177
- description="Output doc if array is null/empty/missing",
178
- )
179
-
180
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
181
- field_path = (
182
- f"${self.path}" if not self.path.startswith("$") else self.path
183
- )
184
-
185
- if (
186
- self.include_array_index is None
187
- and self.preserve_null_and_empty is None
188
- ):
189
- return {"$unwind": field_path}
190
-
191
- result: dict[str, Any] = {"path": field_path}
192
- if self.include_array_index is not None:
193
- result["includeArrayIndex"] = self.include_array_index
194
- if self.preserve_null_and_empty is not None:
195
- result["preserveNullAndEmptyArrays"] = self.preserve_null_and_empty
196
- return {"$unwind": result}
197
-
198
-
199
- class Lookup(BaseModel, BaseStage):
200
- """
201
- $lookup stage - performs a left outer join.
202
-
203
- Example:
204
- >>> # Simple lookup
205
- >>> Lookup(
206
- ... from_collection="products",
207
- ... local_field="product_id",
208
- ... foreign_field="_id",
209
- ... as_field="product"
210
- ... ).model_dump()
211
- {"$lookup": {
212
- "from": "products",
213
- "localField": "product_id",
214
- "foreignField": "_id",
215
- "as": "product"
216
- }}
217
-
218
- >>> # With pipeline
219
- >>> Lookup(
220
- ... from_collection="orders",
221
- ... let={"customerId": "$_id"},
222
- ... pipeline=Pipeline([Match(query={"status": "active"})]),
223
- ... as_field="orders"
224
- ... ).model_dump()
225
- """
226
-
227
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
228
-
229
- from_collection: str = Field(
230
- ...,
231
- validation_alias="from",
232
- serialization_alias="from",
233
- description="Foreign collection name",
234
- )
235
- local_field: str | None = Field(
236
- default=None,
237
- validation_alias="localField",
238
- serialization_alias="localField",
239
- description="Local field for join",
240
- )
241
- foreign_field: str | None = Field(
242
- default=None,
243
- validation_alias="foreignField",
244
- serialization_alias="foreignField",
245
- description="Foreign field for join",
246
- )
247
- let: dict[str, Any] | None = Field(
248
- default=None, description="Variables for pipeline"
249
- )
250
- pipeline: Pipeline | list[dict[str, Any]] | None = Field(
251
- default=None, description="Sub-pipeline for complex joins"
252
- )
253
- as_field: str = Field(
254
- ...,
255
- validation_alias="as",
256
- serialization_alias="as",
257
- description="Output array field name",
258
- )
259
-
260
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
261
- result: dict[str, Any] = {
262
- "from": self.from_collection,
263
- "as": self.as_field,
264
- }
265
-
266
- if self.local_field is not None:
267
- result["localField"] = self.local_field
268
- if self.foreign_field is not None:
269
- result["foreignField"] = self.foreign_field
270
- if self.let is not None:
271
- result["let"] = self.let
272
- if self.pipeline is not None:
273
- if isinstance(self.pipeline, Pipeline):
274
- result["pipeline"] = self.pipeline.to_list()
275
- else:
276
- result["pipeline"] = self.pipeline
277
-
278
- return {"$lookup": result}
279
-
280
-
281
- class AddFields(BaseModel, BaseStage):
282
- """
283
- $addFields stage - adds new fields to documents.
284
-
285
- Example:
286
- >>> AddFields(fields={"isActive": True, "score": {"$sum": "$marks"}})
287
- {"$addFields": {"isActive": true, "score": {"$sum": "$marks"}}}
288
- """
289
-
290
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
291
-
292
- fields: dict[str, Any] = Field(..., description="Fields to add")
293
-
294
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
295
- return {"$addFields": self.fields}
296
-
297
-
298
- class Set(BaseModel, BaseStage):
299
- """
300
- $set stage - alias for $addFields.
301
-
302
- Example:
303
- >>> Set(fields={"status": "processed"}).model_dump()
304
- {"$set": {"status": "processed"}}
305
- """
306
-
307
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
308
-
309
- fields: dict[str, Any] = Field(..., description="Fields to set")
310
-
311
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
312
- return {"$set": self.fields}
313
-
314
-
315
- class Unset(BaseModel, BaseStage):
316
- """
317
- $unset stage - removes fields from documents.
318
-
319
- Example:
320
- >>> Unset(fields=["password", "secret"]).model_dump()
321
- {"$unset": ["password", "secret"]}
322
-
323
- >>> Unset(fields="temporaryField").model_dump()
324
- {"$unset": "temporaryField"}
325
- """
326
-
327
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
328
-
329
- fields: str | list[str] = Field(..., description="Field(s) to remove")
330
-
331
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
332
- return {"$unset": self.fields}
333
-
334
-
335
- class Count(BaseModel, BaseStage):
336
- """
337
- $count stage - counts documents.
338
-
339
- Example:
340
- >>> Count(field="total").model_dump()
341
- {"$count": "total"}
342
- """
343
-
344
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
345
-
346
- field: str = Field(..., description="Output field name for count")
347
-
348
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
349
- return {"$count": self.field}
350
-
351
-
352
- class SortByCount(BaseModel, BaseStage):
353
- """
354
- $sortByCount stage - groups and counts by field, sorted by count.
355
-
356
- Example:
357
- >>> SortByCount(field="category").model_dump()
358
- {"$sortByCount": "$category"}
359
- """
360
-
361
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
362
-
363
- field: str = Field(..., description="Field to group and count by")
364
-
365
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
366
- field_path = (
367
- f"${self.field}" if not self.field.startswith("$") else self.field
368
- )
369
- return {"$sortByCount": field_path}
370
-
371
-
372
- class Facet(BaseModel, BaseStage):
373
- """
374
- $facet stage - processes multiple pipelines within a single stage.
375
-
376
- Example:
377
- >>> Facet(pipelines={
378
- ... "byCategory": Pipeline([Group(id="$category")]),
379
- ... "byYear": Pipeline([Group(id="$year")])
380
- ... }).model_dump()
381
- {"$facet": {
382
- "byCategory": [{"$group": {"_id": "$category"}}],
383
- "byYear": [{"$group": {"_id": "$year"}}]
384
- }}
385
- """
386
-
387
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
388
-
389
- pipelines: dict[str, Pipeline | list[dict[str, Any]]] = Field(
390
- ..., description="Named pipelines"
391
- )
392
-
393
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
394
- result: dict[str, list[dict[str, Any]]] = {}
395
- for name, pipeline in self.pipelines.items():
396
- if isinstance(pipeline, Pipeline):
397
- result[name] = pipeline.to_list()
398
- else:
399
- result[name] = pipeline
400
- return {"$facet": result}
401
-
402
-
403
- class Bucket(BaseModel, BaseStage):
404
- """
405
- $bucket stage - categorizes documents into buckets.
406
-
407
- Example:
408
- >>> Bucket(
409
- ... group_by="$price",
410
- ... boundaries=[0, 100, 500, 1000],
411
- ... default="Other",
412
- ... output={"count": {"$sum": 1}}
413
- ... ).model_dump()
414
- {"$bucket": {
415
- "groupBy": "$price",
416
- "boundaries": [0, 100, 500, 1000],
417
- "default": "Other",
418
- "output": {"count": {"$sum": 1}}
419
- }}
420
- """
421
-
422
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
423
-
424
- group_by: str | dict[str, Any] = Field(
425
- ...,
426
- validation_alias="groupBy",
427
- serialization_alias="groupBy",
428
- description="Expression to group by",
429
- )
430
- boundaries: list[Any] = Field(..., description="Bucket boundaries")
431
- default: Any | None = Field(
432
- default=None, description="Default bucket for non-matching docs"
433
- )
434
- output: dict[str, Any] | None = Field(
435
- default=None, description="Output document specification"
436
- )
437
-
438
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
439
- result: dict[str, Any] = {
440
- "groupBy": self.group_by,
441
- "boundaries": self.boundaries,
442
- }
443
- if self.default is not None:
444
- result["default"] = self.default
445
- if self.output is not None:
446
- result["output"] = self.output
447
- return {"$bucket": result}
448
-
449
-
450
- class BucketAuto(BaseModel, BaseStage):
451
- """
452
- $bucketAuto stage - automatically categorizes into specified buckets.
453
-
454
- Example:
455
- >>> BucketAuto(group_by="$age", buckets=5).model_dump()
456
- {"$bucketAuto": {"groupBy": "$age", "buckets": 5}}
457
- """
458
-
459
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
460
-
461
- group_by: str | dict[str, Any] = Field(
462
- ...,
463
- validation_alias="groupBy",
464
- serialization_alias="groupBy",
465
- description="Expression to group by",
466
- )
467
- buckets: int = Field(..., gt=0, description="Number of buckets")
468
- output: dict[str, Any] | None = Field(
469
- default=None, description="Output document specification"
470
- )
471
- granularity: str | None = Field(
472
- default=None, description="Preferred number series"
473
- )
474
-
475
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
476
- result: dict[str, Any] = {
477
- "groupBy": self.group_by,
478
- "buckets": self.buckets,
479
- }
480
- if self.output is not None:
481
- result["output"] = self.output
482
- if self.granularity is not None:
483
- result["granularity"] = self.granularity
484
- return {"$bucketAuto": result}
485
-
486
-
487
- class ReplaceRoot(BaseModel, BaseStage):
488
- """
489
- $replaceRoot stage - replaces document with specified embedded document.
490
-
491
- Example:
492
- >>> ReplaceRoot(new_root="$nested").model_dump()
493
- {"$replaceRoot": {"newRoot": "$nested"}}
494
- """
495
-
496
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
497
-
498
- new_root: str | dict[str, Any] = Field(
499
- ...,
500
- validation_alias="newRoot",
501
- serialization_alias="newRoot",
502
- description="Expression for new root",
503
- )
504
-
505
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
506
- return {"$replaceRoot": {"newRoot": self.new_root}}
507
-
508
-
509
- class ReplaceWith(BaseModel, BaseStage):
510
- """
511
- $replaceWith stage - replaces document (alias for $replaceRoot).
512
-
513
- Example:
514
- >>> ReplaceWith(expression="$embedded").model_dump()
515
- {"$replaceWith": "$embedded"}
516
- """
517
-
518
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
519
-
520
- expression: str | dict[str, Any] = Field(
521
- ..., description="Expression for new document"
522
- )
523
-
524
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
525
- return {"$replaceWith": self.expression}
526
-
527
-
528
- class Sample(BaseModel, BaseStage):
529
- """
530
- $sample stage - randomly selects documents.
531
-
532
- Example:
533
- >>> Sample(size=10).model_dump()
534
- {"$sample": {"size": 10}}
535
- """
536
-
537
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
538
-
539
- size: int = Field(..., gt=0, description="Number of documents to sample")
540
-
541
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
542
- return {"$sample": {"size": self.size}}
543
-
544
-
545
- class Out(BaseModel, BaseStage):
546
- """
547
- $out stage - writes results to a collection.
548
-
549
- Example:
550
- >>> Out(collection="results").model_dump()
551
- {"$out": "results"}
552
-
553
- >>> Out(collection="results", db="analytics").model_dump()
554
- {"$out": {"db": "analytics", "coll": "results"}}
555
- """
556
-
557
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
558
-
559
- collection: str = Field(..., description="Output collection name")
560
- db: str | None = Field(default=None, description="Output database name")
561
-
562
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
563
- if self.db is not None:
564
- return {"$out": {"db": self.db, "coll": self.collection}}
565
- return {"$out": self.collection}
566
-
567
-
568
- class Merge(BaseModel, BaseStage):
569
- """
570
- $merge stage - writes results to a collection with merge behavior.
571
-
572
- Example:
573
- >>> Merge(
574
- ... into="reports",
575
- ... on="_id",
576
- ... when_matched="merge",
577
- ... when_not_matched="insert"
578
- ... ).model_dump()
579
- {"$merge": {
580
- "into": "reports",
581
- "on": "_id",
582
- "whenMatched": "merge",
583
- "whenNotMatched": "insert"
584
- }}
585
- """
586
-
587
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
588
-
589
- into: str | dict[str, str] = Field(
590
- ..., description="Target collection (or {db, coll})"
591
- )
592
- on: str | list[str] | None = Field(
593
- default=None, description="Field(s) to match on"
594
- )
595
- let: dict[str, Any] | None = Field(
596
- default=None, description="Variables for pipeline"
597
- )
598
- when_matched: str | list[dict[str, Any]] | None = Field(
599
- default=None,
600
- validation_alias="whenMatched",
601
- serialization_alias="whenMatched",
602
- description="Action when matched (replace, keepExisting, merge, fail, "
603
- "or pipeline)",
604
- )
605
- when_not_matched: str | None = Field(
606
- default=None,
607
- validation_alias="whenNotMatched",
608
- serialization_alias="whenNotMatched",
609
- description="Action when not matched (insert, discard, fail)",
610
- )
611
-
612
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
613
- result: dict[str, Any] = {"into": self.into}
614
- if self.on is not None:
615
- result["on"] = self.on
616
- if self.let is not None:
617
- result["let"] = self.let
618
- if self.when_matched is not None:
619
- result["whenMatched"] = self.when_matched
620
- if self.when_not_matched is not None:
621
- result["whenNotMatched"] = self.when_not_matched
622
- return {"$merge": result}
623
-
624
-
625
- class Redact(BaseModel, BaseStage):
626
- """
627
- $redact stage - restricts document content based on stored info.
628
-
629
- Example:
630
- >>> Redact(expression={
631
- ... "$cond": {
632
- ... "if": {"$eq": ["$level", 5]},
633
- ... "then": "$$PRUNE",
634
- ... "else": "$$DESCEND"
635
- ... }
636
- ... }).model_dump()
637
- """
638
-
639
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
640
-
641
- expression: dict[str, Any] = Field(..., description="Redaction expression")
642
-
643
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
644
- return {"$redact": self.expression}
645
-
646
-
647
- class UnionWith(BaseModel, BaseStage):
648
- """
649
- $unionWith stage - combines pipeline results with another collection.
650
-
651
- Example:
652
- >>> UnionWith(collection="archive").model_dump()
653
- {"$unionWith": "archive"}
654
-
655
- >>> UnionWith(
656
- ... collection="archive",
657
- ... pipeline=Pipeline([Match(query={"year": 2023})])
658
- ... ).model_dump()
659
- {"$unionWith": {"coll": "archive", "pipeline": [...]}}
660
- """
661
-
662
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
663
-
664
- collection: str = Field(
665
- ...,
666
- validation_alias="coll",
667
- serialization_alias="coll",
668
- description="Collection to union",
669
- )
670
- pipeline: Pipeline | list[dict[str, Any]] | None = Field(
671
- default=None, description="Pipeline for the other collection"
672
- )
673
-
674
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
675
- if self.pipeline is None:
676
- return {"$unionWith": self.collection}
677
-
678
- pl = (
679
- self.pipeline.to_list()
680
- if isinstance(self.pipeline, Pipeline)
681
- else self.pipeline
682
- )
683
- return {"$unionWith": {"coll": self.collection, "pipeline": pl}}
684
-
685
-
686
- class GeoNear(BaseModel, BaseStage):
687
- """
688
- $geoNear stage - returns documents near a geographic point.
689
-
690
- Example:
691
- >>> GeoNear(
692
- ... near={"type": "Point", "coordinates": [-73.99, 40.73]},
693
- ... distance_field="dist.calculated",
694
- ... spherical=True,
695
- ... max_distance=5000
696
- ... ).model_dump()
697
- """
698
-
699
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
700
-
701
- near: dict[str, Any] | list[float] = Field(
702
- ..., description="GeoJSON point or legacy coordinates"
703
- )
704
- distance_field: str = Field(
705
- ...,
706
- validation_alias="distanceField",
707
- serialization_alias="distanceField",
708
- description="Field for calculated distance",
709
- )
710
- spherical: bool | None = Field(
711
- default=None, description="Use spherical geometry"
712
- )
713
- max_distance: float | None = Field(
714
- default=None,
715
- validation_alias="maxDistance",
716
- serialization_alias="maxDistance",
717
- description="Max distance in meters",
718
- )
719
- min_distance: float | None = Field(
720
- default=None,
721
- validation_alias="minDistance",
722
- serialization_alias="minDistance",
723
- description="Min distance in meters",
724
- )
725
- query: dict[str, Any] | None = Field(
726
- default=None, description="Additional query filter"
727
- )
728
- distance_multiplier: float | None = Field(
729
- default=None,
730
- validation_alias="distanceMultiplier",
731
- serialization_alias="distanceMultiplier",
732
- description="Multiplier for distances",
733
- )
734
- include_locs: str | None = Field(
735
- default=None,
736
- validation_alias="includeLocs",
737
- serialization_alias="includeLocs",
738
- description="Field for matched location",
739
- )
740
- key: str | None = Field(
741
- default=None, description="Geospatial index to use"
742
- )
743
-
744
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
745
- result: dict[str, Any] = {
746
- "near": self.near,
747
- "distanceField": self.distance_field,
748
- }
749
- if self.spherical is not None:
750
- result["spherical"] = self.spherical
751
- if self.max_distance is not None:
752
- result["maxDistance"] = self.max_distance
753
- if self.min_distance is not None:
754
- result["minDistance"] = self.min_distance
755
- if self.query is not None:
756
- result["query"] = self.query
757
- if self.distance_multiplier is not None:
758
- result["distanceMultiplier"] = self.distance_multiplier
759
- if self.include_locs is not None:
760
- result["includeLocs"] = self.include_locs
761
- if self.key is not None:
762
- result["key"] = self.key
763
- return {"$geoNear": result}
764
-
765
-
766
- class GraphLookup(BaseModel, BaseStage):
767
- """
768
- $graphLookup stage - performs recursive search.
769
-
770
- Example:
771
- >>> GraphLookup(
772
- ... from_collection="employees",
773
- ... start_with="$reportsTo",
774
- ... connect_from_field="reportsTo",
775
- ... connect_to_field="name",
776
- ... as_field="reportingHierarchy"
777
- ... ).model_dump()
778
- """
779
-
780
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
781
-
782
- from_collection: str = Field(
783
- ...,
784
- validation_alias="from",
785
- serialization_alias="from",
786
- description="Collection to search",
787
- )
788
- start_with: Any = Field(
789
- ...,
790
- validation_alias="startWith",
791
- serialization_alias="startWith",
792
- description="Expression for starting point",
793
- )
794
- connect_from_field: str = Field(
795
- ...,
796
- validation_alias="connectFromField",
797
- serialization_alias="connectFromField",
798
- description="Field to recurse from",
799
- )
800
- connect_to_field: str = Field(
801
- ...,
802
- validation_alias="connectToField",
803
- serialization_alias="connectToField",
804
- description="Field to match",
805
- )
806
- as_field: str = Field(
807
- ...,
808
- validation_alias="as",
809
- serialization_alias="as",
810
- description="Output array field",
811
- )
812
- max_depth: int | None = Field(
813
- default=None,
814
- validation_alias="maxDepth",
815
- serialization_alias="maxDepth",
816
- description="Maximum recursion depth",
817
- )
818
- depth_field: str | None = Field(
819
- default=None,
820
- validation_alias="depthField",
821
- serialization_alias="depthField",
822
- description="Field for recursion depth",
823
- )
824
- restrict_search_with_match: dict[str, Any] | None = Field(
825
- default=None,
826
- validation_alias="restrictSearchWithMatch",
827
- serialization_alias="restrictSearchWithMatch",
828
- description="Additional match conditions",
829
- )
830
-
831
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
832
- result: dict[str, Any] = {
833
- "from": self.from_collection,
834
- "startWith": self.start_with,
835
- "connectFromField": self.connect_from_field,
836
- "connectToField": self.connect_to_field,
837
- "as": self.as_field,
838
- }
839
- if self.max_depth is not None:
840
- result["maxDepth"] = self.max_depth
841
- if self.depth_field is not None:
842
- result["depthField"] = self.depth_field
843
- if self.restrict_search_with_match is not None:
844
- result["restrictSearchWithMatch"] = self.restrict_search_with_match
845
- return {"$graphLookup": result}
846
-
847
-
848
- class SetWindowFields(BaseModel, BaseStage):
849
- """
850
- $setWindowFields stage - performs window calculations.
851
-
852
- Example:
853
- >>> SetWindowFields(
854
- ... partition_by="$state",
855
- ... sort_by={"date": 1},
856
- ... output={
857
- ... "cumulative": {
858
- ... "$sum": "$quantity",
859
- ... "window": {"documents": ["unbounded", "current"]}
860
- ... }
861
- ... }
862
- ... ).model_dump()
863
- """
864
-
865
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
866
-
867
- partition_by: str | dict[str, Any] | None = Field(
868
- default=None,
869
- validation_alias="partitionBy",
870
- serialization_alias="partitionBy",
871
- description="Partitioning expression",
872
- )
873
- sort_by: dict[str, Literal[-1, 1]] | None = Field(
874
- default=None,
875
- validation_alias="sortBy",
876
- serialization_alias="sortBy",
877
- description="Sort specification",
878
- )
879
- output: dict[str, Any] = Field(
880
- ..., description="Output field specifications"
881
- )
882
-
883
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
884
- result: dict[str, Any] = {"output": self.output}
885
- if self.partition_by is not None:
886
- result["partitionBy"] = self.partition_by
887
- if self.sort_by is not None:
888
- result["sortBy"] = self.sort_by
889
- return {"$setWindowFields": result}
890
-
891
-
892
- class Densify(BaseModel, BaseStage):
893
- """
894
- $densify stage - fills gaps in data.
895
-
896
- Example:
897
- >>> Densify(
898
- ... field="date",
899
- ... range={"step": 1, "unit": "day", "bounds": "full"},
900
- ... partition_by_fields=["series"]
901
- ... ).model_dump()
902
- """
903
-
904
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
905
-
906
- field: str = Field(..., description="Field to densify")
907
- range: dict[str, Any] = Field(..., description="Range specification")
908
- partition_by_fields: list[str] | None = Field(
909
- default=None,
910
- validation_alias="partitionByFields",
911
- serialization_alias="partitionByFields",
912
- description="Fields to partition by",
913
- )
914
-
915
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
916
- result: dict[str, Any] = {"field": self.field, "range": self.range}
917
- if self.partition_by_fields is not None:
918
- result["partitionByFields"] = self.partition_by_fields
919
- return {"$densify": result}
920
-
921
-
922
- class Fill(BaseModel, BaseStage):
923
- """
924
- $fill stage - fills null/missing field values.
925
-
926
- Example:
927
- >>> Fill(
928
- ... sort_by={"date": 1},
929
- ... output={
930
- ... "score": {"method": "linear"},
931
- ... "bootcamp": {"value": "missing"}
932
- ... }
933
- ... ).model_dump()
934
- """
935
-
936
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
937
-
938
- output: dict[str, Any] = Field(
939
- ..., description="Output field specifications"
940
- )
941
- partition_by: str | dict[str, Any] | None = Field(
942
- default=None,
943
- validation_alias="partitionBy",
944
- serialization_alias="partitionBy",
945
- description="Partitioning expression",
946
- )
947
- partition_by_fields: list[str] | None = Field(
948
- default=None,
949
- validation_alias="partitionByFields",
950
- serialization_alias="partitionByFields",
951
- description="Fields to partition by",
952
- )
953
- sort_by: dict[str, Literal[-1, 1]] | None = Field(
954
- default=None,
955
- validation_alias="sortBy",
956
- serialization_alias="sortBy",
957
- description="Sort specification",
958
- )
959
-
960
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
961
- result: dict[str, Any] = {"output": self.output}
962
- if self.partition_by is not None:
963
- result["partitionBy"] = self.partition_by
964
- if self.partition_by_fields is not None:
965
- result["partitionByFields"] = self.partition_by_fields
966
- if self.sort_by is not None:
967
- result["sortBy"] = self.sort_by
968
- return {"$fill": result}
969
-
970
-
971
- class Documents(BaseModel, BaseStage):
972
- """
973
- $documents stage - returns literal documents.
974
-
975
- Example:
976
- >>> Documents(documents=[
977
- ... {"x": 1, "y": 2},
978
- ... {"x": 3, "y": 4}
979
- ... ]).model_dump()
980
- {"$documents": [{"x": 1, "y": 2}, {"x": 3, "y": 4}]}
981
- """
982
-
983
- model_config = ConfigDict(populate_by_name=True, extra="forbid")
984
-
985
- documents: list[dict[str, Any]] = Field(
986
- ..., description="Documents to return"
987
- )
988
-
989
- def model_dump(self, **kwargs: Any) -> dict[str, Any]:
990
- return {"$documents": self.documents}