mongo-pipebuilder 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,6 @@ Author: seligoroff
9
9
 
10
10
  from mongo_pipebuilder.builder import PipelineBuilder
11
11
 
12
- __version__ = "0.2.3"
12
+ __version__ = "0.3.1"
13
13
  __all__ = ["PipelineBuilder"]
14
14
 
@@ -6,6 +6,10 @@ Builder Pattern implementation for safe construction of MongoDB aggregation pipe
6
6
 
7
7
  Author: seligoroff
8
8
  """
9
+ import copy
10
+ import difflib
11
+ import json
12
+ from pathlib import Path
9
13
  from typing import Any, Dict, List, Optional, Union
10
14
 
11
15
  # For compatibility with Python < 3.11
@@ -187,6 +191,29 @@ class PipelineBuilder:
187
191
  """
188
192
  if not isinstance(accumulators, dict):
189
193
  raise TypeError(f"accumulators must be a dict, got {type(accumulators)}")
194
+
195
+ # Guard against a common mistake: passing {"_id": ...} as group_by.
196
+ # group_by should be the expression that becomes the $group _id.
197
+ # If users pass {"_id": expr}, MongoDB will create nested _id and later
198
+ # expressions like $first: "$_id" may fail because $_id becomes an object.
199
+ if isinstance(group_by, dict) and set(group_by.keys()) == {"_id"}:
200
+ inner = group_by["_id"]
201
+ raise ValueError(
202
+ "Invalid group_by: you passed a dict wrapper {'_id': ...} to PipelineBuilder.group().\n"
203
+ "PipelineBuilder.group(group_by=...) expects the expression that becomes $group._id.\n"
204
+ "\n"
205
+ "Did you mean one of these?\n"
206
+ f"- builder.group(group_by={inner!r}, accumulators=...)\n"
207
+ f"- builder.group(group_by={inner!r}, accumulators={{...}}) # same, explicit\n"
208
+ "\n"
209
+ "Examples:\n"
210
+ "- Array _id: builder.group(group_by=['$idSeason', '$idTournament'], accumulators={...})\n"
211
+ "- Field path: builder.group(group_by='$category', accumulators={...})\n"
212
+ "- Composite key: builder.group(group_by={'category': '$category'}, accumulators={...})\n"
213
+ "\n"
214
+ "Why this matters: {'_id': expr} would create a nested _id object in MongoDB, and later\n"
215
+ "operators like $first/$last on '$_id' may fail with: \"$first's argument must be an array, but is object\"."
216
+ )
190
217
 
191
218
  # Validate empty cases
192
219
  # group_by can be None, empty string, empty dict, etc. - all are valid in MongoDB
@@ -753,6 +780,183 @@ class PipelineBuilder:
753
780
  self._stages.insert(position, stage)
754
781
  return self
755
782
 
783
def get_stage_at(self, index: int) -> Dict[str, Any]:
    """
    Get a specific stage from the pipeline by index.

    Args:
        index: Zero-based index of the stage to retrieve. Negative indices
            are rejected rather than counted from the end of the pipeline.

    Returns:
        A deep copy of the stage at the given index, so the caller can
        mutate the result without affecting the builder.

    Raises:
        IndexError: If index is negative or not less than the number of stages

    Example:
        >>> builder = PipelineBuilder()
        >>> _ = builder.match({"status": "active"}).limit(10)
        >>> builder.get_stage_at(0)
        {'$match': {'status': 'active'}}
    """
    stage_count = len(self._stages)
    if index < 0 or index >= stage_count:
        # Report a half-open interval: stage_count itself is NOT a valid
        # index, so a closed "[0, stage_count]" would be off by one.
        raise IndexError(f"Index {index} out of range [0, {stage_count})")
    # Return a deep copy so callers can safely mutate nested structures
    return copy.deepcopy(self._stages[index])
809
+
810
def pretty_print(self, indent: int = 2, ensure_ascii: bool = False) -> str:
    """
    Render the whole pipeline as a formatted JSON string.

    Intended as a debugging aid for inspecting the pipeline structure.

    Args:
        indent: Number of spaces used per indentation level (default: 2)
        ensure_ascii: If False, non-ASCII characters are emitted as-is;
            if True they are escaped (default: False)

    Returns:
        JSON text for the list of stages currently held by the builder

    Example:
        >>> builder = PipelineBuilder()
        >>> _ = builder.match({"status": "active"}).limit(10)
        >>> print(builder.pretty_print())
        [
          {
            "$match": {
              "status": "active"
            }
          },
          {
            "$limit": 10
          }
        ]
    """
    rendered = json.dumps(
        self._stages,
        ensure_ascii=ensure_ascii,
        indent=indent,
    )
    return rendered
839
+
840
def pretty_print_stage(
    self,
    stage: Union[int, Dict[str, Any]],
    indent: int = 2,
    ensure_ascii: bool = False,
) -> str:
    """
    Render a single stage as a formatted JSON string.

    Args:
        stage: Either the 0-based index of a stage in this builder, or a
            stage dict to format directly
        indent: Number of spaces used per indentation level (default: 2)
        ensure_ascii: If False, non-ASCII characters are emitted as-is (default: False)

    Returns:
        JSON text for the selected stage

    Raises:
        TypeError: If stage is neither an int nor a dict
        IndexError: If stage is an int that is out of range
    """
    # Reject unsupported argument types up front.
    if not isinstance(stage, (int, dict)):
        raise TypeError(f"stage must be an int index or a dict, got {type(stage)}")

    if isinstance(stage, int):
        # Index lookup is delegated to get_stage_at.
        payload = self.get_stage_at(stage)
    else:
        # Work on a private copy of the caller's dict.
        payload = copy.deepcopy(stage)

    return json.dumps(payload, ensure_ascii=ensure_ascii, indent=indent)
869
+
870
def to_json_file(
    self,
    filepath: Union[str, Path],
    indent: int = 2,
    ensure_ascii: bool = False,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Save the pipeline to a JSON file.

    Useful for debugging, comparison with other pipelines, or versioning.
    The file contains an object with a "pipeline" key and, when non-empty
    metadata is given, a "metadata" key. Missing parent directories are
    created.

    Args:
        filepath: Path to the output JSON file (str or Path)
        indent: Number of spaces for indentation (default: 2)
        ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
        metadata: Optional metadata to include in the JSON file; omitted
            from the output when None or empty

    Raises:
        TypeError: If the pipeline or metadata contains a value that is not
            JSON-serializable. Nothing is written in that case.
        OSError: If the directory or file cannot be written (IOError is an
            alias of OSError)

    Example:
        >>> builder = PipelineBuilder()
        >>> _ = builder.match({"status": "active"}).limit(10)
        >>> builder.to_json_file("debug_pipeline.json")

        >>> # With metadata
        >>> builder.to_json_file(
        ...     "pipeline.json",
        ...     metadata={"version": "1.0", "author": "developer"}
        ... )
    """
    output: Dict[str, Any] = {
        "pipeline": self._stages,
    }
    if metadata:
        output["metadata"] = metadata

    # Serialize BEFORE touching the filesystem. Streaming json.dump() into an
    # already-opened file would leave a truncated, invalid JSON file (and
    # freshly created directories) behind if a value turns out not to be
    # serializable.
    text = json.dumps(output, indent=indent, ensure_ascii=ensure_ascii)

    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        f.write(text)
913
+
914
def compare_with(self, other: "PipelineBuilder", context_lines: int = 3) -> str:
    """
    Compare this pipeline with another pipeline and return a unified diff.

    This is useful when migrating legacy pipelines (e.g., templates) to builder code.
    Both pipelines are rendered as indented JSON with sorted keys before
    diffing, so key order inside a stage does not produce differences.
    In the diff header this pipeline is labelled "new" and the other one
    "other".

    Args:
        other: Another PipelineBuilder instance to compare with
        context_lines: Number of context lines in the unified diff (default: 3)

    Returns:
        Unified diff as a string (no trailing newline). Returns
        "No differences." if pipelines are identical.

    Raises:
        TypeError: If other is not a PipelineBuilder, or context_lines is not an int
        ValueError: If context_lines is negative

    Example:
        >>> legacy = PipelineBuilder().match({"a": 1})
        >>> new = PipelineBuilder().match({"a": 2})
        >>> print(new.compare_with(legacy))
    """
    if not isinstance(other, PipelineBuilder):
        raise TypeError(f"other must be a PipelineBuilder, got {type(other)}")
    if not isinstance(context_lines, int):
        raise TypeError(f"context_lines must be an int, got {type(context_lines)}")
    if context_lines < 0:
        raise ValueError("context_lines cannot be negative")

    def _as_lines(pipeline: List[Dict[str, Any]]) -> List[str]:
        # Canonical JSON rendering, split WITHOUT line endings.
        return json.dumps(
            pipeline,
            indent=2,
            ensure_ascii=False,
            sort_keys=True,
        ).splitlines()

    # The last JSON line ("]" or "[]") has no trailing newline. Keeping line
    # endings on the inputs therefore fused two diff records together when
    # that line was part of a change (e.g. "-[]+[" for an empty pipeline).
    # Diff newline-free lines with lineterm="" and join them explicitly.
    diff = difflib.unified_diff(
        _as_lines(self.build()),
        _as_lines(other.build()),
        fromfile="new",
        tofile="other",
        n=context_lines,
        lineterm="",
    )
    out = "\n".join(diff)
    return out if out else "No differences."
959
+
756
960
  def build(self) -> List[Dict[str, Any]]:
757
961
  """
758
962
  Return the completed pipeline.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mongo-pipebuilder
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Summary: Type-safe, fluent MongoDB aggregation pipeline builder
5
5
  Author-email: seligoroff <seligoroff@gmail.com>
6
6
  License: MIT
@@ -252,7 +252,7 @@ builder.prepend({"$match": {"deleted": False}})
252
252
  Inserts a stage at a specific position (0-based index) in the pipeline.
253
253
 
254
254
  ```python
255
- builder.match({"status": "active"}).group({"_id": "$category"}, {"count": {"$sum": 1}})
255
+ builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
256
256
  builder.insert_at(1, {"$sort": {"name": 1}})
257
257
  # Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
258
258
  ```
@@ -297,6 +297,74 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
297
297
  builder.validate() # Raises ValueError: $out stage must be the last stage
298
298
  ```
299
299
 
300
+ ##### `get_stage_at(index: int) -> Dict[str, Any]`
301
+
302
+ Gets a specific stage from the pipeline by index. Returns a deep copy of the stage, so mutating the result does not change the builder's pipeline.
303
+
304
+ ```python
305
+ builder = PipelineBuilder()
306
+ builder.match({"status": "active"}).limit(10)
307
+ stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
308
+ ```
309
+
310
+ ##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
311
+
312
+ Returns a formatted JSON string representation of the pipeline. Useful for debugging.
313
+
314
+ ```python
315
+ builder = PipelineBuilder()
316
+ builder.match({"status": "active"}).limit(10)
317
+ print(builder.pretty_print())
318
+ # [
319
+ # {
320
+ # "$match": {
321
+ # "status": "active"
322
+ # }
323
+ # },
324
+ # {
325
+ # "$limit": 10
326
+ # }
327
+ # ]
328
+ ```
329
+
330
+ ##### `pretty_print_stage(stage: Union[int, Dict[str, Any]], indent: int = 2, ensure_ascii: bool = False) -> str`
331
+
332
+ Returns a formatted JSON string representation of a single stage (by index or by dict).
333
+
334
+ ```python
335
+ builder = PipelineBuilder().match({"status": "active"}).limit(10)
336
+ print(builder.pretty_print_stage(0)) # Prints the $match stage
337
+ ```
338
+
339
+ ##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
340
+
341
+ Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
342
+
343
+ ```python
344
+ builder = PipelineBuilder()
345
+ builder.match({"status": "active"}).limit(10)
346
+
347
+ # Basic usage
348
+ builder.to_json_file("debug_pipeline.json")
349
+
350
+ # With metadata
351
+ builder.to_json_file(
352
+ "pipeline.json",
353
+ metadata={"version": "1.0", "author": "developer"}
354
+ )
355
+ ```
356
+
357
+ ##### `compare_with(other: PipelineBuilder, context_lines: int = 3) -> str`
358
+
359
+ Returns a unified diff between two pipelines (useful for comparing “new” builder pipelines vs legacy/template pipelines).
360
+
361
+ ```python
362
+ legacy = PipelineBuilder().match({"status": "active"}).limit(10)
363
+ new = PipelineBuilder().match({"status": "inactive"}).limit(10)
364
+
365
+ print(new.compare_with(legacy))
366
+ ```
367
+
300
368
  ##### `build() -> List[Dict[str, Any]]`
301
369
 
302
370
  Returns the complete pipeline as a list of stage dictionaries.
@@ -452,11 +520,39 @@ base = get_base_pipeline(user_id)
452
520
  # Create multiple queries from cached base
453
521
  recent = base.copy().sort({"createdAt": -1}).limit(10).build()
454
522
  by_category = base.copy().match({"category": "tech"}).build()
455
- with_stats = base.copy().group({"_id": "$category"}, {"count": {"$sum": 1}}).build()
523
+ with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
456
524
 
457
525
  # Base pipeline is safely cached and reused
458
526
  ```
459
527
 
528
+ ## Best Practices
529
+
530
+ ### Array `_id` after `$group`: prefer `$arrayElemAt` and materialize fields
531
+
532
+ If you use `$group` with an array `_id` (e.g. `["$idSeason", "$idTournament"]`), avoid relying on `$_id` later in the pipeline.
533
+ Instead, **extract elements with `$arrayElemAt` and store them into explicit fields**, then use those fields in subsequent stages.
534
+
535
+ ```python
536
+ pipeline = (
537
+ PipelineBuilder()
538
+ .group(
539
+ group_by=["$idSeason", "$idTournament"],
540
+ accumulators={"idTeams": {"$addToSet": "$idTeam"}},
541
+ )
542
+ .project({
543
+ "idSeason": {"$arrayElemAt": ["$_id", 0]},
544
+ "idTournament": {"$arrayElemAt": ["$_id", 1]},
545
+ "idTeams": 1,
546
+ # Optional: preserve array _id explicitly if you really need it later
547
+ # "_id": "$_id",
548
+ })
549
+ .build()
550
+ )
551
+ ```
552
+
553
+ This pattern reduces surprises and helps avoid errors like:
554
+ `$first's argument must be an array, but is object`.
555
+
460
556
  #### Example: Pipeline Factories
461
557
 
462
558
  ```python
@@ -544,3 +640,14 @@ MIT License - see [LICENSE](LICENSE) file for details.
544
640
 
545
641
 
546
642
 
643
+
644
+
645
+
646
+
647
+
648
+
649
+
650
+
651
+
652
+
653
+
@@ -0,0 +1,7 @@
1
+ mongo_pipebuilder/__init__.py,sha256=dvekji4j1j9v5MzJOJIqyO2znWVia1opBn8Y1Sc_Y3k,336
2
+ mongo_pipebuilder/builder.py,sha256=Fz7oUiB9FpqnIwnGgamof2ZEBaUGjfYSuB7mYCJO9Qc,34731
3
+ mongo_pipebuilder-0.3.1.dist-info/licenses/LICENSE,sha256=-ZkZpDLHDQAc-YBIojJ6eDsMwxwx5pRuQz3RHnl9Y8w,1104
4
+ mongo_pipebuilder-0.3.1.dist-info/METADATA,sha256=hYFQkwz1xtJK-MPAV2Vp4PuwKLvC7CLc5U-emp4yOzw,17478
5
+ mongo_pipebuilder-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ mongo_pipebuilder-0.3.1.dist-info/top_level.txt,sha256=wLn7H_v-qaNIws5FeBbKPZBCmYFYgFEhPaLjoCWcisc,18
7
+ mongo_pipebuilder-0.3.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- mongo_pipebuilder/__init__.py,sha256=82PaAyv4VoEvfvVhlYnMTPnZEMiOI24Q4Nw9RSrEjdA,336
2
- mongo_pipebuilder/builder.py,sha256=GivmjNqk2K5v3fX1TWMFFH7jx3WxlWWhlggWoRxCNl4,26875
3
- mongo_pipebuilder-0.2.3.dist-info/licenses/LICENSE,sha256=ITJa-Zkh2Qc1_xRiHcfkL5zsmTicbSxqsMih4cjtBM4,1093
4
- mongo_pipebuilder-0.2.3.dist-info/METADATA,sha256=eh6i_U365QAqWhERNAsFOgEVpjokTt4VEEJH8SqDAtA,14661
5
- mongo_pipebuilder-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- mongo_pipebuilder-0.2.3.dist-info/top_level.txt,sha256=wLn7H_v-qaNIws5FeBbKPZBCmYFYgFEhPaLjoCWcisc,18
7
- mongo_pipebuilder-0.2.3.dist-info/RECORD,,