mongo-pipebuilder 0.2.3__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/LICENSE +11 -0
- {mongo_pipebuilder-0.2.3/src/mongo_pipebuilder.egg-info → mongo_pipebuilder-0.3.1}/PKG-INFO +110 -3
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/README.md +109 -2
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/pyproject.toml +1 -1
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/setup.cfg +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder/__init__.py +1 -1
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder/builder.py +204 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1/src/mongo_pipebuilder.egg-info}/PKG-INFO +110 -3
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/SOURCES.txt +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/dependency_links.txt +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/requires.txt +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/top_level.txt +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder.py +0 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_debug.py +297 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_insert.py +4 -3
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_validation.py +11 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_validation_existing.py +7 -0
- {mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_validation_new.py +1 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mongo-pipebuilder
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Type-safe, fluent MongoDB aggregation pipeline builder
|
|
5
5
|
Author-email: seligoroff <seligoroff@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -252,7 +252,7 @@ builder.prepend({"$match": {"deleted": False}})
|
|
|
252
252
|
Inserts a stage at a specific position (0-based index) in the pipeline.
|
|
253
253
|
|
|
254
254
|
```python
|
|
255
|
-
builder.match({"status": "active"}).group(
|
|
255
|
+
builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
|
|
256
256
|
builder.insert_at(1, {"$sort": {"name": 1}})
|
|
257
257
|
# Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
|
|
258
258
|
```
|
|
@@ -297,6 +297,74 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
|
|
|
297
297
|
builder.validate() # Raises ValueError: $out stage must be the last stage
|
|
298
298
|
```
|
|
299
299
|
|
|
300
|
+
##### `get_stage_at(index: int) -> Dict[str, Any]`
|
|
301
|
+
|
|
302
|
+
Gets a specific stage from the pipeline by index. Returns a copy of the stage.
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
builder = PipelineBuilder()
|
|
306
|
+
builder.match({"status": "active"}).limit(10)
|
|
307
|
+
stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
311
|
+
|
|
312
|
+
Returns a formatted JSON string representation of the pipeline. Useful for debugging.
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
builder = PipelineBuilder()
|
|
316
|
+
builder.match({"status": "active"}).limit(10)
|
|
317
|
+
print(builder.pretty_print())
|
|
318
|
+
# [
|
|
319
|
+
# {
|
|
320
|
+
# "$match": {
|
|
321
|
+
# "status": "active"
|
|
322
|
+
# }
|
|
323
|
+
# },
|
|
324
|
+
# {
|
|
325
|
+
# "$limit": 10
|
|
326
|
+
# }
|
|
327
|
+
# ]
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
##### `pretty_print_stage(stage: Union[int, Dict[str, Any]], indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
331
|
+
|
|
332
|
+
Returns a formatted JSON string representation of a single stage (by index or by dict).
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
builder = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
336
|
+
print(builder.pretty_print_stage(0)) # Prints the $match stage
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
|
|
340
|
+
|
|
341
|
+
Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
|
|
342
|
+
|
|
343
|
+
```python
|
|
344
|
+
builder = PipelineBuilder()
|
|
345
|
+
builder.match({"status": "active"}).limit(10)
|
|
346
|
+
|
|
347
|
+
# Basic usage
|
|
348
|
+
builder.to_json_file("debug_pipeline.json")
|
|
349
|
+
|
|
350
|
+
# With metadata
|
|
351
|
+
builder.to_json_file(
|
|
352
|
+
"pipeline.json",
|
|
353
|
+
metadata={"version": "1.0", "author": "developer"}
|
|
354
|
+
)
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
##### `compare_with(other: PipelineBuilder, context_lines: int = 3) -> str`
|
|
358
|
+
|
|
359
|
+
Returns a unified diff between two pipelines (useful for comparing “new” builder pipelines vs legacy/template pipelines).
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
legacy = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
363
|
+
new = PipelineBuilder().match({"status": "inactive"}).limit(10)
|
|
364
|
+
|
|
365
|
+
print(new.compare_with(legacy))
|
|
366
|
+
```
|
|
367
|
+
|
|
300
368
|
##### `build() -> List[Dict[str, Any]]`
|
|
301
369
|
|
|
302
370
|
Returns the complete pipeline as a list of stage dictionaries.
|
|
@@ -452,11 +520,39 @@ base = get_base_pipeline(user_id)
|
|
|
452
520
|
# Create multiple queries from cached base
|
|
453
521
|
recent = base.copy().sort({"createdAt": -1}).limit(10).build()
|
|
454
522
|
by_category = base.copy().match({"category": "tech"}).build()
|
|
455
|
-
with_stats = base.copy().group(
|
|
523
|
+
with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
|
|
456
524
|
|
|
457
525
|
# Base pipeline is safely cached and reused
|
|
458
526
|
```
|
|
459
527
|
|
|
528
|
+
## Best Practices
|
|
529
|
+
|
|
530
|
+
### Array `_id` after `$group`: prefer `$arrayElemAt` and materialize fields
|
|
531
|
+
|
|
532
|
+
If you use `$group` with an array `_id` (e.g. `["_idSeason", "_idTournament"]`), avoid relying on `$_id` later in the pipeline.
|
|
533
|
+
Instead, **extract elements with `$arrayElemAt` and store them into explicit fields**, then use those fields in subsequent stages.
|
|
534
|
+
|
|
535
|
+
```python
|
|
536
|
+
pipeline = (
|
|
537
|
+
PipelineBuilder()
|
|
538
|
+
.group(
|
|
539
|
+
group_by=["$idSeason", "$idTournament"],
|
|
540
|
+
accumulators={"idTeams": {"$addToSet": "$idTeam"}},
|
|
541
|
+
)
|
|
542
|
+
.project({
|
|
543
|
+
"idSeason": {"$arrayElemAt": ["$_id", 0]},
|
|
544
|
+
"idTournament": {"$arrayElemAt": ["$_id", 1]},
|
|
545
|
+
"idTeams": 1,
|
|
546
|
+
# Optional: preserve array _id explicitly if you really need it later
|
|
547
|
+
# "_id": "$_id",
|
|
548
|
+
})
|
|
549
|
+
.build()
|
|
550
|
+
)
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
This pattern reduces surprises and helps avoid errors like:
|
|
554
|
+
`$first's argument must be an array, but is object`.
|
|
555
|
+
|
|
460
556
|
#### Example: Pipeline Factories
|
|
461
557
|
|
|
462
558
|
```python
|
|
@@ -544,3 +640,14 @@ MIT License - see [LICENSE](LICENSE) file for details.
|
|
|
544
640
|
|
|
545
641
|
|
|
546
642
|
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
|
|
@@ -224,7 +224,7 @@ builder.prepend({"$match": {"deleted": False}})
|
|
|
224
224
|
Inserts a stage at a specific position (0-based index) in the pipeline.
|
|
225
225
|
|
|
226
226
|
```python
|
|
227
|
-
builder.match({"status": "active"}).group(
|
|
227
|
+
builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
|
|
228
228
|
builder.insert_at(1, {"$sort": {"name": 1}})
|
|
229
229
|
# Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
|
|
230
230
|
```
|
|
@@ -269,6 +269,74 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
|
|
|
269
269
|
builder.validate() # Raises ValueError: $out stage must be the last stage
|
|
270
270
|
```
|
|
271
271
|
|
|
272
|
+
##### `get_stage_at(index: int) -> Dict[str, Any]`
|
|
273
|
+
|
|
274
|
+
Gets a specific stage from the pipeline by index. Returns a copy of the stage.
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
builder = PipelineBuilder()
|
|
278
|
+
builder.match({"status": "active"}).limit(10)
|
|
279
|
+
stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
283
|
+
|
|
284
|
+
Returns a formatted JSON string representation of the pipeline. Useful for debugging.
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
builder = PipelineBuilder()
|
|
288
|
+
builder.match({"status": "active"}).limit(10)
|
|
289
|
+
print(builder.pretty_print())
|
|
290
|
+
# [
|
|
291
|
+
# {
|
|
292
|
+
# "$match": {
|
|
293
|
+
# "status": "active"
|
|
294
|
+
# }
|
|
295
|
+
# },
|
|
296
|
+
# {
|
|
297
|
+
# "$limit": 10
|
|
298
|
+
# }
|
|
299
|
+
# ]
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
##### `pretty_print_stage(stage: Union[int, Dict[str, Any]], indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
303
|
+
|
|
304
|
+
Returns a formatted JSON string representation of a single stage (by index or by dict).
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
builder = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
308
|
+
print(builder.pretty_print_stage(0)) # Prints the $match stage
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
|
|
312
|
+
|
|
313
|
+
Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
|
|
314
|
+
|
|
315
|
+
```python
|
|
316
|
+
builder = PipelineBuilder()
|
|
317
|
+
builder.match({"status": "active"}).limit(10)
|
|
318
|
+
|
|
319
|
+
# Basic usage
|
|
320
|
+
builder.to_json_file("debug_pipeline.json")
|
|
321
|
+
|
|
322
|
+
# With metadata
|
|
323
|
+
builder.to_json_file(
|
|
324
|
+
"pipeline.json",
|
|
325
|
+
metadata={"version": "1.0", "author": "developer"}
|
|
326
|
+
)
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
##### `compare_with(other: PipelineBuilder, context_lines: int = 3) -> str`
|
|
330
|
+
|
|
331
|
+
Returns a unified diff between two pipelines (useful for comparing “new” builder pipelines vs legacy/template pipelines).
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
legacy = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
335
|
+
new = PipelineBuilder().match({"status": "inactive"}).limit(10)
|
|
336
|
+
|
|
337
|
+
print(new.compare_with(legacy))
|
|
338
|
+
```
|
|
339
|
+
|
|
272
340
|
##### `build() -> List[Dict[str, Any]]`
|
|
273
341
|
|
|
274
342
|
Returns the complete pipeline as a list of stage dictionaries.
|
|
@@ -424,11 +492,39 @@ base = get_base_pipeline(user_id)
|
|
|
424
492
|
# Create multiple queries from cached base
|
|
425
493
|
recent = base.copy().sort({"createdAt": -1}).limit(10).build()
|
|
426
494
|
by_category = base.copy().match({"category": "tech"}).build()
|
|
427
|
-
with_stats = base.copy().group(
|
|
495
|
+
with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
|
|
428
496
|
|
|
429
497
|
# Base pipeline is safely cached and reused
|
|
430
498
|
```
|
|
431
499
|
|
|
500
|
+
## Best Practices
|
|
501
|
+
|
|
502
|
+
### Array `_id` after `$group`: prefer `$arrayElemAt` and materialize fields
|
|
503
|
+
|
|
504
|
+
If you use `$group` with an array `_id` (e.g. `["_idSeason", "_idTournament"]`), avoid relying on `$_id` later in the pipeline.
|
|
505
|
+
Instead, **extract elements with `$arrayElemAt` and store them into explicit fields**, then use those fields in subsequent stages.
|
|
506
|
+
|
|
507
|
+
```python
|
|
508
|
+
pipeline = (
|
|
509
|
+
PipelineBuilder()
|
|
510
|
+
.group(
|
|
511
|
+
group_by=["$idSeason", "$idTournament"],
|
|
512
|
+
accumulators={"idTeams": {"$addToSet": "$idTeam"}},
|
|
513
|
+
)
|
|
514
|
+
.project({
|
|
515
|
+
"idSeason": {"$arrayElemAt": ["$_id", 0]},
|
|
516
|
+
"idTournament": {"$arrayElemAt": ["$_id", 1]},
|
|
517
|
+
"idTeams": 1,
|
|
518
|
+
# Optional: preserve array _id explicitly if you really need it later
|
|
519
|
+
# "_id": "$_id",
|
|
520
|
+
})
|
|
521
|
+
.build()
|
|
522
|
+
)
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
This pattern reduces surprises and helps avoid errors like:
|
|
526
|
+
`$first's argument must be an array, but is object`.
|
|
527
|
+
|
|
432
528
|
#### Example: Pipeline Factories
|
|
433
529
|
|
|
434
530
|
```python
|
|
@@ -516,3 +612,14 @@ MIT License - see [LICENSE](LICENSE) file for details.
|
|
|
516
612
|
|
|
517
613
|
|
|
518
614
|
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
|
|
File without changes
|
|
@@ -6,6 +6,10 @@ Builder Pattern implementation for safe construction of MongoDB aggregation pipe
|
|
|
6
6
|
|
|
7
7
|
Author: seligoroff
|
|
8
8
|
"""
|
|
9
|
+
import copy
|
|
10
|
+
import difflib
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
9
13
|
from typing import Any, Dict, List, Optional, Union
|
|
10
14
|
|
|
11
15
|
# For compatibility with Python < 3.11
|
|
@@ -187,6 +191,29 @@ class PipelineBuilder:
|
|
|
187
191
|
"""
|
|
188
192
|
if not isinstance(accumulators, dict):
|
|
189
193
|
raise TypeError(f"accumulators must be a dict, got {type(accumulators)}")
|
|
194
|
+
|
|
195
|
+
# Guard against a common mistake: passing {"_id": ...} as group_by.
|
|
196
|
+
# group_by should be the expression that becomes the $group _id.
|
|
197
|
+
# If users pass {"_id": expr}, MongoDB will create nested _id and later
|
|
198
|
+
# expressions like $first: "$_id" may fail because $_id becomes an object.
|
|
199
|
+
if isinstance(group_by, dict) and set(group_by.keys()) == {"_id"}:
|
|
200
|
+
inner = group_by["_id"]
|
|
201
|
+
raise ValueError(
|
|
202
|
+
"Invalid group_by: you passed a dict wrapper {'_id': ...} to PipelineBuilder.group().\n"
|
|
203
|
+
"PipelineBuilder.group(group_by=...) expects the expression that becomes $group._id.\n"
|
|
204
|
+
"\n"
|
|
205
|
+
"Did you mean one of these?\n"
|
|
206
|
+
f"- builder.group(group_by={inner!r}, accumulators=...)\n"
|
|
207
|
+
f"- builder.group(group_by={inner!r}, accumulators={{...}}) # same, explicit\n"
|
|
208
|
+
"\n"
|
|
209
|
+
"Examples:\n"
|
|
210
|
+
"- Array _id: builder.group(group_by=['$idSeason', '$idTournament'], accumulators={...})\n"
|
|
211
|
+
"- Field path: builder.group(group_by='$category', accumulators={...})\n"
|
|
212
|
+
"- Composite key: builder.group(group_by={'category': '$category'}, accumulators={...})\n"
|
|
213
|
+
"\n"
|
|
214
|
+
"Why this matters: {'_id': expr} would create a nested _id object in MongoDB, and later\n"
|
|
215
|
+
"operators like $first/$last on '$_id' may fail with: \"$first's argument must be an array, but is object\"."
|
|
216
|
+
)
|
|
190
217
|
|
|
191
218
|
# Validate empty cases
|
|
192
219
|
# group_by can be None, empty string, empty dict, etc. - all are valid in MongoDB
|
|
@@ -753,6 +780,183 @@ class PipelineBuilder:
|
|
|
753
780
|
self._stages.insert(position, stage)
|
|
754
781
|
return self
|
|
755
782
|
|
|
783
|
+
def get_stage_at(self, index: int) -> Dict[str, Any]:
|
|
784
|
+
"""
|
|
785
|
+
Get a specific stage from the pipeline by index.
|
|
786
|
+
|
|
787
|
+
Args:
|
|
788
|
+
index: Zero-based index of the stage to retrieve
|
|
789
|
+
|
|
790
|
+
Returns:
|
|
791
|
+
Dictionary representing the stage at the given index
|
|
792
|
+
|
|
793
|
+
Raises:
|
|
794
|
+
IndexError: If index is out of range
|
|
795
|
+
|
|
796
|
+
Example:
|
|
797
|
+
>>> builder = PipelineBuilder()
|
|
798
|
+
>>> builder.match({"status": "active"}).limit(10)
|
|
799
|
+
>>> stage = builder.get_stage_at(0)
|
|
800
|
+
>>> stage
|
|
801
|
+
{"$match": {"status": "active"}}
|
|
802
|
+
"""
|
|
803
|
+
if index < 0 or index >= len(self._stages):
|
|
804
|
+
raise IndexError(
|
|
805
|
+
f"Index {index} out of range [0, {len(self._stages)}]"
|
|
806
|
+
)
|
|
807
|
+
# Return a deep copy so callers can safely mutate nested structures
|
|
808
|
+
return copy.deepcopy(self._stages[index])
|
|
809
|
+
|
|
810
|
+
def pretty_print(self, indent: int = 2, ensure_ascii: bool = False) -> str:
|
|
811
|
+
"""
|
|
812
|
+
Return a formatted JSON string representation of the pipeline.
|
|
813
|
+
|
|
814
|
+
Useful for debugging and understanding pipeline structure.
|
|
815
|
+
|
|
816
|
+
Args:
|
|
817
|
+
indent: Number of spaces for indentation (default: 2)
|
|
818
|
+
ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
|
|
819
|
+
|
|
820
|
+
Returns:
|
|
821
|
+
Formatted JSON string of the pipeline
|
|
822
|
+
|
|
823
|
+
Example:
|
|
824
|
+
>>> builder = PipelineBuilder()
|
|
825
|
+
>>> builder.match({"status": "active"}).limit(10)
|
|
826
|
+
>>> print(builder.pretty_print())
|
|
827
|
+
[
|
|
828
|
+
{
|
|
829
|
+
"$match": {
|
|
830
|
+
"status": "active"
|
|
831
|
+
}
|
|
832
|
+
},
|
|
833
|
+
{
|
|
834
|
+
"$limit": 10
|
|
835
|
+
}
|
|
836
|
+
]
|
|
837
|
+
"""
|
|
838
|
+
return json.dumps(self._stages, indent=indent, ensure_ascii=ensure_ascii)
|
|
839
|
+
|
|
840
|
+
def pretty_print_stage(
|
|
841
|
+
self,
|
|
842
|
+
stage: Union[int, Dict[str, Any]],
|
|
843
|
+
indent: int = 2,
|
|
844
|
+
ensure_ascii: bool = False,
|
|
845
|
+
) -> str:
|
|
846
|
+
"""
|
|
847
|
+
Return a formatted JSON string representation of a single stage.
|
|
848
|
+
|
|
849
|
+
Args:
|
|
850
|
+
stage: Stage index (0-based) or a stage dict
|
|
851
|
+
indent: Number of spaces for indentation (default: 2)
|
|
852
|
+
ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
|
|
853
|
+
|
|
854
|
+
Returns:
|
|
855
|
+
Formatted JSON string of the stage
|
|
856
|
+
|
|
857
|
+
Raises:
|
|
858
|
+
TypeError: If stage is not an int or dict
|
|
859
|
+
IndexError: If stage is an int out of range
|
|
860
|
+
"""
|
|
861
|
+
if isinstance(stage, int):
|
|
862
|
+
stage_dict = self.get_stage_at(stage)
|
|
863
|
+
elif isinstance(stage, dict):
|
|
864
|
+
stage_dict = copy.deepcopy(stage)
|
|
865
|
+
else:
|
|
866
|
+
raise TypeError(f"stage must be an int index or a dict, got {type(stage)}")
|
|
867
|
+
|
|
868
|
+
return json.dumps(stage_dict, indent=indent, ensure_ascii=ensure_ascii)
|
|
869
|
+
|
|
870
|
+
def to_json_file(
|
|
871
|
+
self,
|
|
872
|
+
filepath: Union[str, Path],
|
|
873
|
+
indent: int = 2,
|
|
874
|
+
ensure_ascii: bool = False,
|
|
875
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
876
|
+
) -> None:
|
|
877
|
+
"""
|
|
878
|
+
Save the pipeline to a JSON file.
|
|
879
|
+
|
|
880
|
+
Useful for debugging, comparison with other pipelines, or versioning.
|
|
881
|
+
|
|
882
|
+
Args:
|
|
883
|
+
filepath: Path to the output JSON file (str or Path)
|
|
884
|
+
indent: Number of spaces for indentation (default: 2)
|
|
885
|
+
ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
|
|
886
|
+
metadata: Optional metadata to include in the JSON file
|
|
887
|
+
|
|
888
|
+
Raises:
|
|
889
|
+
IOError: If file cannot be written
|
|
890
|
+
|
|
891
|
+
Example:
|
|
892
|
+
>>> builder = PipelineBuilder()
|
|
893
|
+
>>> builder.match({"status": "active"}).limit(10)
|
|
894
|
+
>>> builder.to_json_file("debug_pipeline.json")
|
|
895
|
+
|
|
896
|
+
>>> # With metadata
|
|
897
|
+
>>> builder.to_json_file(
|
|
898
|
+
... "pipeline.json",
|
|
899
|
+
... metadata={"version": "1.0", "author": "developer"}
|
|
900
|
+
... )
|
|
901
|
+
"""
|
|
902
|
+
filepath = Path(filepath)
|
|
903
|
+
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
904
|
+
|
|
905
|
+
output: Dict[str, Any] = {
|
|
906
|
+
"pipeline": self._stages,
|
|
907
|
+
}
|
|
908
|
+
if metadata:
|
|
909
|
+
output["metadata"] = metadata
|
|
910
|
+
|
|
911
|
+
with open(filepath, "w", encoding="utf-8") as f:
|
|
912
|
+
json.dump(output, f, indent=indent, ensure_ascii=ensure_ascii)
|
|
913
|
+
|
|
914
|
+
def compare_with(self, other: "PipelineBuilder", context_lines: int = 3) -> str:
|
|
915
|
+
"""
|
|
916
|
+
Compare this pipeline with another pipeline and return a unified diff.
|
|
917
|
+
|
|
918
|
+
This is useful when migrating legacy pipelines (e.g., templates) to builder code.
|
|
919
|
+
|
|
920
|
+
Args:
|
|
921
|
+
other: Another PipelineBuilder instance to compare with
|
|
922
|
+
context_lines: Number of context lines in the unified diff (default: 3)
|
|
923
|
+
|
|
924
|
+
Returns:
|
|
925
|
+
Unified diff as a string. Returns "No differences." if pipelines are identical.
|
|
926
|
+
|
|
927
|
+
Raises:
|
|
928
|
+
TypeError: If other is not a PipelineBuilder
|
|
929
|
+
ValueError: If context_lines is negative
|
|
930
|
+
|
|
931
|
+
Example:
|
|
932
|
+
>>> legacy = PipelineBuilder().match({"a": 1})
|
|
933
|
+
>>> new = PipelineBuilder().match({"a": 2})
|
|
934
|
+
>>> print(new.compare_with(legacy))
|
|
935
|
+
"""
|
|
936
|
+
if not isinstance(other, PipelineBuilder):
|
|
937
|
+
raise TypeError(f"other must be a PipelineBuilder, got {type(other)}")
|
|
938
|
+
if not isinstance(context_lines, int):
|
|
939
|
+
raise TypeError(f"context_lines must be an int, got {type(context_lines)}")
|
|
940
|
+
if context_lines < 0:
|
|
941
|
+
raise ValueError("context_lines cannot be negative")
|
|
942
|
+
|
|
943
|
+
a = json.dumps(
|
|
944
|
+
self.build(),
|
|
945
|
+
indent=2,
|
|
946
|
+
ensure_ascii=False,
|
|
947
|
+
sort_keys=True,
|
|
948
|
+
).splitlines(keepends=True)
|
|
949
|
+
b = json.dumps(
|
|
950
|
+
other.build(),
|
|
951
|
+
indent=2,
|
|
952
|
+
ensure_ascii=False,
|
|
953
|
+
sort_keys=True,
|
|
954
|
+
).splitlines(keepends=True)
|
|
955
|
+
|
|
956
|
+
diff = difflib.unified_diff(a, b, fromfile="new", tofile="other", n=context_lines)
|
|
957
|
+
out = "".join(diff)
|
|
958
|
+
return out if out else "No differences."
|
|
959
|
+
|
|
756
960
|
def build(self) -> List[Dict[str, Any]]:
|
|
757
961
|
"""
|
|
758
962
|
Return the completed pipeline.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mongo-pipebuilder
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Type-safe, fluent MongoDB aggregation pipeline builder
|
|
5
5
|
Author-email: seligoroff <seligoroff@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -252,7 +252,7 @@ builder.prepend({"$match": {"deleted": False}})
|
|
|
252
252
|
Inserts a stage at a specific position (0-based index) in the pipeline.
|
|
253
253
|
|
|
254
254
|
```python
|
|
255
|
-
builder.match({"status": "active"}).group(
|
|
255
|
+
builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
|
|
256
256
|
builder.insert_at(1, {"$sort": {"name": 1}})
|
|
257
257
|
# Pipeline: [{"$match": {...}}, {"$sort": {...}}, {"$group": {...}}]
|
|
258
258
|
```
|
|
@@ -297,6 +297,74 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
|
|
|
297
297
|
builder.validate() # Raises ValueError: $out stage must be the last stage
|
|
298
298
|
```
|
|
299
299
|
|
|
300
|
+
##### `get_stage_at(index: int) -> Dict[str, Any]`
|
|
301
|
+
|
|
302
|
+
Gets a specific stage from the pipeline by index. Returns a copy of the stage.
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
builder = PipelineBuilder()
|
|
306
|
+
builder.match({"status": "active"}).limit(10)
|
|
307
|
+
stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
311
|
+
|
|
312
|
+
Returns a formatted JSON string representation of the pipeline. Useful for debugging.
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
builder = PipelineBuilder()
|
|
316
|
+
builder.match({"status": "active"}).limit(10)
|
|
317
|
+
print(builder.pretty_print())
|
|
318
|
+
# [
|
|
319
|
+
# {
|
|
320
|
+
# "$match": {
|
|
321
|
+
# "status": "active"
|
|
322
|
+
# }
|
|
323
|
+
# },
|
|
324
|
+
# {
|
|
325
|
+
# "$limit": 10
|
|
326
|
+
# }
|
|
327
|
+
# ]
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
##### `pretty_print_stage(stage: Union[int, Dict[str, Any]], indent: int = 2, ensure_ascii: bool = False) -> str`
|
|
331
|
+
|
|
332
|
+
Returns a formatted JSON string representation of a single stage (by index or by dict).
|
|
333
|
+
|
|
334
|
+
```python
|
|
335
|
+
builder = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
336
|
+
print(builder.pretty_print_stage(0)) # Prints the $match stage
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
|
|
340
|
+
|
|
341
|
+
Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
|
|
342
|
+
|
|
343
|
+
```python
|
|
344
|
+
builder = PipelineBuilder()
|
|
345
|
+
builder.match({"status": "active"}).limit(10)
|
|
346
|
+
|
|
347
|
+
# Basic usage
|
|
348
|
+
builder.to_json_file("debug_pipeline.json")
|
|
349
|
+
|
|
350
|
+
# With metadata
|
|
351
|
+
builder.to_json_file(
|
|
352
|
+
"pipeline.json",
|
|
353
|
+
metadata={"version": "1.0", "author": "developer"}
|
|
354
|
+
)
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
##### `compare_with(other: PipelineBuilder, context_lines: int = 3) -> str`
|
|
358
|
+
|
|
359
|
+
Returns a unified diff between two pipelines (useful for comparing “new” builder pipelines vs legacy/template pipelines).
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
legacy = PipelineBuilder().match({"status": "active"}).limit(10)
|
|
363
|
+
new = PipelineBuilder().match({"status": "inactive"}).limit(10)
|
|
364
|
+
|
|
365
|
+
print(new.compare_with(legacy))
|
|
366
|
+
```
|
|
367
|
+
|
|
300
368
|
##### `build() -> List[Dict[str, Any]]`
|
|
301
369
|
|
|
302
370
|
Returns the complete pipeline as a list of stage dictionaries.
|
|
@@ -452,11 +520,39 @@ base = get_base_pipeline(user_id)
|
|
|
452
520
|
# Create multiple queries from cached base
|
|
453
521
|
recent = base.copy().sort({"createdAt": -1}).limit(10).build()
|
|
454
522
|
by_category = base.copy().match({"category": "tech"}).build()
|
|
455
|
-
with_stats = base.copy().group(
|
|
523
|
+
with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
|
|
456
524
|
|
|
457
525
|
# Base pipeline is safely cached and reused
|
|
458
526
|
```
|
|
459
527
|
|
|
528
|
+
## Best Practices
|
|
529
|
+
|
|
530
|
+
### Array `_id` after `$group`: prefer `$arrayElemAt` and materialize fields
|
|
531
|
+
|
|
532
|
+
If you use `$group` with an array `_id` (e.g. `["_idSeason", "_idTournament"]`), avoid relying on `$_id` later in the pipeline.
|
|
533
|
+
Instead, **extract elements with `$arrayElemAt` and store them into explicit fields**, then use those fields in subsequent stages.
|
|
534
|
+
|
|
535
|
+
```python
|
|
536
|
+
pipeline = (
|
|
537
|
+
PipelineBuilder()
|
|
538
|
+
.group(
|
|
539
|
+
group_by=["$idSeason", "$idTournament"],
|
|
540
|
+
accumulators={"idTeams": {"$addToSet": "$idTeam"}},
|
|
541
|
+
)
|
|
542
|
+
.project({
|
|
543
|
+
"idSeason": {"$arrayElemAt": ["$_id", 0]},
|
|
544
|
+
"idTournament": {"$arrayElemAt": ["$_id", 1]},
|
|
545
|
+
"idTeams": 1,
|
|
546
|
+
# Optional: preserve array _id explicitly if you really need it later
|
|
547
|
+
# "_id": "$_id",
|
|
548
|
+
})
|
|
549
|
+
.build()
|
|
550
|
+
)
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
This pattern reduces surprises and helps avoid errors like:
|
|
554
|
+
`$first's argument must be an array, but is object`.
|
|
555
|
+
|
|
460
556
|
#### Example: Pipeline Factories
|
|
461
557
|
|
|
462
558
|
```python
|
|
@@ -544,3 +640,14 @@ MIT License - see [LICENSE](LICENSE) file for details.
|
|
|
544
640
|
|
|
545
641
|
|
|
546
642
|
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
|
{mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/requires.txt
RENAMED
|
File without changes
|
{mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/src/mongo_pipebuilder.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
@@ -5,6 +5,10 @@ Tests for methods from Proposal 2 (debug/validation) and Proposal 4 (pipeline an
|
|
|
5
5
|
|
|
6
6
|
Author: seligoroff
|
|
7
7
|
"""
|
|
8
|
+
import json
|
|
9
|
+
import tempfile
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
8
12
|
import pytest
|
|
9
13
|
from mongo_pipebuilder import PipelineBuilder
|
|
10
14
|
|
|
@@ -289,3 +293,296 @@ class TestPipelineBuilderAnalysis:
|
|
|
289
293
|
for stage_type in stage_types:
|
|
290
294
|
assert builder.has_stage(stage_type) is True
|
|
291
295
|
|
|
296
|
+
|
|
297
|
+
class TestPipelineBuilderDebugMethods:
|
|
298
|
+
"""Tests for Phase 1 debug methods: pretty_print, to_json_file, get_stage_at."""
|
|
299
|
+
|
|
300
|
+
def test_get_stage_at_valid_index(self):
|
|
301
|
+
"""Test get_stage_at() with valid index."""
|
|
302
|
+
builder = PipelineBuilder()
|
|
303
|
+
builder.match({"status": "active"}).limit(10).sort({"name": 1})
|
|
304
|
+
|
|
305
|
+
stage0 = builder.get_stage_at(0)
|
|
306
|
+
assert stage0 == {"$match": {"status": "active"}}
|
|
307
|
+
|
|
308
|
+
stage1 = builder.get_stage_at(1)
|
|
309
|
+
assert stage1 == {"$limit": 10}
|
|
310
|
+
|
|
311
|
+
stage2 = builder.get_stage_at(2)
|
|
312
|
+
assert stage2 == {"$sort": {"name": 1}}
|
|
313
|
+
|
|
314
|
+
def test_get_stage_at_returns_copy(self):
|
|
315
|
+
"""Test that get_stage_at() returns a copy, not reference."""
|
|
316
|
+
builder = PipelineBuilder()
|
|
317
|
+
builder.match({"status": "active"})
|
|
318
|
+
|
|
319
|
+
stage = builder.get_stage_at(0)
|
|
320
|
+
stage["$match"]["new_field"] = "value"
|
|
321
|
+
|
|
322
|
+
# Original should be unchanged
|
|
323
|
+
original_stage = builder.get_stage_at(0)
|
|
324
|
+
assert "new_field" not in original_stage["$match"]
|
|
325
|
+
|
|
326
|
+
def test_get_stage_at_invalid_index_negative(self):
|
|
327
|
+
"""Test get_stage_at() raises IndexError for negative index."""
|
|
328
|
+
builder = PipelineBuilder()
|
|
329
|
+
builder.match({"status": "active"})
|
|
330
|
+
|
|
331
|
+
with pytest.raises(IndexError, match="Index -1 out of range"):
|
|
332
|
+
builder.get_stage_at(-1)
|
|
333
|
+
|
|
334
|
+
def test_get_stage_at_invalid_index_too_large(self):
|
|
335
|
+
"""Test get_stage_at() raises IndexError for index too large."""
|
|
336
|
+
builder = PipelineBuilder()
|
|
337
|
+
builder.match({"status": "active"})
|
|
338
|
+
|
|
339
|
+
with pytest.raises(IndexError, match="Index 10 out of range"):
|
|
340
|
+
builder.get_stage_at(10)
|
|
341
|
+
|
|
342
|
+
def test_get_stage_at_empty_builder(self):
|
|
343
|
+
"""Test get_stage_at() raises IndexError on empty builder."""
|
|
344
|
+
builder = PipelineBuilder()
|
|
345
|
+
|
|
346
|
+
with pytest.raises(IndexError, match="Index 0 out of range"):
|
|
347
|
+
builder.get_stage_at(0)
|
|
348
|
+
|
|
349
|
+
def test_pretty_print_empty_builder(self):
|
|
350
|
+
"""Test pretty_print() with empty builder."""
|
|
351
|
+
builder = PipelineBuilder()
|
|
352
|
+
result = builder.pretty_print()
|
|
353
|
+
|
|
354
|
+
assert result == "[]"
|
|
355
|
+
# Should be valid JSON
|
|
356
|
+
json.loads(result)
|
|
357
|
+
|
|
358
|
+
def test_pretty_print_single_stage(self):
    """Test pretty_print() with single stage."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})
    rendered = pb.pretty_print()

    # Output round-trips through JSON back to the pipeline structure.
    assert json.loads(rendered) == [{"$match": {"status": "active"}}]

    # The raw text mentions both the operator and its operands.
    for fragment in ("$match", "status", "active"):
        assert fragment in rendered
|
|
372
|
+
|
|
373
|
+
def test_pretty_print_multiple_stages(self):
    """Test pretty_print() with multiple stages."""
    pb = PipelineBuilder()
    pb.match({"status": "active"}).limit(10).sort({"name": 1})
    rendered = pb.pretty_print()

    # Parse the JSON and verify each stage in order.
    stages = json.loads(rendered)
    assert len(stages) == 3
    expected = [
        {"$match": {"status": "active"}},
        {"$limit": 10},
        {"$sort": {"name": 1}},
    ]
    for got, want in zip(stages, expected):
        assert got == want
|
|
385
|
+
|
|
386
|
+
def test_pretty_print_custom_indent(self):
    """Test pretty_print() with custom indent."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})
    rendered = pb.pretty_print(indent=4)

    # Still valid JSON with the same content.
    assert json.loads(rendered) == [{"$match": {"status": "active"}}]

    # Continuation lines should be indented by four spaces.
    rows = rendered.split("\n")
    if len(rows) > 1:
        assert rows[1].startswith("    ")  # 4 spaces
|
|
400
|
+
|
|
401
|
+
def test_pretty_print_ensure_ascii(self):
    """Test pretty_print() with ensure_ascii=True."""
    pb = PipelineBuilder()
    pb.match({"name": "тест"})  # Non-ASCII characters

    escaped = pb.pretty_print(ensure_ascii=True)
    unescaped = pb.pretty_print(ensure_ascii=False)

    # Both renderings must be parseable JSON.
    for rendered in (escaped, unescaped):
        json.loads(rendered)

    # With ensure_ascii=False the original characters survive verbatim.
    assert "тест" in unescaped
|
|
414
|
+
|
|
415
|
+
def test_to_json_file_basic(self):
    """Test to_json_file() saves pipeline correctly."""
    pb = PipelineBuilder()
    pb.match({"status": "active"}).limit(10)

    with tempfile.TemporaryDirectory() as tmpdir:
        target = Path(tmpdir) / "test_pipeline.json"
        pb.to_json_file(target)

        # The file must have been written.
        assert target.exists()

        # And it must contain valid JSON with the pipeline payload.
        with open(target, "r", encoding="utf-8") as fh:
            payload = json.load(fh)

        assert "pipeline" in payload
        assert payload["pipeline"] == [
            {"$match": {"status": "active"}},
            {"$limit": 10},
        ]
|
|
436
|
+
|
|
437
|
+
def test_to_json_file_with_metadata(self):
    """Test to_json_file() with metadata."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})

    meta = {
        "version": "1.0",
        "author": "developer",
        "description": "Test pipeline",
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        target = Path(tmpdir) / "test_pipeline.json"
        pb.to_json_file(target, metadata=meta)

        with open(target, "r", encoding="utf-8") as fh:
            payload = json.load(fh)

        # Both the pipeline and the supplied metadata are persisted.
        assert "pipeline" in payload
        assert "metadata" in payload
        assert payload["metadata"] == meta
|
|
458
|
+
|
|
459
|
+
def test_to_json_file_creates_directory(self):
    """Test to_json_file() creates parent directories if they don't exist."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})

    with tempfile.TemporaryDirectory() as tmpdir:
        target = Path(tmpdir) / "nested" / "path" / "test_pipeline.json"

        # Precondition: the nested directory does not exist yet.
        assert not target.parent.exists()

        pb.to_json_file(target)

        # Writing must create both the directories and the file.
        assert target.exists()
        assert target.parent.exists()
|
|
475
|
+
|
|
476
|
+
def test_to_json_file_string_path(self):
    """Test to_json_file() accepts string path."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})

    with tempfile.TemporaryDirectory() as tmpdir:
        # Pass a plain str rather than a Path object.
        target = str(Path(tmpdir) / "test_pipeline.json")
        pb.to_json_file(target)

        assert Path(target).exists()
|
|
486
|
+
|
|
487
|
+
def test_to_json_file_custom_indent(self):
    """Test to_json_file() with custom indent."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})

    with tempfile.TemporaryDirectory() as tmpdir:
        target = Path(tmpdir) / "test_pipeline.json"
        pb.to_json_file(target, indent=4)

        with open(target, "r", encoding="utf-8") as fh:
            text = fh.read()

        # Continuation lines should be indented by four spaces.
        rows = text.split("\n")
        if len(rows) > 1:
            assert rows[1].startswith("    ")  # 4 spaces
|
|
501
|
+
|
|
502
|
+
def test_pretty_print_and_to_json_file_consistency(self):
    """Test that pretty_print() and to_json_file() produce consistent output."""
    pb = PipelineBuilder()
    pb.match({"status": "active"}).limit(10)

    rendered = pb.pretty_print()

    with tempfile.TemporaryDirectory() as tmpdir:
        target = Path(tmpdir) / "test_pipeline.json"
        pb.to_json_file(target)

        with open(target, "r", encoding="utf-8") as fh:
            payload = json.load(fh)

    # Once parsed, the file's pipeline and the pretty-printed pipeline agree.
    assert payload["pipeline"] == json.loads(rendered)
|
|
519
|
+
|
|
520
|
+
def test_get_stage_at_with_complex_stage(self):
    """Test get_stage_at() with complex stage (e.g., lookup)."""
    pb = PipelineBuilder()
    pb.match({"status": "active"})
    pb.lookup(
        from_collection="users",
        local_field="userId",
        foreign_field="_id",
        as_field="user",
    )

    lookup_stage = pb.get_stage_at(1)
    assert "$lookup" in lookup_stage

    # Each keyword argument maps onto the corresponding $lookup field.
    spec = lookup_stage["$lookup"]
    assert spec["from"] == "users"
    assert spec["localField"] == "userId"
    assert spec["foreignField"] == "_id"
    assert spec["as"] == "user"
|
|
537
|
+
|
|
538
|
+
def test_compare_with_no_differences(self):
    """Test compare_with() returns 'No differences.' for identical pipelines."""
    left = PipelineBuilder().match({"status": "active"}).limit(10)
    right = PipelineBuilder().match({"status": "active"}).limit(10)

    # Identical pipelines produce the sentinel string, not an empty diff.
    assert left.compare_with(right) == "No differences."
|
|
543
|
+
|
|
544
|
+
def test_compare_with_has_diff(self):
    """Test compare_with() returns unified diff when pipelines differ."""
    legacy = PipelineBuilder().match({"status": "active"})
    new = PipelineBuilder().match({"status": "inactive"})

    diff = new.compare_with(legacy)

    assert diff != "No differences."
    # Unified-diff headers label the receiver as "new" and the argument as "other".
    assert "--- new" in diff
    assert "+++ other" in diff
    # Both differing values appear somewhere in the diff body.
    assert "\"active\"" in diff or "active" in diff
    assert "\"inactive\"" in diff or "inactive" in diff
|
|
554
|
+
|
|
555
|
+
def test_compare_with_invalid_other_raises(self):
    """Test compare_with() validates 'other' argument."""
    pb = PipelineBuilder().match({"status": "active"})

    # A plain list is not accepted in place of a PipelineBuilder.
    with pytest.raises(TypeError, match="other must be a PipelineBuilder"):
        pb.compare_with([])  # type: ignore[arg-type]
|
|
560
|
+
|
|
561
|
+
def test_compare_with_negative_context_lines_raises(self):
    """Test compare_with() validates context_lines."""
    left = PipelineBuilder().match({"status": "active"})
    right = PipelineBuilder().match({"status": "inactive"})

    with pytest.raises(ValueError, match="context_lines cannot be negative"):
        left.compare_with(right, context_lines=-1)
|
|
567
|
+
|
|
568
|
+
def test_pretty_print_stage_by_index(self):
    """Test pretty_print_stage() with stage index."""
    pb = PipelineBuilder().match({"status": "active"}).limit(10)

    # An int argument selects a stage from the builder by position.
    rendered = pb.pretty_print_stage(0)
    assert json.loads(rendered) == {"$match": {"status": "active"}}
|
|
574
|
+
|
|
575
|
+
def test_pretty_print_stage_by_dict(self):
    """Test pretty_print_stage() with stage dict."""
    pb = PipelineBuilder()
    raw_stage = {"$limit": 5}

    # A dict argument is rendered directly, independent of builder contents.
    rendered = pb.pretty_print_stage(raw_stage, indent=4)
    assert json.loads(rendered) == raw_stage
|
|
582
|
+
|
|
583
|
+
def test_pretty_print_stage_invalid_type_raises(self):
    """Test pretty_print_stage() validates stage argument."""
    pb = PipelineBuilder().match({"status": "active"})

    # A string is neither an index nor a stage dict.
    with pytest.raises(TypeError, match="stage must be an int index or a dict"):
        pb.pretty_print_stage("0")  # type: ignore[arg-type]
|
|
588
|
+
|
|
@@ -95,7 +95,7 @@ class TestInsertAt:
|
|
|
95
95
|
def test_insert_at_middle(self):
|
|
96
96
|
"""Test insert_at() inserts in the middle."""
|
|
97
97
|
builder = PipelineBuilder()
|
|
98
|
-
builder.match({"status": "active"}).group(
|
|
98
|
+
builder.match({"status": "active"}).group("$category", {"count": {"$sum": 1}})
|
|
99
99
|
builder.insert_at(1, {"$sort": {"name": 1}})
|
|
100
100
|
|
|
101
101
|
pipeline = builder.build()
|
|
@@ -188,7 +188,7 @@ class TestInsertAt:
|
|
|
188
188
|
builder.match({"status": "active"})
|
|
189
189
|
builder.lookup("users", "userId", "_id", "user")
|
|
190
190
|
builder.unwind("user")
|
|
191
|
-
builder.group(
|
|
191
|
+
builder.group("$category", {"count": {"$sum": 1}})
|
|
192
192
|
|
|
193
193
|
# Insert $addFields before $group
|
|
194
194
|
builder.insert_at(3, {"$addFields": {"categoryUpper": {"$toUpper": "$category"}}})
|
|
@@ -224,7 +224,7 @@ class TestPrependAndInsertAtIntegration:
|
|
|
224
224
|
builder = PipelineBuilder()
|
|
225
225
|
builder.match({"status": "active"})
|
|
226
226
|
builder.lookup("users", "userId", "_id", "user")
|
|
227
|
-
builder.group(
|
|
227
|
+
builder.group("$category", {"count": {"$sum": 1}})
|
|
228
228
|
|
|
229
229
|
# Find position of $group and insert before it
|
|
230
230
|
stage_types = builder.get_stage_types()
|
|
@@ -257,3 +257,4 @@ class TestPrependAndInsertAtIntegration:
|
|
|
257
257
|
assert pipeline[3] == {"$limit": 10}
|
|
258
258
|
|
|
259
259
|
|
|
260
|
+
|
{mongo_pipebuilder-0.2.3 → mongo_pipebuilder-0.3.1}/tests/test_builder_validation_existing.py
RENAMED
|
@@ -91,6 +91,12 @@ class TestGroupValidation:
|
|
|
91
91
|
with pytest.raises(TypeError, match="accumulators must be a dict"):
|
|
92
92
|
builder.group({}, 123)
|
|
93
93
|
|
|
94
|
+
def test_group_nested_id_wrapper_raises_error(self):
|
|
95
|
+
"""Test that group({'_id': ...}, ...) raises ValueError with guidance."""
|
|
96
|
+
builder = PipelineBuilder()
|
|
97
|
+
with pytest.raises(ValueError, match="Invalid group_by: you passed a dict wrapper"):
|
|
98
|
+
builder.group({"_id": ["$a", "$b"]}, {"count": {"$sum": 1}})
|
|
99
|
+
|
|
94
100
|
|
|
95
101
|
class TestUnwindValidation:
|
|
96
102
|
"""Tests for $unwind stage validation."""
|
|
@@ -201,3 +207,4 @@ class TestProjectValidation:
|
|
|
201
207
|
|
|
202
208
|
|
|
203
209
|
|
|
210
|
+
|