mongo-pipebuilder 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/LICENSE +15 -0
  2. {mongo_pipebuilder-0.2.2/src/mongo_pipebuilder.egg-info → mongo_pipebuilder-0.3.0}/PKG-INFO +235 -6
  3. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/README.md +234 -5
  4. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/pyproject.toml +1 -1
  5. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder/__init__.py +1 -1
  6. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder/builder.py +113 -20
  7. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0/src/mongo_pipebuilder.egg-info}/PKG-INFO +235 -6
  8. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder_debug.py +246 -0
  9. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder_insert.py +2 -0
  10. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder_validation.py +15 -0
  11. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder_validation_existing.py +2 -0
  12. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder_validation_new.py +2 -0
  13. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/setup.cfg +0 -0
  14. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder.egg-info/SOURCES.txt +0 -0
  15. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder.egg-info/dependency_links.txt +0 -0
  16. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder.egg-info/requires.txt +0 -0
  17. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/src/mongo_pipebuilder.egg-info/top_level.txt +0 -0
  18. {mongo_pipebuilder-0.2.2 → mongo_pipebuilder-0.3.0}/tests/test_builder.py +0 -0
@@ -21,3 +21,18 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mongo-pipebuilder
- Version: 0.2.2
+ Version: 0.3.0
  Summary: Type-safe, fluent MongoDB aggregation pipeline builder
  Author-email: seligoroff <seligoroff@gmail.com>
  License: MIT
@@ -28,6 +28,12 @@ Dynamic: license-file
 
  # mongo-pipebuilder
 
+ [![PyPI version](https://badge.fury.io/py/mongo-pipebuilder.svg)](https://badge.fury.io/py/mongo-pipebuilder)
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+ [![Test Coverage](https://img.shields.io/badge/coverage-96%25-green.svg)](https://github.com/seligoroff/mongo-pipebuilder)
+
  Type-safe, fluent MongoDB aggregation pipeline builder for Python.
 
  ## Overview
@@ -36,11 +42,11 @@ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
 
  ## Features
 
- - **Type-safe**: Full type hints support with IDE autocomplete
- - **Fluent interface**: Chain methods for readable, maintainable code
- - **Zero dependencies**: Pure Python, lightweight package
- - **Extensible**: Easy to add custom stages via `add_stage()`
- - **Well tested**: Comprehensive test suite with 96%+ coverage
+ - **Type-safe**: Full type hints support with IDE autocomplete
+ - **Fluent interface**: Chain methods for readable, maintainable code
+ - **Zero dependencies**: Pure Python, lightweight package
+ - **Extensible**: Easy to add custom stages via `add_stage()`
+ - **Well tested**: Comprehensive test suite with 96%+ coverage
 
  ## Installation
 
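The feature list above calls out extensibility via `add_stage()`. As a brief illustration of that point, here is a sketch that assumes only the `match()`, `add_stage()`, and `build()` calls documented elsewhere in this diff; the `$facet` stage is an arbitrary example chosen for illustration, not an excerpt from the package:

```python
from mongo_pipebuilder import PipelineBuilder

# Stages without a dedicated helper method can be appended as raw dictionaries.
builder = PipelineBuilder().match({"status": "active"})
builder.add_stage({
    "$facet": {
        "total": [{"$count": "n"}],
        "sample": [{"$limit": 5}],
    }
})

pipeline = builder.build()
assert pipeline[0] == {"$match": {"status": "active"}}
assert "$facet" in pipeline[1]
```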
@@ -259,6 +265,22 @@ group_index = stage_types.index("$group")
259
265
  builder.insert_at(group_index, {"$addFields": {"x": 1}})
260
266
  ```
261
267
 
268
+ ##### `copy() -> PipelineBuilder`
269
+
270
+ Creates an independent copy of the builder with current stages. Useful for creating immutable variants and composing pipelines.
271
+
272
+ ```python
273
+ builder1 = PipelineBuilder().match({"status": "active"})
274
+ builder2 = builder1.copy()
275
+ builder2.limit(10)
276
+
277
+ # Original unchanged
278
+ assert len(builder1) == 1
279
+ assert len(builder2) == 2
280
+ ```
281
+
282
+ See [Composing and Reusing Pipelines](#composing-and-reusing-pipelines) for practical examples.
283
+
262
284
  ##### `validate() -> bool`
263
285
 
264
286
  Validates the pipeline before execution. Checks that:
@@ -275,6 +297,54 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
275
297
  builder.validate() # Raises ValueError: $out stage must be the last stage
276
298
  ```
277
299
 
300
+ ##### `get_stage_at(index: int) -> Dict[str, Any]`
301
+
302
+ Gets a specific stage from the pipeline by index. Returns a copy of the stage.
303
+
304
+ ```python
305
+ builder = PipelineBuilder()
306
+ builder.match({"status": "active"}).limit(10)
307
+ stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
308
+ ```
309
+
310
+ ##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
311
+
312
+ Returns a formatted JSON string representation of the pipeline. Useful for debugging.
313
+
314
+ ```python
315
+ builder = PipelineBuilder()
316
+ builder.match({"status": "active"}).limit(10)
317
+ print(builder.pretty_print())
318
+ # [
319
+ # {
320
+ # "$match": {
321
+ # "status": "active"
322
+ # }
323
+ # },
324
+ # {
325
+ # "$limit": 10
326
+ # }
327
+ # ]
328
+ ```
329
+
330
+ ##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
331
+
332
+ Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
333
+
334
+ ```python
335
+ builder = PipelineBuilder()
336
+ builder.match({"status": "active"}).limit(10)
337
+
338
+ # Basic usage
339
+ builder.to_json_file("debug_pipeline.json")
340
+
341
+ # With metadata
342
+ builder.to_json_file(
343
+ "pipeline.json",
344
+ metadata={"version": "1.0", "author": "developer"}
345
+ )
346
+ ```
347
+
278
348
  ##### `build() -> List[Dict[str, Any]]`
279
349
 
280
350
  Returns the complete pipeline as a list of stage dictionaries.
@@ -340,6 +410,150 @@ pipeline = (
340
410
  )
341
411
  ```
342
412
 
413
+ ### Composing and Reusing Pipelines
414
+
415
+ The `copy()` method allows you to create immutable variants of pipelines, enabling safe composition and reuse. This is useful when you need to:
416
+ - Create multiple variants from a base pipeline
417
+ - Compose pipelines functionally
418
+ - Cache base pipelines safely
419
+ - Pass pipelines to functions without side effects
420
+
421
+ #### Example: Building Multiple Variants from a Base Pipeline
422
+
423
+ ```python
424
+ from mongo_pipebuilder import PipelineBuilder
425
+
426
+ # Base pipeline with common filtering and joining
427
+ base_pipeline = (
428
+ PipelineBuilder()
429
+ .match({"status": "published", "deleted": False})
430
+ .lookup(
431
+ from_collection="authors",
432
+ local_field="authorId",
433
+ foreign_field="_id",
434
+ as_field="author"
435
+ )
436
+ .unwind("author", preserve_null_and_empty_arrays=True)
437
+ .project({
438
+ "title": 1,
439
+ "authorName": "$author.name",
440
+ "publishedAt": 1
441
+ })
442
+ )
443
+
444
+ # Create variants with different sorting and limits
445
+ recent_posts = base_pipeline.copy().sort({"publishedAt": -1}).limit(10).build()
446
+ popular_posts = base_pipeline.copy().sort({"views": -1}).limit(5).build()
447
+ author_posts = base_pipeline.copy().match({"authorName": "John Doe"}).build()
448
+
449
+ # Base pipeline remains unchanged
450
+ assert len(base_pipeline) == 4 # Still has 4 stages
451
+ ```
452
+
453
+ #### Example: Functional Composition Pattern
454
+
455
+ ```python
456
+ def add_pagination(builder, page: int, page_size: int = 10):
457
+ """Add pagination to a pipeline."""
458
+ return builder.copy().skip(page * page_size).limit(page_size)
459
+
460
+ def add_sorting(builder, sort_field: str, ascending: bool = True):
461
+ """Add sorting to a pipeline."""
462
+ return builder.copy().sort({sort_field: 1 if ascending else -1})
463
+
464
+ # Compose pipelines functionally
465
+ base = PipelineBuilder().match({"status": "active"})
466
+
467
+ # Create different variants
468
+ page1 = add_pagination(add_sorting(base, "createdAt"), page=0)
469
+ page2 = add_pagination(add_sorting(base, "createdAt"), page=1)
470
+ sorted_by_name = add_sorting(base, "name", ascending=True)
471
+
472
+ # All variants are independent
473
+ assert len(base) == 1 # Base unchanged
474
+ assert len(page1) == 3 # match + sort + skip + limit
475
+ ```
476
+
477
+ #### Example: Caching Base Pipelines
478
+
479
+ ```python
480
+ from functools import lru_cache
481
+
482
+ @lru_cache(maxsize=100)
483
+ def get_base_pipeline(user_id: str):
484
+ """Cache base pipeline for a user."""
485
+ return (
486
+ PipelineBuilder()
487
+ .match({"userId": user_id, "status": "active"})
488
+ .lookup(
489
+ from_collection="profiles",
490
+ local_field="userId",
491
+ foreign_field="_id",
492
+ as_field="profile"
493
+ )
494
+ )
495
+
496
+ # Reuse cached base pipeline with different modifications
497
+ user_id = "12345"
498
+ base = get_base_pipeline(user_id)
499
+
500
+ # Create multiple queries from cached base
501
+ recent = base.copy().sort({"createdAt": -1}).limit(10).build()
502
+ by_category = base.copy().match({"category": "tech"}).build()
503
+ with_stats = base.copy().group({"_id": "$category"}, {"count": {"$sum": 1}}).build()
504
+
505
+ # Base pipeline is safely cached and reused
506
+ ```
507
+
508
+ #### Example: Pipeline Factories
509
+
510
+ ```python
511
+ class PipelineFactory:
512
+ """Factory for creating common pipeline patterns."""
513
+
514
+ @staticmethod
515
+ def base_article_pipeline():
516
+ """Base pipeline for articles."""
517
+ return (
518
+ PipelineBuilder()
519
+ .match({"status": "published"})
520
+ .lookup(
521
+ from_collection="authors",
522
+ local_field="authorId",
523
+ foreign_field="_id",
524
+ as_field="author"
525
+ )
526
+ )
527
+
528
+ @staticmethod
529
+ def with_author_filter(builder, author_name: str):
530
+ """Add author filter to pipeline."""
531
+ return builder.copy().match({"author.name": author_name})
532
+
533
+ @staticmethod
534
+ def with_date_range(builder, start_date: str, end_date: str):
535
+ """Add date range filter to pipeline."""
536
+ return builder.copy().match({
537
+ "publishedAt": {"$gte": start_date, "$lte": end_date}
538
+ })
539
+
540
+ # Usage
541
+ base = PipelineFactory.base_article_pipeline()
542
+ johns_articles = PipelineFactory.with_author_filter(base, "John Doe")
543
+ recent_johns = PipelineFactory.with_date_range(
544
+ johns_articles,
545
+ start_date="2024-01-01",
546
+ end_date="2024-12-31"
547
+ ).sort({"publishedAt": -1}).limit(10).build()
548
+ ```
549
+
550
+ **Key Benefits:**
551
+ - Safe reuse: Base pipelines remain unchanged
552
+ - Functional composition: Build pipelines from smaller parts
553
+ - Caching friendly: Base pipelines can be safely cached
554
+ - No side effects: Functions can safely modify copies
555
+ - Thread-safe: Multiple threads can use copies independently
556
+
343
557
  ## Development
344
558
 
345
559
  ### Project Structure
@@ -374,3 +588,18 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
  MIT License - see [LICENSE](LICENSE) file for details.
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1,5 +1,11 @@
  # mongo-pipebuilder
 
+ [![PyPI version](https://badge.fury.io/py/mongo-pipebuilder.svg)](https://badge.fury.io/py/mongo-pipebuilder)
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+ [![Test Coverage](https://img.shields.io/badge/coverage-96%25-green.svg)](https://github.com/seligoroff/mongo-pipebuilder)
+
  Type-safe, fluent MongoDB aggregation pipeline builder for Python.
 
  ## Overview
@@ -8,11 +14,11 @@ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
 
  ## Features
 
- - **Type-safe**: Full type hints support with IDE autocomplete
- - **Fluent interface**: Chain methods for readable, maintainable code
- - **Zero dependencies**: Pure Python, lightweight package
- - **Extensible**: Easy to add custom stages via `add_stage()`
- - **Well tested**: Comprehensive test suite with 96%+ coverage
+ - **Type-safe**: Full type hints support with IDE autocomplete
+ - **Fluent interface**: Chain methods for readable, maintainable code
+ - **Zero dependencies**: Pure Python, lightweight package
+ - **Extensible**: Easy to add custom stages via `add_stage()`
+ - **Well tested**: Comprehensive test suite with 96%+ coverage
 
  ## Installation
 
@@ -231,6 +237,22 @@ group_index = stage_types.index("$group")
  builder.insert_at(group_index, {"$addFields": {"x": 1}})
  ```
 
+ ##### `copy() -> PipelineBuilder`
+
+ Creates an independent copy of the builder with current stages. Useful for creating immutable variants and composing pipelines.
+
+ ```python
+ builder1 = PipelineBuilder().match({"status": "active"})
+ builder2 = builder1.copy()
+ builder2.limit(10)
+
+ # Original unchanged
+ assert len(builder1) == 1
+ assert len(builder2) == 2
+ ```
+
+ See [Composing and Reusing Pipelines](#composing-and-reusing-pipelines) for practical examples.
+
  ##### `validate() -> bool`
 
  Validates the pipeline before execution. Checks that:
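The `copy()` implementation itself is not among the `builder.py` hunks in this diff, so its internals are an assumption. Behaviourally, the semantics documented above amount to cloning the stage list into a fresh builder; a rough, hypothetical equivalent using only the public `build()` and `add_stage()` calls from this README might look like:

```python
from mongo_pipebuilder import PipelineBuilder

def copy_builder(source: PipelineBuilder) -> PipelineBuilder:
    """Hypothetical helper mirroring the documented copy() semantics."""
    clone = PipelineBuilder()
    for stage in source.build():
        # Copy each top-level stage dict so the clone's stage list is
        # independent of the source builder's list.
        clone.add_stage(dict(stage))
    return clone

base = PipelineBuilder().match({"status": "active"})
variant = copy_builder(base).limit(10)

assert len(base) == 1      # base is untouched
assert len(variant) == 2   # clone gained the $limit stage
```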
@@ -247,6 +269,54 @@ builder.add_stage({"$out": "output"}).match({"status": "active"})
247
269
  builder.validate() # Raises ValueError: $out stage must be the last stage
248
270
  ```
249
271
 
272
+ ##### `get_stage_at(index: int) -> Dict[str, Any]`
273
+
274
+ Gets a specific stage from the pipeline by index. Returns a copy of the stage.
275
+
276
+ ```python
277
+ builder = PipelineBuilder()
278
+ builder.match({"status": "active"}).limit(10)
279
+ stage = builder.get_stage_at(0) # Returns {"$match": {"status": "active"}}
280
+ ```
281
+
282
+ ##### `pretty_print(indent: int = 2, ensure_ascii: bool = False) -> str`
283
+
284
+ Returns a formatted JSON string representation of the pipeline. Useful for debugging.
285
+
286
+ ```python
287
+ builder = PipelineBuilder()
288
+ builder.match({"status": "active"}).limit(10)
289
+ print(builder.pretty_print())
290
+ # [
291
+ # {
292
+ # "$match": {
293
+ # "status": "active"
294
+ # }
295
+ # },
296
+ # {
297
+ # "$limit": 10
298
+ # }
299
+ # ]
300
+ ```
301
+
302
+ ##### `to_json_file(filepath: Union[str, Path], indent: int = 2, ensure_ascii: bool = False, metadata: Optional[Dict[str, Any]] = None) -> None`
303
+
304
+ Saves the pipeline to a JSON file. Useful for debugging, comparison, or versioning.
305
+
306
+ ```python
307
+ builder = PipelineBuilder()
308
+ builder.match({"status": "active"}).limit(10)
309
+
310
+ # Basic usage
311
+ builder.to_json_file("debug_pipeline.json")
312
+
313
+ # With metadata
314
+ builder.to_json_file(
315
+ "pipeline.json",
316
+ metadata={"version": "1.0", "author": "developer"}
317
+ )
318
+ ```
319
+
250
320
  ##### `build() -> List[Dict[str, Any]]`
251
321
 
252
322
  Returns the complete pipeline as a list of stage dictionaries.
@@ -312,6 +382,150 @@ pipeline = (
312
382
  )
313
383
  ```
314
384
 
385
+ ### Composing and Reusing Pipelines
386
+
387
+ The `copy()` method allows you to create immutable variants of pipelines, enabling safe composition and reuse. This is useful when you need to:
388
+ - Create multiple variants from a base pipeline
389
+ - Compose pipelines functionally
390
+ - Cache base pipelines safely
391
+ - Pass pipelines to functions without side effects
392
+
393
+ #### Example: Building Multiple Variants from a Base Pipeline
394
+
395
+ ```python
396
+ from mongo_pipebuilder import PipelineBuilder
397
+
398
+ # Base pipeline with common filtering and joining
399
+ base_pipeline = (
400
+ PipelineBuilder()
401
+ .match({"status": "published", "deleted": False})
402
+ .lookup(
403
+ from_collection="authors",
404
+ local_field="authorId",
405
+ foreign_field="_id",
406
+ as_field="author"
407
+ )
408
+ .unwind("author", preserve_null_and_empty_arrays=True)
409
+ .project({
410
+ "title": 1,
411
+ "authorName": "$author.name",
412
+ "publishedAt": 1
413
+ })
414
+ )
415
+
416
+ # Create variants with different sorting and limits
417
+ recent_posts = base_pipeline.copy().sort({"publishedAt": -1}).limit(10).build()
418
+ popular_posts = base_pipeline.copy().sort({"views": -1}).limit(5).build()
419
+ author_posts = base_pipeline.copy().match({"authorName": "John Doe"}).build()
420
+
421
+ # Base pipeline remains unchanged
422
+ assert len(base_pipeline) == 4 # Still has 4 stages
423
+ ```
424
+
425
+ #### Example: Functional Composition Pattern
426
+
427
+ ```python
428
+ def add_pagination(builder, page: int, page_size: int = 10):
429
+ """Add pagination to a pipeline."""
430
+ return builder.copy().skip(page * page_size).limit(page_size)
431
+
432
+ def add_sorting(builder, sort_field: str, ascending: bool = True):
433
+ """Add sorting to a pipeline."""
434
+ return builder.copy().sort({sort_field: 1 if ascending else -1})
435
+
436
+ # Compose pipelines functionally
437
+ base = PipelineBuilder().match({"status": "active"})
438
+
439
+ # Create different variants
440
+ page1 = add_pagination(add_sorting(base, "createdAt"), page=0)
441
+ page2 = add_pagination(add_sorting(base, "createdAt"), page=1)
442
+ sorted_by_name = add_sorting(base, "name", ascending=True)
443
+
444
+ # All variants are independent
445
+ assert len(base) == 1 # Base unchanged
446
+ assert len(page1) == 3 # match + sort + skip + limit
447
+ ```
448
+
449
+ #### Example: Caching Base Pipelines
450
+
451
+ ```python
452
+ from functools import lru_cache
453
+
454
+ @lru_cache(maxsize=100)
455
+ def get_base_pipeline(user_id: str):
456
+ """Cache base pipeline for a user."""
457
+ return (
458
+ PipelineBuilder()
459
+ .match({"userId": user_id, "status": "active"})
460
+ .lookup(
461
+ from_collection="profiles",
462
+ local_field="userId",
463
+ foreign_field="_id",
464
+ as_field="profile"
465
+ )
466
+ )
467
+
468
+ # Reuse cached base pipeline with different modifications
469
+ user_id = "12345"
470
+ base = get_base_pipeline(user_id)
471
+
472
+ # Create multiple queries from cached base
473
+ recent = base.copy().sort({"createdAt": -1}).limit(10).build()
474
+ by_category = base.copy().match({"category": "tech"}).build()
475
+ with_stats = base.copy().group({"_id": "$category"}, {"count": {"$sum": 1}}).build()
476
+
477
+ # Base pipeline is safely cached and reused
478
+ ```
479
+
480
+ #### Example: Pipeline Factories
481
+
482
+ ```python
483
+ class PipelineFactory:
484
+ """Factory for creating common pipeline patterns."""
485
+
486
+ @staticmethod
487
+ def base_article_pipeline():
488
+ """Base pipeline for articles."""
489
+ return (
490
+ PipelineBuilder()
491
+ .match({"status": "published"})
492
+ .lookup(
493
+ from_collection="authors",
494
+ local_field="authorId",
495
+ foreign_field="_id",
496
+ as_field="author"
497
+ )
498
+ )
499
+
500
+ @staticmethod
501
+ def with_author_filter(builder, author_name: str):
502
+ """Add author filter to pipeline."""
503
+ return builder.copy().match({"author.name": author_name})
504
+
505
+ @staticmethod
506
+ def with_date_range(builder, start_date: str, end_date: str):
507
+ """Add date range filter to pipeline."""
508
+ return builder.copy().match({
509
+ "publishedAt": {"$gte": start_date, "$lte": end_date}
510
+ })
511
+
512
+ # Usage
513
+ base = PipelineFactory.base_article_pipeline()
514
+ johns_articles = PipelineFactory.with_author_filter(base, "John Doe")
515
+ recent_johns = PipelineFactory.with_date_range(
516
+ johns_articles,
517
+ start_date="2024-01-01",
518
+ end_date="2024-12-31"
519
+ ).sort({"publishedAt": -1}).limit(10).build()
520
+ ```
521
+
522
+ **Key Benefits:**
523
+ - Safe reuse: Base pipelines remain unchanged
524
+ - Functional composition: Build pipelines from smaller parts
525
+ - Caching friendly: Base pipelines can be safely cached
526
+ - No side effects: Functions can safely modify copies
527
+ - Thread-safe: Multiple threads can use copies independently
528
+
315
529
  ## Development
316
530
 
317
531
  ### Project Structure
@@ -346,3 +560,18 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
  MIT License - see [LICENSE](LICENSE) file for details.
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "mongo-pipebuilder"
- version = "0.2.2"
+ version = "0.3.0"
  description = "Type-safe, fluent MongoDB aggregation pipeline builder"
  readme = "README.md"
  requires-python = ">=3.8"
@@ -9,6 +9,6 @@ Author: seligoroff
 
  from mongo_pipebuilder.builder import PipelineBuilder
 
- __version__ = "0.2.2"
+ __version__ = "0.3.0"
  __all__ = ["PipelineBuilder"]
 
@@ -6,6 +6,8 @@ Builder Pattern implementation for safe construction of MongoDB aggregation pipe
 
  Author: seligoroff
  """
+ import json
+ from pathlib import Path
  from typing import Any, Dict, List, Optional, Union
 
  # For compatibility with Python < 3.11
@@ -33,8 +35,7 @@ class PipelineBuilder:
  Self for method chaining
 
  Raises:
- TypeError: If conditions is not a dictionary
- ValueError: If conditions is None
+ TypeError: If conditions is None or not a dictionary
 
  Example:
  >>> builder.match({"status": "active", "age": {"$gte": 18}})
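The docstring update above folds the `None` case into `TypeError`, where it was previously documented as `ValueError`. A quick check of that contract, written in the same pytest style as the test files later in this diff (a hypothetical test of my own, not one of the added test cases):

```python
import pytest
from mongo_pipebuilder import PipelineBuilder

def test_match_rejects_none_and_non_dict():
    builder = PipelineBuilder()
    # Per the updated docstring, both invalid inputs raise TypeError.
    with pytest.raises(TypeError):
        builder.match(None)
    with pytest.raises(TypeError):
        builder.match(["not", "a", "dict"])
```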
@@ -636,23 +637,15 @@ class PipelineBuilder:
  "Only one output stage is allowed."
  )
 
- # If $out exists, it must be the last stage
- if has_out:
- out_index = stage_types.index("$out")
- if out_index != len(stage_types) - 1:
- raise ValueError(
- f"$out stage must be the last stage in the pipeline. "
- f"Found at position {out_index + 1} of {len(stage_types)}."
- )
-
- # If $merge exists, it must be the last stage
- if has_merge:
- merge_index = stage_types.index("$merge")
- if merge_index != len(stage_types) - 1:
- raise ValueError(
- f"$merge stage must be the last stage in the pipeline. "
- f"Found at position {merge_index + 1} of {len(stage_types)}."
- )
+ # Check if $out or $merge exist and validate position
+ for stage_name in ["$out", "$merge"]:
+ if stage_name in stage_types:
+ stage_index = stage_types.index(stage_name)
+ if stage_index != len(stage_types) - 1:
+ raise ValueError(
+ f"{stage_name} stage must be the last stage in the pipeline. "
+ f"Found at position {stage_index + 1} of {len(stage_types)}."
+ )
 
  return True
 
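The refactor above collapses the two near-identical `$out` and `$merge` checks into a single loop. A self-contained sketch of the same rule, applied to a plain list of stage dictionaries rather than to the builder (the function name `check_output_stage_is_last` is illustrative, not part of the package):

```python
from typing import Any, Dict, List

def check_output_stage_is_last(stages: List[Dict[str, Any]]) -> bool:
    """Standalone version of the loop added above: $out/$merge must come last."""
    stage_types = [next(iter(stage)) for stage in stages]
    for stage_name in ["$out", "$merge"]:
        if stage_name in stage_types:
            stage_index = stage_types.index(stage_name)
            if stage_index != len(stage_types) - 1:
                raise ValueError(
                    f"{stage_name} stage must be the last stage in the pipeline. "
                    f"Found at position {stage_index + 1} of {len(stage_types)}."
                )
    return True

# A misplaced $out is rejected; a trailing $merge passes.
try:
    check_output_stage_is_last([{"$out": "report"}, {"$match": {"status": "active"}}])
except ValueError as exc:
    print(exc)  # $out stage must be the last stage in the pipeline. Found at position 1 of 2.

assert check_output_stage_is_last([{"$match": {}}, {"$merge": {"into": "report"}}])
```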
@@ -669,7 +662,7 @@ class PipelineBuilder:
  >>> builder.get_stage_types()
  ['$match', '$limit']
  """
- return [list(stage.keys())[0] for stage in self._stages]
+ return [next(iter(stage)) for stage in self._stages]
 
  def has_stage(self, stage_type: str) -> bool:
  """
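The one-line change above replaces `list(stage.keys())[0]` with `next(iter(stage))`. Both return the first key of a stage dict (and each stage dict has exactly one key, its operator name), but the new form avoids building a throwaway list:

```python
stage = {"$match": {"status": "active"}}

# Dicts preserve insertion order in Python 3.7+, so the "first key" is well
# defined; for one-key stage dicts it is simply the operator name.
assert list(stage.keys())[0] == "$match"
assert next(iter(stage)) == "$match"
```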
@@ -762,6 +755,106 @@ class PipelineBuilder:
  self._stages.insert(position, stage)
  return self
 
+ def get_stage_at(self, index: int) -> Dict[str, Any]:
+ """
+ Get a specific stage from the pipeline by index.
+
+ Args:
+ index: Zero-based index of the stage to retrieve
+
+ Returns:
+ Dictionary representing the stage at the given index
+
+ Raises:
+ IndexError: If index is out of range
+
+ Example:
+ >>> builder = PipelineBuilder()
+ >>> builder.match({"status": "active"}).limit(10)
+ >>> stage = builder.get_stage_at(0)
+ >>> stage
+ {"$match": {"status": "active"}}
+ """
+ if index < 0 or index >= len(self._stages):
+ raise IndexError(
+ f"Index {index} out of range [0, {len(self._stages)}]"
+ )
+ return self._stages[index].copy()
+
+ def pretty_print(self, indent: int = 2, ensure_ascii: bool = False) -> str:
+ """
+ Return a formatted JSON string representation of the pipeline.
+
+ Useful for debugging and understanding pipeline structure.
+
+ Args:
+ indent: Number of spaces for indentation (default: 2)
+ ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
+
+ Returns:
+ Formatted JSON string of the pipeline
+
+ Example:
+ >>> builder = PipelineBuilder()
+ >>> builder.match({"status": "active"}).limit(10)
+ >>> print(builder.pretty_print())
+ [
+ {
+ "$match": {
+ "status": "active"
+ }
+ },
+ {
+ "$limit": 10
+ }
+ ]
+ """
+ return json.dumps(self._stages, indent=indent, ensure_ascii=ensure_ascii)
+
+ def to_json_file(
+ self,
+ filepath: Union[str, Path],
+ indent: int = 2,
+ ensure_ascii: bool = False,
+ metadata: Optional[Dict[str, Any]] = None,
+ ) -> None:
+ """
+ Save the pipeline to a JSON file.
+
+ Useful for debugging, comparison with other pipelines, or versioning.
+
+ Args:
+ filepath: Path to the output JSON file (str or Path)
+ indent: Number of spaces for indentation (default: 2)
+ ensure_ascii: If False, non-ASCII characters are output as-is (default: False)
+ metadata: Optional metadata to include in the JSON file
+
+ Raises:
+ IOError: If file cannot be written
+
+ Example:
+ >>> builder = PipelineBuilder()
+ >>> builder.match({"status": "active"}).limit(10)
+ >>> builder.to_json_file("debug_pipeline.json")
+
+ >>> # With metadata
+ >>> builder.to_json_file(
+ ... "pipeline.json",
+ ... metadata={"version": "1.0", "author": "developer"}
+ ... )
+ """
+ filepath = Path(filepath)
+ filepath.parent.mkdir(parents=True, exist_ok=True)
+
+ output: Dict[str, Any] = {
+ "pipeline": self._stages,
+ }
+ if metadata:
+ output["metadata"] = metadata
+
+ with open(filepath, "w", encoding="utf-8") as f:
+ json.dump(output, f, indent=indent, ensure_ascii=ensure_ascii)
+
  def build(self) -> List[Dict[str, Any]]:
  """
  Return the completed pipeline.
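Taken together, the methods added above give the builder a debugging surface: `pretty_print()` is `json.dumps` over the stage list, and `to_json_file()` wraps the stages under a `pipeline` key plus an optional `metadata` key. A small round-trip sketch, assuming only the methods shown in this hunk plus the chaining `match()`/`limit()` API documented in the README portion of this diff:

```python
import json
import tempfile
from pathlib import Path

from mongo_pipebuilder import PipelineBuilder

builder = PipelineBuilder().match({"status": "active"}).limit(10)

with tempfile.TemporaryDirectory() as tmpdir:
    path = Path(tmpdir) / "pipeline.json"
    builder.to_json_file(path, metadata={"version": "1.0"})

    data = json.loads(path.read_text(encoding="utf-8"))
    # The layout mirrors the "output" dict built inside to_json_file() above.
    assert data["pipeline"] == [{"$match": {"status": "active"}}, {"$limit": 10}]
    assert data["metadata"] == {"version": "1.0"}

# get_stage_at() returns a copy of a single stage; pretty_print() parses back
# to the same structure that build() returns.
assert builder.get_stage_at(1) == {"$limit": 10}
assert json.loads(builder.pretty_print()) == builder.build()
```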
@@ -5,6 +5,10 @@ Tests for methods from Proposal 2 (debug/validation) and Proposal 4 (pipeline an
 
  Author: seligoroff
  """
+ import json
+ import tempfile
+ from pathlib import Path
+
  import pytest
  from mongo_pipebuilder import PipelineBuilder
 
@@ -289,3 +293,245 @@ class TestPipelineBuilderAnalysis:
289
293
  for stage_type in stage_types:
290
294
  assert builder.has_stage(stage_type) is True
291
295
 
296
+
297
+ class TestPipelineBuilderDebugMethods:
298
+ """Tests for Phase 1 debug methods: pretty_print, to_json_file, get_stage_at."""
299
+
300
+ def test_get_stage_at_valid_index(self):
301
+ """Test get_stage_at() with valid index."""
302
+ builder = PipelineBuilder()
303
+ builder.match({"status": "active"}).limit(10).sort({"name": 1})
304
+
305
+ stage0 = builder.get_stage_at(0)
306
+ assert stage0 == {"$match": {"status": "active"}}
307
+
308
+ stage1 = builder.get_stage_at(1)
309
+ assert stage1 == {"$limit": 10}
310
+
311
+ stage2 = builder.get_stage_at(2)
312
+ assert stage2 == {"$sort": {"name": 1}}
313
+
314
+ def test_get_stage_at_returns_copy(self):
315
+ """Test that get_stage_at() returns a copy, not reference."""
316
+ builder = PipelineBuilder()
317
+ builder.match({"status": "active"})
318
+
319
+ stage = builder.get_stage_at(0)
320
+ stage["$match"]["new_field"] = "value"
321
+
322
+ # Original should be unchanged
323
+ original_stage = builder.get_stage_at(0)
324
+ assert "new_field" not in original_stage["$match"]
325
+
326
+ def test_get_stage_at_invalid_index_negative(self):
327
+ """Test get_stage_at() raises IndexError for negative index."""
328
+ builder = PipelineBuilder()
329
+ builder.match({"status": "active"})
330
+
331
+ with pytest.raises(IndexError, match="Index -1 out of range"):
332
+ builder.get_stage_at(-1)
333
+
334
+ def test_get_stage_at_invalid_index_too_large(self):
335
+ """Test get_stage_at() raises IndexError for index too large."""
336
+ builder = PipelineBuilder()
337
+ builder.match({"status": "active"})
338
+
339
+ with pytest.raises(IndexError, match="Index 10 out of range"):
340
+ builder.get_stage_at(10)
341
+
342
+ def test_get_stage_at_empty_builder(self):
343
+ """Test get_stage_at() raises IndexError on empty builder."""
344
+ builder = PipelineBuilder()
345
+
346
+ with pytest.raises(IndexError, match="Index 0 out of range"):
347
+ builder.get_stage_at(0)
348
+
349
+ def test_pretty_print_empty_builder(self):
350
+ """Test pretty_print() with empty builder."""
351
+ builder = PipelineBuilder()
352
+ result = builder.pretty_print()
353
+
354
+ assert result == "[]"
355
+ # Should be valid JSON
356
+ json.loads(result)
357
+
358
+ def test_pretty_print_single_stage(self):
359
+ """Test pretty_print() with single stage."""
360
+ builder = PipelineBuilder()
361
+ builder.match({"status": "active"})
362
+ result = builder.pretty_print()
363
+
364
+ # Should be valid JSON
365
+ parsed = json.loads(result)
366
+ assert parsed == [{"$match": {"status": "active"}}]
367
+
368
+ # Should contain expected content
369
+ assert "$match" in result
370
+ assert "status" in result
371
+ assert "active" in result
372
+
373
+ def test_pretty_print_multiple_stages(self):
374
+ """Test pretty_print() with multiple stages."""
375
+ builder = PipelineBuilder()
376
+ builder.match({"status": "active"}).limit(10).sort({"name": 1})
377
+ result = builder.pretty_print()
378
+
379
+ # Should be valid JSON
380
+ parsed = json.loads(result)
381
+ assert len(parsed) == 3
382
+ assert parsed[0] == {"$match": {"status": "active"}}
383
+ assert parsed[1] == {"$limit": 10}
384
+ assert parsed[2] == {"$sort": {"name": 1}}
385
+
386
+ def test_pretty_print_custom_indent(self):
387
+ """Test pretty_print() with custom indent."""
388
+ builder = PipelineBuilder()
389
+ builder.match({"status": "active"})
390
+ result = builder.pretty_print(indent=4)
391
+
392
+ # Should be valid JSON
393
+ parsed = json.loads(result)
394
+ assert parsed == [{"$match": {"status": "active"}}]
395
+
396
+ # Should use 4 spaces for indentation
397
+ lines = result.split("\n")
398
+ if len(lines) > 1:
399
+ assert lines[1].startswith(" ") # 4 spaces
400
+
401
+ def test_pretty_print_ensure_ascii(self):
402
+ """Test pretty_print() with ensure_ascii=True."""
403
+ builder = PipelineBuilder()
404
+ builder.match({"name": "тест"}) # Non-ASCII characters
405
+ result_ascii = builder.pretty_print(ensure_ascii=True)
406
+ result_no_ascii = builder.pretty_print(ensure_ascii=False)
407
+
408
+ # Both should be valid JSON
409
+ json.loads(result_ascii)
410
+ json.loads(result_no_ascii)
411
+
412
+ # Non-ASCII version should contain original characters
413
+ assert "тест" in result_no_ascii
414
+
415
+ def test_to_json_file_basic(self):
416
+ """Test to_json_file() saves pipeline correctly."""
417
+ builder = PipelineBuilder()
418
+ builder.match({"status": "active"}).limit(10)
419
+
420
+ with tempfile.TemporaryDirectory() as tmpdir:
421
+ filepath = Path(tmpdir) / "test_pipeline.json"
422
+ builder.to_json_file(filepath)
423
+
424
+ # File should exist
425
+ assert filepath.exists()
426
+
427
+ # Should contain valid JSON
428
+ with open(filepath, "r", encoding="utf-8") as f:
429
+ data = json.load(f)
430
+
431
+ assert "pipeline" in data
432
+ assert data["pipeline"] == [
433
+ {"$match": {"status": "active"}},
434
+ {"$limit": 10}
435
+ ]
436
+
437
+ def test_to_json_file_with_metadata(self):
438
+ """Test to_json_file() with metadata."""
439
+ builder = PipelineBuilder()
440
+ builder.match({"status": "active"})
441
+
442
+ metadata = {
443
+ "version": "1.0",
444
+ "author": "developer",
445
+ "description": "Test pipeline"
446
+ }
447
+
448
+ with tempfile.TemporaryDirectory() as tmpdir:
449
+ filepath = Path(tmpdir) / "test_pipeline.json"
450
+ builder.to_json_file(filepath, metadata=metadata)
451
+
452
+ with open(filepath, "r", encoding="utf-8") as f:
453
+ data = json.load(f)
454
+
455
+ assert "pipeline" in data
456
+ assert "metadata" in data
457
+ assert data["metadata"] == metadata
458
+
459
+ def test_to_json_file_creates_directory(self):
460
+ """Test to_json_file() creates parent directories if they don't exist."""
461
+ builder = PipelineBuilder()
462
+ builder.match({"status": "active"})
463
+
464
+ with tempfile.TemporaryDirectory() as tmpdir:
465
+ filepath = Path(tmpdir) / "nested" / "path" / "test_pipeline.json"
466
+
467
+ # Directory shouldn't exist yet
468
+ assert not filepath.parent.exists()
469
+
470
+ builder.to_json_file(filepath)
471
+
472
+ # File and directory should be created
473
+ assert filepath.exists()
474
+ assert filepath.parent.exists()
475
+
476
+ def test_to_json_file_string_path(self):
477
+ """Test to_json_file() accepts string path."""
478
+ builder = PipelineBuilder()
479
+ builder.match({"status": "active"})
480
+
481
+ with tempfile.TemporaryDirectory() as tmpdir:
482
+ filepath = str(Path(tmpdir) / "test_pipeline.json")
483
+ builder.to_json_file(filepath)
484
+
485
+ assert Path(filepath).exists()
486
+
487
+ def test_to_json_file_custom_indent(self):
488
+ """Test to_json_file() with custom indent."""
489
+ builder = PipelineBuilder()
490
+ builder.match({"status": "active"})
491
+
492
+ with tempfile.TemporaryDirectory() as tmpdir:
493
+ filepath = Path(tmpdir) / "test_pipeline.json"
494
+ builder.to_json_file(filepath, indent=4)
495
+
496
+ with open(filepath, "r", encoding="utf-8") as f:
497
+ content = f.read()
498
+ lines = content.split("\n")
499
+ if len(lines) > 1:
500
+ assert lines[1].startswith(" ") # 4 spaces
501
+
502
+ def test_pretty_print_and_to_json_file_consistency(self):
503
+ """Test that pretty_print() and to_json_file() produce consistent output."""
504
+ builder = PipelineBuilder()
505
+ builder.match({"status": "active"}).limit(10)
506
+
507
+ pretty_output = builder.pretty_print()
508
+
509
+ with tempfile.TemporaryDirectory() as tmpdir:
510
+ filepath = Path(tmpdir) / "test_pipeline.json"
511
+ builder.to_json_file(filepath)
512
+
513
+ with open(filepath, "r", encoding="utf-8") as f:
514
+ file_data = json.load(f)
515
+
516
+ # Pipeline in file should match pretty_print output when parsed
517
+ pretty_parsed = json.loads(pretty_output)
518
+ assert file_data["pipeline"] == pretty_parsed
519
+
520
+ def test_get_stage_at_with_complex_stage(self):
521
+ """Test get_stage_at() with complex stage (e.g., lookup)."""
522
+ builder = PipelineBuilder()
523
+ builder.match({"status": "active"})
524
+ builder.lookup(
525
+ from_collection="users",
526
+ local_field="userId",
527
+ foreign_field="_id",
528
+ as_field="user"
529
+ )
530
+
531
+ stage = builder.get_stage_at(1)
532
+ assert "$lookup" in stage
533
+ assert stage["$lookup"]["from"] == "users"
534
+ assert stage["$lookup"]["localField"] == "userId"
535
+ assert stage["$lookup"]["foreignField"] == "_id"
536
+ assert stage["$lookup"]["as"] == "user"
537
+
@@ -256,3 +256,5 @@ class TestPrependAndInsertAtIntegration:
  assert pipeline[2] == {"$sort": {"name": 1}}
  assert pipeline[3] == {"$limit": 10}
 
+
+
@@ -200,3 +200,5 @@ class TestProjectValidation:
  builder.project(123)
 
 
+
+
@@ -157,3 +157,5 @@ class TestSetFieldValidation:
  builder.set_field(123)
 
 
+
+