mongo-pipebuilder 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18) hide show
  1. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/LICENSE +5 -0
  2. {mongo_pipebuilder-0.2.1/src/mongo_pipebuilder.egg-info → mongo_pipebuilder-0.2.3}/PKG-INFO +177 -6
  3. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/README.md +176 -5
  4. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/pyproject.toml +1 -1
  5. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder/__init__.py +1 -1
  6. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder/builder.py +33 -31
  7. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3/src/mongo_pipebuilder.egg-info}/PKG-INFO +177 -6
  8. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder.py +16 -1
  9. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder_insert.py +1 -0
  10. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder_validation.py +5 -0
  11. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder_validation_existing.py +12 -4
  12. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder_validation_new.py +1 -0
  13. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/setup.cfg +0 -0
  14. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder.egg-info/SOURCES.txt +0 -0
  15. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder.egg-info/dependency_links.txt +0 -0
  16. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder.egg-info/requires.txt +0 -0
  17. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/src/mongo_pipebuilder.egg-info/top_level.txt +0 -0
  18. {mongo_pipebuilder-0.2.1 → mongo_pipebuilder-0.2.3}/tests/test_builder_debug.py +0 -0
@@ -20,3 +20,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
  SOFTWARE.
22
22
 
23
+
24
+
25
+
26
+
27
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mongo-pipebuilder
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Type-safe, fluent MongoDB aggregation pipeline builder
5
5
  Author-email: seligoroff <seligoroff@gmail.com>
6
6
  License: MIT
@@ -28,6 +28,12 @@ Dynamic: license-file
28
28
 
29
29
  # mongo-pipebuilder
30
30
 
31
+ [![PyPI version](https://badge.fury.io/py/mongo-pipebuilder.svg)](https://badge.fury.io/py/mongo-pipebuilder)
32
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
34
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
35
+ [![Test Coverage](https://img.shields.io/badge/coverage-96%25-green.svg)](https://github.com/seligoroff/mongo-pipebuilder)
36
+
31
37
  Type-safe, fluent MongoDB aggregation pipeline builder for Python.
32
38
 
33
39
  ## Overview
@@ -36,11 +42,11 @@ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
36
42
 
37
43
  ## Features
38
44
 
39
- - **Type-safe**: Full type hints support with IDE autocomplete
40
- - **Fluent interface**: Chain methods for readable, maintainable code
41
- - **Zero dependencies**: Pure Python, lightweight package
42
- - **Extensible**: Easy to add custom stages via `add_stage()`
43
- - **Well tested**: Comprehensive test suite with 96%+ coverage
45
+ - **Type-safe**: Full type hints support with IDE autocomplete
46
+ - **Fluent interface**: Chain methods for readable, maintainable code
47
+ - **Zero dependencies**: Pure Python, lightweight package
48
+ - **Extensible**: Easy to add custom stages via `add_stage()`
49
+ - **Well tested**: Comprehensive test suite with 96%+ coverage
44
50
 
45
51
  ## Installation
46
52
 
@@ -259,6 +265,22 @@ group_index = stage_types.index("$group")
259
265
  builder.insert_at(group_index, {"$addFields": {"x": 1}})
260
266
  ```
261
267
 
268
+ ##### `copy() -> PipelineBuilder`
269
+
270
+ Creates an independent copy of the builder with current stages. Useful for creating immutable variants and composing pipelines.
271
+
272
+ ```python
273
+ builder1 = PipelineBuilder().match({"status": "active"})
274
+ builder2 = builder1.copy()
275
+ builder2.limit(10)
276
+
277
+ # Original unchanged
278
+ assert len(builder1) == 1
279
+ assert len(builder2) == 2
280
+ ```
281
+
282
+ See [Composing and Reusing Pipelines](#composing-and-reusing-pipelines) for practical examples.
283
+
262
284
  ##### `validate() -> bool`
263
285
 
264
286
  Validates the pipeline before execution. Checks that:
@@ -340,6 +362,150 @@ pipeline = (
340
362
  )
341
363
  ```
342
364
 
365
+ ### Composing and Reusing Pipelines
366
+
367
+ The `copy()` method allows you to create immutable variants of pipelines, enabling safe composition and reuse. This is useful when you need to:
368
+ - Create multiple variants from a base pipeline
369
+ - Compose pipelines functionally
370
+ - Cache base pipelines safely
371
+ - Pass pipelines to functions without side effects
372
+
373
+ #### Example: Building Multiple Variants from a Base Pipeline
374
+
375
+ ```python
376
+ from mongo_pipebuilder import PipelineBuilder
377
+
378
+ # Base pipeline with common filtering and joining
379
+ base_pipeline = (
380
+ PipelineBuilder()
381
+ .match({"status": "published", "deleted": False})
382
+ .lookup(
383
+ from_collection="authors",
384
+ local_field="authorId",
385
+ foreign_field="_id",
386
+ as_field="author"
387
+ )
388
+ .unwind("author", preserve_null_and_empty_arrays=True)
389
+ .project({
390
+ "title": 1,
391
+ "authorName": "$author.name",
392
+ "publishedAt": 1
393
+ })
394
+ )
395
+
396
+ # Create variants with different sorting and limits
397
+ recent_posts = base_pipeline.copy().sort({"publishedAt": -1}).limit(10).build()
398
+ popular_posts = base_pipeline.copy().sort({"views": -1}).limit(5).build()
399
+ author_posts = base_pipeline.copy().match({"authorName": "John Doe"}).build()
400
+
401
+ # Base pipeline remains unchanged
402
+ assert len(base_pipeline) == 4 # Still has 4 stages
403
+ ```
404
+
405
+ #### Example: Functional Composition Pattern
406
+
407
+ ```python
408
+ def add_pagination(builder, page: int, page_size: int = 10):
409
+ """Add pagination to a pipeline."""
410
+ return builder.copy().skip(page * page_size).limit(page_size)
411
+
412
+ def add_sorting(builder, sort_field: str, ascending: bool = True):
413
+ """Add sorting to a pipeline."""
414
+ return builder.copy().sort({sort_field: 1 if ascending else -1})
415
+
416
+ # Compose pipelines functionally
417
+ base = PipelineBuilder().match({"status": "active"})
418
+
419
+ # Create different variants
420
+ page1 = add_pagination(add_sorting(base, "createdAt"), page=0)
421
+ page2 = add_pagination(add_sorting(base, "createdAt"), page=1)
422
+ sorted_by_name = add_sorting(base, "name", ascending=True)
423
+
424
+ # All variants are independent
425
+ assert len(base) == 1 # Base unchanged
426
+ assert len(page1) == 4 # match + sort + skip + limit
427
+ ```
428
+
429
+ #### Example: Caching Base Pipelines
430
+
431
+ ```python
432
+ from functools import lru_cache
433
+
434
+ @lru_cache(maxsize=100)
435
+ def get_base_pipeline(user_id: str):
436
+ """Cache base pipeline for a user."""
437
+ return (
438
+ PipelineBuilder()
439
+ .match({"userId": user_id, "status": "active"})
440
+ .lookup(
441
+ from_collection="profiles",
442
+ local_field="userId",
443
+ foreign_field="_id",
444
+ as_field="profile"
445
+ )
446
+ )
447
+
448
+ # Reuse cached base pipeline with different modifications
449
+ user_id = "12345"
450
+ base = get_base_pipeline(user_id)
451
+
452
+ # Create multiple queries from cached base
453
+ recent = base.copy().sort({"createdAt": -1}).limit(10).build()
454
+ by_category = base.copy().match({"category": "tech"}).build()
455
+ with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
456
+
457
+ # Base pipeline is safely cached and reused
458
+ ```
459
+
460
+ #### Example: Pipeline Factories
461
+
462
+ ```python
463
+ class PipelineFactory:
464
+ """Factory for creating common pipeline patterns."""
465
+
466
+ @staticmethod
467
+ def base_article_pipeline():
468
+ """Base pipeline for articles."""
469
+ return (
470
+ PipelineBuilder()
471
+ .match({"status": "published"})
472
+ .lookup(
473
+ from_collection="authors",
474
+ local_field="authorId",
475
+ foreign_field="_id",
476
+ as_field="author"
477
+ )
478
+ )
479
+
480
+ @staticmethod
481
+ def with_author_filter(builder, author_name: str):
482
+ """Add author filter to pipeline."""
483
+ return builder.copy().match({"author.name": author_name})
484
+
485
+ @staticmethod
486
+ def with_date_range(builder, start_date: str, end_date: str):
487
+ """Add date range filter to pipeline."""
488
+ return builder.copy().match({
489
+ "publishedAt": {"$gte": start_date, "$lte": end_date}
490
+ })
491
+
492
+ # Usage
493
+ base = PipelineFactory.base_article_pipeline()
494
+ johns_articles = PipelineFactory.with_author_filter(base, "John Doe")
495
+ recent_johns = PipelineFactory.with_date_range(
496
+ johns_articles,
497
+ start_date="2024-01-01",
498
+ end_date="2024-12-31"
499
+ ).sort({"publishedAt": -1}).limit(10).build()
500
+ ```
501
+
502
+ **Key Benefits:**
503
+ - Safe reuse: Base pipelines remain unchanged
504
+ - Functional composition: Build pipelines from smaller parts
505
+ - Caching friendly: Base pipelines can be safely cached
506
+ - No side effects: Functions can safely modify copies
507
+ - Thread-safe: Multiple threads can use copies independently
508
+
343
509
  ## Development
344
510
 
345
511
  ### Project Structure
@@ -373,3 +539,8 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
373
539
 
374
540
  MIT License - see [LICENSE](LICENSE) file for details.
375
541
 
542
+
543
+
544
+
545
+
546
+
@@ -1,5 +1,11 @@
1
1
  # mongo-pipebuilder
2
2
 
3
+ [![PyPI version](https://badge.fury.io/py/mongo-pipebuilder.svg)](https://badge.fury.io/py/mongo-pipebuilder)
4
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
7
+ [![Test Coverage](https://img.shields.io/badge/coverage-96%25-green.svg)](https://github.com/seligoroff/mongo-pipebuilder)
8
+
3
9
  Type-safe, fluent MongoDB aggregation pipeline builder for Python.
4
10
 
5
11
  ## Overview
@@ -8,11 +14,11 @@ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
8
14
 
9
15
  ## Features
10
16
 
11
- - **Type-safe**: Full type hints support with IDE autocomplete
12
- - **Fluent interface**: Chain methods for readable, maintainable code
13
- - **Zero dependencies**: Pure Python, lightweight package
14
- - **Extensible**: Easy to add custom stages via `add_stage()`
15
- - **Well tested**: Comprehensive test suite with 96%+ coverage
17
+ - **Type-safe**: Full type hints support with IDE autocomplete
18
+ - **Fluent interface**: Chain methods for readable, maintainable code
19
+ - **Zero dependencies**: Pure Python, lightweight package
20
+ - **Extensible**: Easy to add custom stages via `add_stage()`
21
+ - **Well tested**: Comprehensive test suite with 96%+ coverage
16
22
 
17
23
  ## Installation
18
24
 
@@ -231,6 +237,22 @@ group_index = stage_types.index("$group")
231
237
  builder.insert_at(group_index, {"$addFields": {"x": 1}})
232
238
  ```
233
239
 
240
+ ##### `copy() -> PipelineBuilder`
241
+
242
+ Creates an independent copy of the builder with current stages. Useful for creating immutable variants and composing pipelines.
243
+
244
+ ```python
245
+ builder1 = PipelineBuilder().match({"status": "active"})
246
+ builder2 = builder1.copy()
247
+ builder2.limit(10)
248
+
249
+ # Original unchanged
250
+ assert len(builder1) == 1
251
+ assert len(builder2) == 2
252
+ ```
253
+
254
+ See [Composing and Reusing Pipelines](#composing-and-reusing-pipelines) for practical examples.
255
+
234
256
  ##### `validate() -> bool`
235
257
 
236
258
  Validates the pipeline before execution. Checks that:
@@ -312,6 +334,150 @@ pipeline = (
312
334
  )
313
335
  ```
314
336
 
337
+ ### Composing and Reusing Pipelines
338
+
339
+ The `copy()` method allows you to create immutable variants of pipelines, enabling safe composition and reuse. This is useful when you need to:
340
+ - Create multiple variants from a base pipeline
341
+ - Compose pipelines functionally
342
+ - Cache base pipelines safely
343
+ - Pass pipelines to functions without side effects
344
+
345
+ #### Example: Building Multiple Variants from a Base Pipeline
346
+
347
+ ```python
348
+ from mongo_pipebuilder import PipelineBuilder
349
+
350
+ # Base pipeline with common filtering and joining
351
+ base_pipeline = (
352
+ PipelineBuilder()
353
+ .match({"status": "published", "deleted": False})
354
+ .lookup(
355
+ from_collection="authors",
356
+ local_field="authorId",
357
+ foreign_field="_id",
358
+ as_field="author"
359
+ )
360
+ .unwind("author", preserve_null_and_empty_arrays=True)
361
+ .project({
362
+ "title": 1,
363
+ "authorName": "$author.name",
364
+ "publishedAt": 1
365
+ })
366
+ )
367
+
368
+ # Create variants with different sorting and limits
369
+ recent_posts = base_pipeline.copy().sort({"publishedAt": -1}).limit(10).build()
370
+ popular_posts = base_pipeline.copy().sort({"views": -1}).limit(5).build()
371
+ author_posts = base_pipeline.copy().match({"authorName": "John Doe"}).build()
372
+
373
+ # Base pipeline remains unchanged
374
+ assert len(base_pipeline) == 4 # Still has 4 stages
375
+ ```
376
+
377
+ #### Example: Functional Composition Pattern
378
+
379
+ ```python
380
+ def add_pagination(builder, page: int, page_size: int = 10):
381
+ """Add pagination to a pipeline."""
382
+ return builder.copy().skip(page * page_size).limit(page_size)
383
+
384
+ def add_sorting(builder, sort_field: str, ascending: bool = True):
385
+ """Add sorting to a pipeline."""
386
+ return builder.copy().sort({sort_field: 1 if ascending else -1})
387
+
388
+ # Compose pipelines functionally
389
+ base = PipelineBuilder().match({"status": "active"})
390
+
391
+ # Create different variants
392
+ page1 = add_pagination(add_sorting(base, "createdAt"), page=0)
393
+ page2 = add_pagination(add_sorting(base, "createdAt"), page=1)
394
+ sorted_by_name = add_sorting(base, "name", ascending=True)
395
+
396
+ # All variants are independent
397
+ assert len(base) == 1 # Base unchanged
398
+ assert len(page1) == 4 # match + sort + skip + limit
399
+ ```
400
+
401
+ #### Example: Caching Base Pipelines
402
+
403
+ ```python
404
+ from functools import lru_cache
405
+
406
+ @lru_cache(maxsize=100)
407
+ def get_base_pipeline(user_id: str):
408
+ """Cache base pipeline for a user."""
409
+ return (
410
+ PipelineBuilder()
411
+ .match({"userId": user_id, "status": "active"})
412
+ .lookup(
413
+ from_collection="profiles",
414
+ local_field="userId",
415
+ foreign_field="_id",
416
+ as_field="profile"
417
+ )
418
+ )
419
+
420
+ # Reuse cached base pipeline with different modifications
421
+ user_id = "12345"
422
+ base = get_base_pipeline(user_id)
423
+
424
+ # Create multiple queries from cached base
425
+ recent = base.copy().sort({"createdAt": -1}).limit(10).build()
426
+ by_category = base.copy().match({"category": "tech"}).build()
427
+ with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
428
+
429
+ # Base pipeline is safely cached and reused
430
+ ```
431
+
432
+ #### Example: Pipeline Factories
433
+
434
+ ```python
435
+ class PipelineFactory:
436
+ """Factory for creating common pipeline patterns."""
437
+
438
+ @staticmethod
439
+ def base_article_pipeline():
440
+ """Base pipeline for articles."""
441
+ return (
442
+ PipelineBuilder()
443
+ .match({"status": "published"})
444
+ .lookup(
445
+ from_collection="authors",
446
+ local_field="authorId",
447
+ foreign_field="_id",
448
+ as_field="author"
449
+ )
450
+ )
451
+
452
+ @staticmethod
453
+ def with_author_filter(builder, author_name: str):
454
+ """Add author filter to pipeline."""
455
+ return builder.copy().match({"author.name": author_name})
456
+
457
+ @staticmethod
458
+ def with_date_range(builder, start_date: str, end_date: str):
459
+ """Add date range filter to pipeline."""
460
+ return builder.copy().match({
461
+ "publishedAt": {"$gte": start_date, "$lte": end_date}
462
+ })
463
+
464
+ # Usage
465
+ base = PipelineFactory.base_article_pipeline()
466
+ johns_articles = PipelineFactory.with_author_filter(base, "John Doe")
467
+ recent_johns = PipelineFactory.with_date_range(
468
+ johns_articles,
469
+ start_date="2024-01-01",
470
+ end_date="2024-12-31"
471
+ ).sort({"publishedAt": -1}).limit(10).build()
472
+ ```
473
+
474
+ **Key Benefits:**
475
+ - Safe reuse: Base pipelines remain unchanged
476
+ - Functional composition: Build pipelines from smaller parts
477
+ - Caching friendly: Base pipelines can be safely cached
478
+ - No side effects: Functions can safely modify copies
479
+ - Thread-safe: Multiple threads can use copies independently
480
+
315
481
  ## Development
316
482
 
317
483
  ### Project Structure
@@ -345,3 +511,8 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
345
511
 
346
512
  MIT License - see [LICENSE](LICENSE) file for details.
347
513
 
514
+
515
+
516
+
517
+
518
+
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mongo-pipebuilder"
7
- version = "0.2.1"
7
+ version = "0.2.3"
8
8
  description = "Type-safe, fluent MongoDB aggregation pipeline builder"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -9,6 +9,6 @@ Author: seligoroff
9
9
 
10
10
  from mongo_pipebuilder.builder import PipelineBuilder
11
11
 
12
- __version__ = "0.2.1"
12
+ __version__ = "0.2.3"
13
13
  __all__ = ["PipelineBuilder"]
14
14
 
@@ -33,8 +33,7 @@ class PipelineBuilder:
33
33
  Self for method chaining
34
34
 
35
35
  Raises:
36
- TypeError: If conditions is not a dictionary
37
- ValueError: If conditions is None
36
+ TypeError: If conditions is None or not a dictionary
38
37
 
39
38
  Example:
40
39
  >>> builder.match({"status": "active", "age": {"$gte": 18}})
@@ -158,36 +157,47 @@ class PipelineBuilder:
158
157
  self._stages.append({"$project": fields})
159
158
  return self
160
159
 
161
- def group(self, group_by: Dict[str, Any], accumulators: Dict[str, Any]) -> Self:
160
+ def group(self, group_by: Union[str, Dict[str, Any], Any], accumulators: Dict[str, Any]) -> Self:
162
161
  """
163
162
  Add a $group stage for grouping documents.
164
163
 
165
164
  Args:
166
- group_by: Expression for grouping (becomes _id)
165
+ group_by: Expression for grouping (becomes _id). Can be:
166
+ - A string (field path, e.g., "$category")
167
+ - A dict (composite key, e.g., {"category": "$category"})
168
+ - Any other value (null, number, etc.)
167
169
  accumulators: Dictionary with accumulators (sum, avg, count, etc.)
168
170
 
169
171
  Returns:
170
172
  Self for method chaining
171
173
 
172
174
  Raises:
173
- TypeError: If arguments are not dictionaries
174
- ValueError: If both group_by and accumulators are empty
175
+ TypeError: If accumulators is not a dictionary
176
+ ValueError: If both group_by and accumulators are empty (when group_by is dict/str)
175
177
 
176
178
  Example:
177
179
  >>> builder.group(
178
- ... group_by={"category": "$category"},
180
+ ... group_by="$category", # String field path
181
+ ... accumulators={"total": {"$sum": "$amount"}}
182
+ ... )
183
+ >>> builder.group(
184
+ ... group_by={"category": "$category"}, # Composite key
179
185
  ... accumulators={"total": {"$sum": "$amount"}}
180
186
  ... )
181
187
  """
182
- if not isinstance(group_by, dict):
183
- raise TypeError(f"group_by must be a dict, got {type(group_by)}")
184
188
  if not isinstance(accumulators, dict):
185
189
  raise TypeError(f"accumulators must be a dict, got {type(accumulators)}")
186
190
 
187
- # Empty group_by is technically valid in MongoDB (groups all into one document)
188
- # But if both are empty, it's likely an error
189
- if not group_by and not accumulators:
190
- raise ValueError("group_by and accumulators cannot both be empty")
191
+ # Validate empty cases
192
+ # group_by can be None, empty string, empty dict, etc. - all are valid in MongoDB
193
+ # But if it's a string and empty, or dict and empty, and accumulators is also empty,
194
+ # it's likely an error
195
+ if isinstance(group_by, dict):
196
+ if not group_by and not accumulators:
197
+ raise ValueError("group_by and accumulators cannot both be empty")
198
+ elif isinstance(group_by, str):
199
+ if not group_by and not accumulators:
200
+ raise ValueError("group_by and accumulators cannot both be empty")
191
201
 
192
202
  group_stage = {"_id": group_by, **accumulators}
193
203
  self._stages.append({"$group": group_stage})
@@ -625,23 +635,15 @@ class PipelineBuilder:
625
635
  "Only one output stage is allowed."
626
636
  )
627
637
 
628
- # If $out exists, it must be the last stage
629
- if has_out:
630
- out_index = stage_types.index("$out")
631
- if out_index != len(stage_types) - 1:
632
- raise ValueError(
633
- f"$out stage must be the last stage in the pipeline. "
634
- f"Found at position {out_index + 1} of {len(stage_types)}."
635
- )
636
-
637
- # If $merge exists, it must be the last stage
638
- if has_merge:
639
- merge_index = stage_types.index("$merge")
640
- if merge_index != len(stage_types) - 1:
641
- raise ValueError(
642
- f"$merge stage must be the last stage in the pipeline. "
643
- f"Found at position {merge_index + 1} of {len(stage_types)}."
644
- )
638
+ # Check if $out or $merge exist and validate position
639
+ for stage_name in ["$out", "$merge"]:
640
+ if stage_name in stage_types:
641
+ stage_index = stage_types.index(stage_name)
642
+ if stage_index != len(stage_types) - 1:
643
+ raise ValueError(
644
+ f"{stage_name} stage must be the last stage in the pipeline. "
645
+ f"Found at position {stage_index + 1} of {len(stage_types)}."
646
+ )
645
647
 
646
648
  return True
647
649
 
@@ -658,7 +660,7 @@ class PipelineBuilder:
658
660
  >>> builder.get_stage_types()
659
661
  ['$match', '$limit']
660
662
  """
661
- return [list(stage.keys())[0] for stage in self._stages]
663
+ return [next(iter(stage)) for stage in self._stages]
662
664
 
663
665
  def has_stage(self, stage_type: str) -> bool:
664
666
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mongo-pipebuilder
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Type-safe, fluent MongoDB aggregation pipeline builder
5
5
  Author-email: seligoroff <seligoroff@gmail.com>
6
6
  License: MIT
@@ -28,6 +28,12 @@ Dynamic: license-file
28
28
 
29
29
  # mongo-pipebuilder
30
30
 
31
+ [![PyPI version](https://badge.fury.io/py/mongo-pipebuilder.svg)](https://badge.fury.io/py/mongo-pipebuilder)
32
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
34
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
35
+ [![Test Coverage](https://img.shields.io/badge/coverage-96%25-green.svg)](https://github.com/seligoroff/mongo-pipebuilder)
36
+
31
37
  Type-safe, fluent MongoDB aggregation pipeline builder for Python.
32
38
 
33
39
  ## Overview
@@ -36,11 +42,11 @@ Type-safe, fluent MongoDB aggregation pipeline builder for Python.
36
42
 
37
43
  ## Features
38
44
 
39
- - **Type-safe**: Full type hints support with IDE autocomplete
40
- - **Fluent interface**: Chain methods for readable, maintainable code
41
- - **Zero dependencies**: Pure Python, lightweight package
42
- - **Extensible**: Easy to add custom stages via `add_stage()`
43
- - **Well tested**: Comprehensive test suite with 96%+ coverage
45
+ - **Type-safe**: Full type hints support with IDE autocomplete
46
+ - **Fluent interface**: Chain methods for readable, maintainable code
47
+ - **Zero dependencies**: Pure Python, lightweight package
48
+ - **Extensible**: Easy to add custom stages via `add_stage()`
49
+ - **Well tested**: Comprehensive test suite with 96%+ coverage
44
50
 
45
51
  ## Installation
46
52
 
@@ -259,6 +265,22 @@ group_index = stage_types.index("$group")
259
265
  builder.insert_at(group_index, {"$addFields": {"x": 1}})
260
266
  ```
261
267
 
268
+ ##### `copy() -> PipelineBuilder`
269
+
270
+ Creates an independent copy of the builder with current stages. Useful for creating immutable variants and composing pipelines.
271
+
272
+ ```python
273
+ builder1 = PipelineBuilder().match({"status": "active"})
274
+ builder2 = builder1.copy()
275
+ builder2.limit(10)
276
+
277
+ # Original unchanged
278
+ assert len(builder1) == 1
279
+ assert len(builder2) == 2
280
+ ```
281
+
282
+ See [Composing and Reusing Pipelines](#composing-and-reusing-pipelines) for practical examples.
283
+
262
284
  ##### `validate() -> bool`
263
285
 
264
286
  Validates the pipeline before execution. Checks that:
@@ -340,6 +362,150 @@ pipeline = (
340
362
  )
341
363
  ```
342
364
 
365
+ ### Composing and Reusing Pipelines
366
+
367
+ The `copy()` method allows you to create immutable variants of pipelines, enabling safe composition and reuse. This is useful when you need to:
368
+ - Create multiple variants from a base pipeline
369
+ - Compose pipelines functionally
370
+ - Cache base pipelines safely
371
+ - Pass pipelines to functions without side effects
372
+
373
+ #### Example: Building Multiple Variants from a Base Pipeline
374
+
375
+ ```python
376
+ from mongo_pipebuilder import PipelineBuilder
377
+
378
+ # Base pipeline with common filtering and joining
379
+ base_pipeline = (
380
+ PipelineBuilder()
381
+ .match({"status": "published", "deleted": False})
382
+ .lookup(
383
+ from_collection="authors",
384
+ local_field="authorId",
385
+ foreign_field="_id",
386
+ as_field="author"
387
+ )
388
+ .unwind("author", preserve_null_and_empty_arrays=True)
389
+ .project({
390
+ "title": 1,
391
+ "authorName": "$author.name",
392
+ "publishedAt": 1
393
+ })
394
+ )
395
+
396
+ # Create variants with different sorting and limits
397
+ recent_posts = base_pipeline.copy().sort({"publishedAt": -1}).limit(10).build()
398
+ popular_posts = base_pipeline.copy().sort({"views": -1}).limit(5).build()
399
+ author_posts = base_pipeline.copy().match({"authorName": "John Doe"}).build()
400
+
401
+ # Base pipeline remains unchanged
402
+ assert len(base_pipeline) == 4 # Still has 4 stages
403
+ ```
404
+
405
+ #### Example: Functional Composition Pattern
406
+
407
+ ```python
408
+ def add_pagination(builder, page: int, page_size: int = 10):
409
+ """Add pagination to a pipeline."""
410
+ return builder.copy().skip(page * page_size).limit(page_size)
411
+
412
+ def add_sorting(builder, sort_field: str, ascending: bool = True):
413
+ """Add sorting to a pipeline."""
414
+ return builder.copy().sort({sort_field: 1 if ascending else -1})
415
+
416
+ # Compose pipelines functionally
417
+ base = PipelineBuilder().match({"status": "active"})
418
+
419
+ # Create different variants
420
+ page1 = add_pagination(add_sorting(base, "createdAt"), page=0)
421
+ page2 = add_pagination(add_sorting(base, "createdAt"), page=1)
422
+ sorted_by_name = add_sorting(base, "name", ascending=True)
423
+
424
+ # All variants are independent
425
+ assert len(base) == 1 # Base unchanged
426
+ assert len(page1) == 4 # match + sort + skip + limit
427
+ ```
428
+
429
+ #### Example: Caching Base Pipelines
430
+
431
+ ```python
432
+ from functools import lru_cache
433
+
434
+ @lru_cache(maxsize=100)
435
+ def get_base_pipeline(user_id: str):
436
+ """Cache base pipeline for a user."""
437
+ return (
438
+ PipelineBuilder()
439
+ .match({"userId": user_id, "status": "active"})
440
+ .lookup(
441
+ from_collection="profiles",
442
+ local_field="userId",
443
+ foreign_field="_id",
444
+ as_field="profile"
445
+ )
446
+ )
447
+
448
+ # Reuse cached base pipeline with different modifications
449
+ user_id = "12345"
450
+ base = get_base_pipeline(user_id)
451
+
452
+ # Create multiple queries from cached base
453
+ recent = base.copy().sort({"createdAt": -1}).limit(10).build()
454
+ by_category = base.copy().match({"category": "tech"}).build()
455
+ with_stats = base.copy().group("$category", {"count": {"$sum": 1}}).build()
456
+
457
+ # Base pipeline is safely cached and reused
458
+ ```
459
+
460
+ #### Example: Pipeline Factories
461
+
462
+ ```python
463
+ class PipelineFactory:
464
+ """Factory for creating common pipeline patterns."""
465
+
466
+ @staticmethod
467
+ def base_article_pipeline():
468
+ """Base pipeline for articles."""
469
+ return (
470
+ PipelineBuilder()
471
+ .match({"status": "published"})
472
+ .lookup(
473
+ from_collection="authors",
474
+ local_field="authorId",
475
+ foreign_field="_id",
476
+ as_field="author"
477
+ )
478
+ )
479
+
480
+ @staticmethod
481
+ def with_author_filter(builder, author_name: str):
482
+ """Add author filter to pipeline."""
483
+ return builder.copy().match({"author.name": author_name})
484
+
485
+ @staticmethod
486
+ def with_date_range(builder, start_date: str, end_date: str):
487
+ """Add date range filter to pipeline."""
488
+ return builder.copy().match({
489
+ "publishedAt": {"$gte": start_date, "$lte": end_date}
490
+ })
491
+
492
+ # Usage
493
+ base = PipelineFactory.base_article_pipeline()
494
+ johns_articles = PipelineFactory.with_author_filter(base, "John Doe")
495
+ recent_johns = PipelineFactory.with_date_range(
496
+ johns_articles,
497
+ start_date="2024-01-01",
498
+ end_date="2024-12-31"
499
+ ).sort({"publishedAt": -1}).limit(10).build()
500
+ ```
501
+
502
+ **Key Benefits:**
503
+ - Safe reuse: Base pipelines remain unchanged
504
+ - Functional composition: Build pipelines from smaller parts
505
+ - Caching friendly: Base pipelines can be safely cached
506
+ - No side effects: Functions can safely modify copies
507
+ - Thread-safe: Multiple threads can use copies independently
508
+
343
509
  ## Development
344
510
 
345
511
  ### Project Structure
@@ -373,3 +539,8 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for development guidelines.
373
539
 
374
540
  MIT License - see [LICENSE](LICENSE) file for details.
375
541
 
542
+
543
+
544
+
545
+
546
+
@@ -78,7 +78,7 @@ class TestPipelineBuilder:
78
78
  assert pipeline == [{"$project": {"name": 1, "email": 1, "_id": 0}}]
79
79
 
80
80
  def test_group_stage(self):
81
- """Test adding $group stage."""
81
+ """Test adding $group stage with dict."""
82
82
  builder = PipelineBuilder()
83
83
  pipeline = builder.group(
84
84
  group_by={"category": "$category"},
@@ -92,6 +92,21 @@ class TestPipelineBuilder:
92
92
  }
93
93
  }]
94
94
 
95
+ def test_group_stage_with_string(self):
96
+ """Test adding $group stage with string field path."""
97
+ builder = PipelineBuilder()
98
+ pipeline = builder.group(
99
+ group_by="$categoryType",
100
+ accumulators={"total": {"$sum": "$amount"}}
101
+ ).build()
102
+
103
+ assert pipeline == [{
104
+ "$group": {
105
+ "_id": "$categoryType",
106
+ "total": {"$sum": "$amount"}
107
+ }
108
+ }]
109
+
95
110
  def test_unwind_stage(self):
96
111
  """Test adding $unwind stage."""
97
112
  builder = PipelineBuilder()
@@ -256,3 +256,4 @@ class TestPrependAndInsertAtIntegration:
256
256
  assert pipeline[2] == {"$sort": {"name": 1}}
257
257
  assert pipeline[3] == {"$limit": 10}
258
258
 
259
+
@@ -171,3 +171,8 @@ class TestPipelineValidation:
171
171
  assert "$sort" in pipeline[5]
172
172
  assert "$limit" in pipeline[6]
173
173
 
174
+
175
+
176
+
177
+
178
+
@@ -72,11 +72,18 @@ class TestGroupValidation:
72
72
  pipeline = builder.group({}, {"count": {"$sum": 1}}).build()
73
73
  assert pipeline == [{"$group": {"_id": {}, "count": {"$sum": 1}}}]
74
74
 
75
- def test_group_invalid_group_by_type_raises_error(self):
76
- """Test that group(123, {}) raises TypeError."""
75
+ def test_group_with_string_group_by(self):
76
+ """Test that group() accepts string for group_by (field path)."""
77
77
  builder = PipelineBuilder()
78
- with pytest.raises(TypeError, match="group_by must be a dict"):
79
- builder.group(123, {})
78
+ # String field path is valid in MongoDB
79
+ pipeline = builder.group("$categoryType", {"total": {"$sum": "$amount"}}).build()
80
+ assert pipeline == [{"$group": {"_id": "$categoryType", "total": {"$sum": "$amount"}}}]
81
+
82
+ def test_group_empty_string_with_empty_accumulators_raises_error(self):
83
+ """Test that group('', {}) raises ValueError."""
84
+ builder = PipelineBuilder()
85
+ with pytest.raises(ValueError, match="group_by and accumulators cannot both be empty"):
86
+ builder.group("", {})
80
87
 
81
88
  def test_group_invalid_accumulators_type_raises_error(self):
82
89
  """Test that group({}, 123) raises TypeError."""
@@ -193,3 +200,4 @@ class TestProjectValidation:
193
200
  builder.project(123)
194
201
 
195
202
 
203
+
@@ -157,3 +157,4 @@ class TestSetFieldValidation:
157
157
  builder.set_field(123)
158
158
 
159
159
 
160
+