@cubis/foundry 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/Ai Agent Workflow/powers/database-skills/POWER.md +15 -2
  2. package/Ai Agent Workflow/powers/database-skills/SKILL.md +26 -2
  3. package/Ai Agent Workflow/powers/database-skills/engines/mongodb/POWER.md +10 -0
  4. package/Ai Agent Workflow/powers/database-skills/engines/mysql/POWER.md +10 -0
  5. package/Ai Agent Workflow/powers/database-skills/engines/neki/POWER.md +10 -0
  6. package/Ai Agent Workflow/powers/database-skills/engines/postgres/POWER.md +10 -0
  7. package/Ai Agent Workflow/powers/database-skills/engines/redis/POWER.md +10 -0
  8. package/Ai Agent Workflow/powers/database-skills/engines/sqlite/POWER.md +10 -0
  9. package/Ai Agent Workflow/powers/database-skills/engines/supabase/POWER.md +10 -0
  10. package/Ai Agent Workflow/powers/database-skills/engines/vitess/POWER.md +10 -0
  11. package/Ai Agent Workflow/powers/database-skills/steering/readme.md +18 -6
  12. package/Ai Agent Workflow/skills/database-skills/LATEST_VERSIONS.md +36 -0
  13. package/Ai Agent Workflow/skills/database-skills/README.md +11 -2
  14. package/Ai Agent Workflow/skills/database-skills/SKILL.md +85 -20
  15. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/SKILL.md +29 -7
  16. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/aggregation.md +153 -0
  17. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/modeling.md +95 -4
  18. package/Ai Agent Workflow/skills/database-skills/skills/mongodb/references/mongoose-nestjs.md +133 -4
  19. package/Ai Agent Workflow/skills/database-skills/skills/mysql/SKILL.md +33 -7
  20. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/locking-ddl.md +103 -4
  21. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/query-indexing.md +103 -4
  22. package/Ai Agent Workflow/skills/database-skills/skills/mysql/references/replication.md +142 -0
  23. package/Ai Agent Workflow/skills/database-skills/skills/neki/SKILL.md +18 -7
  24. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/architecture.md +135 -4
  25. package/Ai Agent Workflow/skills/database-skills/skills/neki/references/operations.md +76 -4
  26. package/Ai Agent Workflow/skills/database-skills/skills/postgres/SKILL.md +31 -7
  27. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/connection-pooling.md +142 -0
  28. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/migrations.md +126 -0
  29. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/performance-ops.md +116 -4
  30. package/Ai Agent Workflow/skills/database-skills/skills/postgres/references/schema-indexing.md +78 -4
  31. package/Ai Agent Workflow/skills/database-skills/skills/redis/SKILL.md +28 -7
  32. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/cache-patterns.md +153 -4
  33. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/data-modeling.md +152 -0
  34. package/Ai Agent Workflow/skills/database-skills/skills/redis/references/operations.md +143 -4
  35. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/SKILL.md +28 -7
  36. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/local-first.md +94 -4
  37. package/Ai Agent Workflow/skills/database-skills/skills/sqlite/references/performance.md +104 -4
  38. package/Ai Agent Workflow/skills/database-skills/skills/supabase/SKILL.md +27 -7
  39. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/performance-operations.md +94 -4
  40. package/Ai Agent Workflow/skills/database-skills/skills/supabase/references/rls-auth.md +105 -4
  41. package/Ai Agent Workflow/skills/database-skills/skills/vitess/SKILL.md +27 -7
  42. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/operational-safety.md +104 -4
  43. package/Ai Agent Workflow/skills/database-skills/skills/vitess/references/sharding-routing.md +124 -4
  44. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/backend-specialist.md +1 -1
  45. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/database-architect.md +8 -1
  46. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/agents/performance-optimizer.md +2 -0
  47. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/antigravity/workflows/database.md +11 -6
  48. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/backend-specialist.md +1 -1
  49. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/database-architect.md +8 -1
  50. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/agents/performance-optimizer.md +2 -0
  51. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/codex/workflows/database.md +11 -6
  52. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/backend-specialist.md +1 -1
  53. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/database-architect.md +8 -1
  54. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/agents/performance-optimizer.md +2 -0
  55. package/Ai Agent Workflow/workflows/agent-environment-setup/platforms/copilot/workflows/database.md +11 -6
  56. package/package.json +1 -1
@@ -1,8 +1,21 @@
1
1
  ---
2
2
  name: database-skills
3
- description: Compatibility power wrapper for the database-skills package.
3
+ description: Power-oriented database execution pack for PostgreSQL, MySQL, Vitess, Neki, MongoDB, SQLite, Supabase, and Redis.
4
4
  ---
5
5
 
6
6
  # database-skills
7
7
 
8
- Use this wrapper when power-oriented flows need the consolidated database package.
8
+ This is the power-mode entrypoint for database tasks.
9
+
10
+ Primary source files:
11
+ - `Ai Agent Workflow/skills/database-skills/SKILL.md`
12
+ - `Ai Agent Workflow/skills/database-skills/LATEST_VERSIONS.md`
13
+
14
+ Execution policy:
15
+ 1. Pick engine wrapper first.
16
+ 2. Use measurable optimization methods only.
17
+ 3. No destructive schema/data action without explicit confirmation.
18
+ 4. Always include rollback.
19
+
20
+ Engine wrappers live in:
21
+ - `Ai Agent Workflow/powers/database-skills/engines/`
@@ -1,9 +1,33 @@
1
1
  ---
2
2
  name: database-skills
3
- description: Compatibility power wrapper for the database-skills package.
3
+ description: Power bridge for the unified database-skills package with engine-specific wrappers.
4
4
  ---
5
5
 
6
6
  # database-skills
7
7
 
8
- This power mirrors:
8
+ Use this power as the routing layer over the database skill pack.
9
+
10
+ Primary source of truth:
9
11
  - `Ai Agent Workflow/skills/database-skills/SKILL.md`
12
+ - `Ai Agent Workflow/skills/database-skills/LATEST_VERSIONS.md`
13
+
14
+ ## Required flow
15
+
16
+ 1. Read the version baseline first.
17
+ 2. Choose the engine wrapper in `engines/<engine>/POWER.md`.
18
+ 3. Produce decisions with:
19
+ - indexing strategy,
20
+ - pagination strategy,
21
+ - query-plan evidence (`EXPLAIN` or equivalent),
22
+ - rollback path.
23
+
24
+ ## Engine wrappers
25
+
26
+ - `engines/postgres/POWER.md`
27
+ - `engines/mysql/POWER.md`
28
+ - `engines/vitess/POWER.md`
29
+ - `engines/neki/POWER.md`
30
+ - `engines/mongodb/POWER.md`
31
+ - `engines/sqlite/POWER.md`
32
+ - `engines/supabase/POWER.md`
33
+ - `engines/redis/POWER.md`
@@ -0,0 +1,10 @@
1
+ # mongodb
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/mongodb/SKILL.md`
5
+
6
+ Focus:
7
+ - Access-pattern-first schema design
8
+ - Compound index strategy
9
+ - Cursor/range pagination over deep `skip`
10
+ - `explain("executionStats")` evidence
@@ -0,0 +1,10 @@
1
+ # mysql
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/mysql/SKILL.md`
5
+
6
+ Focus:
7
+ - Composite/covering index design
8
+ - Keyset pagination with stable ordering
9
+ - `EXPLAIN` / `EXPLAIN ANALYZE` evidence
10
+ - Online DDL lock and replication risk checks
@@ -0,0 +1,10 @@
1
+ # neki
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/neki/SKILL.md`
5
+
6
+ Focus:
7
+ - Shard key and locality planning
8
+ - Cross-shard boundary minimization
9
+ - Reversible migration milestones
10
+ - Explicit assumption tracking (pre-GA)
@@ -0,0 +1,10 @@
1
+ # postgres
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/postgres/SKILL.md`
5
+
6
+ Focus:
7
+ - Multicolumn/partial/INCLUDE indexes
8
+ - Keyset pagination for deep lists
9
+ - `EXPLAIN (ANALYZE, BUFFERS)` evidence
10
+ - Vacuum/autovacuum and stats health
@@ -0,0 +1,10 @@
1
+ # redis
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/redis/SKILL.md`
5
+
6
+ Focus:
7
+ - Key schema as access index
8
+ - Sorted set and SCAN-based pagination patterns
9
+ - Pipeline/batching throughput optimization
10
+ - Memory/latency diagnostics and eviction safety
@@ -0,0 +1,10 @@
1
+ # sqlite
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/sqlite/SKILL.md`
5
+
6
+ Focus:
7
+ - `EXPLAIN QUERY PLAN` verification
8
+ - Multicolumn/covering index design
9
+ - Keyset pagination for large local datasets
10
+ - WAL/checkpoint tuning and transaction batching
@@ -0,0 +1,10 @@
1
+ # supabase
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/supabase/SKILL.md`
5
+
6
+ Focus:
7
+ - RLS policy correctness and predicate indexing
8
+ - Keyset pagination for heavy endpoints
9
+ - Query optimization + advisor workflow
10
+ - Pooler mode and managed/self-host compatibility checks
@@ -0,0 +1,10 @@
1
+ # vitess
2
+
3
+ Source skill:
4
+ - `Ai Agent Workflow/skills/database-skills/skills/vitess/SKILL.md`
5
+
6
+ Focus:
7
+ - Vindex and VSchema routing quality
8
+ - Shard-local query design
9
+ - Shard-aware seek pagination
10
+ - Resharding rollout + rollback safety
@@ -1,9 +1,21 @@
1
1
  # database-skills steering
2
2
 
3
- Primary source:
4
- - `Ai Agent Workflow/skills/database-skills/`
3
+ Use this steering map for power-mode database tasks.
5
4
 
6
- Suggested flow:
7
- 1. Read hub SKILL
8
- 2. Read engine SKILL
9
- 3. Read engine references needed for current task
5
+ ## Routing map
6
+
7
+ - Postgres: `engines/postgres/POWER.md`
8
+ - MySQL: `engines/mysql/POWER.md`
9
+ - Vitess: `engines/vitess/POWER.md`
10
+ - Neki: `engines/neki/POWER.md`
11
+ - MongoDB: `engines/mongodb/POWER.md`
12
+ - SQLite: `engines/sqlite/POWER.md`
13
+ - Supabase: `engines/supabase/POWER.md`
14
+ - Redis: `engines/redis/POWER.md`
15
+
16
+ ## Required output for DB work
17
+
18
+ - Indexing plan
19
+ - Pagination plan
20
+ - Query-plan evidence
21
+ - Rollback plan
@@ -0,0 +1,36 @@
1
+ # Database Versions Baseline
2
+
3
+ Last verified: 2026-02-20 (US)
4
+
5
+ ## Relational / SQL
6
+
7
+ - PostgreSQL: **18.2** current minor for supported major 18.
8
+ - MySQL:
9
+ - **8.4.8** current LTS patch.
10
+ - **9.6.0** latest Innovation release.
11
+ - SQLite: **3.51.2** current stable release.
12
+ - Vitess: **v23.0.2** current stable patch in v23.0 line.
13
+
14
+ ## Document / KV / Managed
15
+
16
+ - MongoDB: **8.2.5** latest patch in current 8.2 minor line.
17
+ - Redis Open Source: **8.4.0** latest GA release (8.2 remains an actively documented line).
18
+ - Supabase Postgres:
19
+ - Managed projects may run **Postgres 17**.
20
+ - Self-hosted Docker docs currently call out **Postgres 15** compatibility constraints.
21
+ - Neki: announced and in active development/waitlist stage (no GA semantic version yet).
22
+
23
+ ## Source links (official)
24
+
25
+ - PostgreSQL versioning: https://www.postgresql.org/support/versioning/
26
+ - MySQL release model: https://dev.mysql.com/doc/refman/8.4/en/mysql-releases.html
27
+ - MySQL 8.4.8 LTS notes: https://dev.mysql.com/doc/relnotes/mysql/8.4/en/news-8-4-8.html
28
+ - MySQL 9.6.0 Innovation notes: https://dev.mysql.com/doc/relnotes/mysql/9.6/en/news-9-6-0.html
29
+ - SQLite current release: https://sqlite.org/releaselog/current.html
30
+ - Vitess releases: https://vitess.io/docs/releases/
31
+ - MongoDB 8.2 release notes: https://www.mongodb.com/docs/manual/release-notes/8.2/
32
+ - Redis 8.4 updates: https://redis.io/docs/latest/develop/whats-new/8-4/
33
+ - Redis OSS releases: https://github.com/redis/redis/releases
34
+ - Supabase restore compatibility note: https://supabase.com/docs/guides/self-hosting/restore-from-platform
35
+ - Neki announcement: https://planetscale.com/blog/announcing-neki
36
+ - Neki product page: https://planetscale.com/neki
@@ -1,12 +1,13 @@
1
1
  # database-skills
2
2
 
3
- Engine-specific database skill pack inspired by `planetscale/database-skills`, expanded for common stacks.
3
+ Engine-specific database skill pack inspired by `planetscale/database-skills`, expanded for modern production stacks.
4
4
 
5
5
  ## Layout
6
6
 
7
7
  ```text
8
8
  database-skills/
9
9
  ├── README.md
10
+ ├── LATEST_VERSIONS.md
10
11
  ├── SKILL.md
11
12
  └── skills/
12
13
  ├── postgres/
@@ -35,7 +36,15 @@ database-skills/
35
36
  └── references/
36
37
  ```
37
38
 
39
+ ## What each engine pack must cover
40
+
41
+ - Index strategy for real query patterns.
42
+ - Pagination strategy (keyset/seek first, offset only when justified).
43
+ - Query plan workflow (`EXPLAIN` or engine equivalent).
44
+ - Write/read tradeoff notes.
45
+ - Safe rollout + rollback notes for schema and operational changes.
46
+
38
47
  ## Notes
39
48
 
40
49
  - Use this package as the single database skill dependency in agents/workflows.
41
- - Keep engine-specific guidance in `skills/<engine>/references/`.
50
+ - Keep version-sensitive guidance synced with `LATEST_VERSIONS.md`.
@@ -3,35 +3,100 @@ name: database-skills
3
3
  description: Unified database skill hub with engine-specific packs for PostgreSQL, MySQL, Vitess, Neki, MongoDB (Mongoose), SQLite, Supabase, and Redis.
4
4
  allowed-tools: Read, Write, Edit, Glob, Grep, Bash
5
5
  metadata:
6
- version: "1.0.0"
6
+ version: "2.0.0"
7
7
  domain: data
8
- triggers: database, sql, postgres, mysql, vitess, neki, mongodb, mongoose, sqlite, supabase, redis, schema, migration, index, query, performance
8
+ triggers: database, sql, postgres, mysql, vitess, neki, mongodb, mongoose, sqlite, supabase, redis, schema, migration, index, query, performance, pagination, replication, pooling, sharding, cache
9
9
  ---
10
10
 
11
11
  # Database Skills Hub
12
12
 
13
- Use this as the single database package.
13
+ Use this as the single database package. Load the target engine's `SKILL.md` from `skills/<engine>/`, then load relevant `references/*` files.
14
+
15
+ ## Engine selection
16
+
17
+ | Situation | Engine |
18
+ | --- | --- |
19
+ | Relational OLTP, self-hosted or cloud | Postgres |
20
+ | Relational OLTP, MySQL-compatible managed service | MySQL + Vitess (PlanetScale) |
21
+ | Multi-tenant SaaS needing horizontal Postgres scale | Neki |
22
+ | Document model, flexible schema | MongoDB |
23
+ | Mobile / desktop / edge local storage | SQLite |
24
+ | BaaS with built-in auth, storage & realtime | Supabase |
25
+ | Caching, queues, leaderboards, pub/sub | Redis |
14
26
 
15
27
  ## Structure
16
28
 
17
- - `skills/postgres/`
18
- - `skills/mysql/`
19
- - `skills/vitess/`
20
- - `skills/neki/`
21
- - `skills/mongodb/`
22
- - `skills/sqlite/`
23
- - `skills/supabase/`
24
- - `skills/redis/`
29
+ ```
30
+ skills/
31
+ postgres/
32
+ SKILL.md
33
+ references/
34
+ schema-indexing.md ← index types, composite, partial, INCLUDE
35
+ performance-ops.md ← EXPLAIN, pg_stat_statements, VACUUM, autovacuum
36
+ migrations.md ← zero-downtime DDL, expand/contract, tools
37
+ connection-pooling.md ← PgBouncer, pool sizing, serverless patterns
38
+
39
+ mysql/
40
+ SKILL.md
41
+ references/
42
+ query-indexing.md ← EXPLAIN, composite indexes, covering, seek pagination
43
+ locking-ddl.md ← INSTANT/INPLACE/COPY, MDL, gh-ost, deadlocks
44
+ replication.md ← binlog formats, GTID, lag monitoring, read routing
45
+
46
+ vitess/
47
+ SKILL.md
48
+ references/
49
+ sharding-routing.md ← VSchema, vindexes, sequences, scatter queries
50
+ operational-safety.md ← Online DDL strategies, migration lifecycle, VReplication
51
+
52
+ neki/
53
+ SKILL.md
54
+ references/
55
+ architecture.md ← sharded Postgres architecture, pre-sharding checklist
56
+ operations.md ← migration planning, validation, provisional pre-GA guidance
57
+
58
+ mongodb/
59
+ SKILL.md
60
+ references/
61
+ modeling.md ← embed vs reference, compound indexes, explain(), pagination
62
+ mongoose-nestjs.md ← repository pattern, lean reads, transactions, NestJS setup
63
+ aggregation.md ← pipeline stages, $group, $lookup, $facet, performance
64
+
65
+ sqlite/
66
+ SKILL.md
67
+ references/
68
+ local-first.md ← WAL mode, migration patterns, sync/conflict strategies
69
+ performance.md ← EXPLAIN QUERY PLAN, indexes, batch writes, checkpoint tuning
70
+
71
+ supabase/
72
+ SKILL.md
73
+ references/
74
+ rls-auth.md ← RLS policies, auth.uid(), index predicates, service_role
75
+ performance-operations.md ← query optimization, connection modes, pooler selection
76
+
77
+ redis/
78
+ SKILL.md
79
+ references/
80
+ data-modeling.md ← data structure selection, key naming, TTL strategy
81
+ cache-patterns.md ← pipelining, SCAN, rate limiting, leaderboards, invalidation
82
+ operations.md ← memory management, eviction policies, latency diagnostics
83
+ ```
25
84
 
26
- ## Routing
85
+ ## Required flow
27
86
 
28
- 1. Identify the target engine from the user request.
29
- 2. Load that engine's `SKILL.md` first.
30
- 3. Load only the referenced files needed for the current task.
31
- 4. If engine is unclear, ask before implementation.
87
+ 1. Read `LATEST_VERSIONS.md` before proposing version-specific behavior.
88
+ 2. Use engine selection table above to pick the target engine.
89
+ 3. Load the target engine `SKILL.md`, then read only the `references/*` files that match the task (indexing, migrations, replication, etc.).
90
+ 4. Provide an optimization or implementation plan that includes:
91
+ - specific change with rationale,
92
+ - indexing or schema decisions,
93
+ - migration safety (online vs offline),
94
+ - rollback path.
95
+ 5. For production-impacting changes, include blast-radius assessment and rollout stages.
32
96
 
33
- ## Global Guardrails
97
+ ## Cross-engine performance checklist
34
98
 
35
- - No destructive operations without explicit confirmation.
36
- - Include migration and rollback steps for production changes.
37
- - Use evidence-first tuning (`EXPLAIN`, plans, slow logs, lock metrics).
99
+ - **Indexing**: add indexes only for real predicates and sort patterns. Drop unused indexes — they penalize writes.
100
+ - **Pagination**: prefer keyset/seek for deep or high-throughput pagination. Reserve offset for shallow interactive pages only.
101
+ - **Measurement**: compare before/after plans with engine-native explain tooling (`EXPLAIN ANALYZE`, `VEXPLAIN`, `explain()`). Validate with realistic cardinality.
102
+ - **Safety**: no destructive data/schema operations without explicit user confirmation. Always include rollback/recovery steps.
@@ -1,15 +1,37 @@
1
1
  ---
2
2
  name: mongodb
3
- description: MongoDB and Mongoose modeling, indexing, query tuning, and transaction guidance.
3
+ description: MongoDB and Mongoose modeling, indexing, pagination, query tuning, and transaction guidance.
4
4
  ---
5
5
 
6
6
  # MongoDB and Mongoose
7
7
 
8
- Load references as needed:
8
+ ## Optimization workflow
9
+
10
+ 1. Model around dominant read/write paths (embed vs reference).
11
+ 2. Add compound indexes for real filter + sort patterns.
12
+ 3. Validate with `explain("executionStats")`.
13
+ 4. Prefer range/keyset pagination over deep `skip`.
14
+ 5. Re-check index selectivity and write overhead as data grows.
15
+
16
+ ## Indexing techniques
17
+
18
+ - Prefer compound indexes over isolated single-field indexes when the query shape demands it.
19
+ - Keep index count controlled on write-heavy collections.
20
+ - Use partial/sparse strategies only when semantics are correct.
21
+
22
+ ## Pagination techniques
23
+
24
+ - Avoid large-offset `skip` on large collections.
25
+ - Use boundary-based pagination with indexed monotonic key (`_id` or timestamp+id).
26
+ - Keep deterministic sort and cursor boundary state.
27
+
28
+ ## Mongoose/NestJS guardrails
29
+
30
+ - Keep schema indexes explicit in model definitions/migrations.
31
+ - Use projections to avoid over-fetching large documents.
32
+ - Use transactions only when cross-document invariants require them.
33
+
34
+ ## References
35
+
9
36
  - `references/modeling.md`
10
37
  - `references/mongoose-nestjs.md`
11
-
12
- Key rules:
13
- - Model by access pattern (embed vs reference).
14
- - Use compound indexes for dominant filters/sorts.
15
- - Use transactions only where multi-document invariants require them.
@@ -0,0 +1,153 @@
1
+ # MongoDB — Aggregation Pipeline
2
+
3
+ ## Pipeline basics
4
+
5
+ The aggregation pipeline is a sequence of stages, each transforming the document stream. Stages execute in order.
6
+
7
+ ```js
8
+ db.orders.aggregate([
9
+ { $match: { status: 'complete', userId: 'u42' } }, // filter early
10
+ { $group: { _id: '$category', total: { $sum: '$amount' }, count: { $sum: 1 } } },
11
+ { $sort: { total: -1 } },
12
+ { $limit: 10 },
13
+ { $project: { category: '$_id', total: 1, count: 1, _id: 0 } }
14
+ ])
15
+ ```
16
+
17
+ **Critical rule**: Put `$match` as early as possible to reduce documents flowing through subsequent stages. MongoDB can use indexes for `$match` at the start of a pipeline.
18
+
19
+ ## Common stages
20
+
21
+ | Stage | Purpose |
22
+ | --- | --- |
23
+ | `$match` | Filter documents (use early, supports indexes) |
24
+ | `$group` | Group by key, apply accumulators (`$sum`, `$avg`, `$min`, `$max`, `$push`, `$addToSet`) |
25
+ | `$project` | Reshape documents (include/exclude/rename/compute fields) |
26
+ | `$sort` | Sort (can use index when at start, before `$group`) |
27
+ | `$limit` / `$skip` | Pagination (prefer range-based — see below) |
28
+ | `$lookup` | Left outer join to another collection |
29
+ | `$unwind` | Flatten an array field into individual documents |
30
+ | `$addFields` | Add computed fields without removing existing |
31
+ | `$facet` | Run multiple sub-pipelines in parallel (for multi-category aggregation) |
32
+ | `$bucket` | Group into ranges (histogram) |
33
+ | `$count` | Count documents |
34
+ | `$out` / `$merge` | Write results to a collection |
35
+
36
+ ## $group accumulators
37
+
38
+ ```js
39
+ { $group: {
40
+ _id: '$category',
41
+ total: { $sum: '$amount' }, // sum
42
+ average: { $avg: '$amount' }, // average
43
+ max: { $max: '$amount' }, // max
44
+ min: { $min: '$amount' }, // min
45
+ count: { $sum: 1 }, // count rows
46
+ products: { $push: '$productId' }, // array of all values (duplicates kept)
47
+ unique: { $addToSet: '$productId' }, // array of unique values
48
+ first: { $first: '$createdAt' }, // first value in group
49
+ }}
50
+ ```
51
+
52
+ ## $lookup — joins
53
+
54
+ ```js
55
+ // Left join orders with users
56
+ db.orders.aggregate([
57
+ { $match: { status: 'complete' } },
58
+ { $lookup: {
59
+ from: 'users',
60
+ localField: 'userId',
61
+ foreignField: '_id',
62
+ as: 'user'
63
+ }},
64
+ { $unwind: { path: '$user', preserveNullAndEmptyArrays: true } },
65
+ { $project: { total: 1, 'user.name': 1, 'user.email': 1 } }
66
+ ])
67
+ ```
68
+
69
+ `$lookup` is expensive on large collections — **prefer embedding** if data is always accessed together. Always filter with `$match` before `$lookup` to minimize joined documents.
70
+
71
+ Pipeline-style `$lookup` (MongoDB 3.6+) for filtered joins:
72
+ ```js
73
+ { $lookup: {
74
+ from: 'orderItems',
75
+ let: { orderId: '$_id' },
76
+ pipeline: [
77
+ { $match: { $expr: { $eq: ['$orderId', '$$orderId'] } } },
78
+ { $project: { sku: 1, qty: 1, _id: 0 } }
79
+ ],
80
+ as: 'items'
81
+ }}
82
+ ```
83
+
84
+ ## $unwind
85
+
86
+ Flattens array fields — generates one output document per array element:
87
+
88
+ ```js
89
+ // Input: { _id: 1, tags: ['a', 'b', 'c'] }
90
+ { $unwind: '$tags' }
91
+ // Output: { _id: 1, tags: 'a' }, { _id: 1, tags: 'b' }, { _id: 1, tags: 'c' }
92
+ ```
93
+
94
+ Watch out: `$unwind` on an array of 1000 elements turns 1 doc into 1000. Use `preserveNullAndEmptyArrays: true` to keep docs with missing/empty arrays.
95
+
96
+ ## Pagination in aggregation
97
+
98
+ Avoid `$skip` for deep pagination — it scans all skipped docs.
99
+
100
+ ```js
101
+ // BAD: $skip is O(N)
102
+ db.orders.aggregate([
103
+ { $sort: { createdAt: -1 } },
104
+ { $skip: 10000 },
105
+ { $limit: 20 }
106
+ ])
107
+
108
+ // GOOD: range-based pagination
109
+ db.orders.aggregate([
110
+ { $match: { $or: [ { createdAt: { $lt: lastSeenDate } }, { createdAt: lastSeenDate, _id: { $lt: lastSeenId } } ] } },
111
+ { $sort: { createdAt: -1, _id: -1 } },
112
+ { $limit: 20 }
113
+ ])
114
+ ```
115
+
116
+ ## $facet — parallel aggregations
117
+
118
+ Run multiple sub-pipelines against the same input in one pass:
119
+
120
+ ```js
121
+ db.products.aggregate([
122
+ { $match: { active: true } },
123
+ { $facet: {
124
+ byCategory: [
125
+ { $group: { _id: '$category', count: { $sum: 1 } } }
126
+ ],
127
+ priceRange: [
128
+ { $bucket: { groupBy: '$price', boundaries: [0, 50, 100, 200, 500], default: '500+' } }
129
+ ],
130
+ total: [
131
+ { $count: 'count' }
132
+ ]
133
+ }}
134
+ ])
135
+ ```
136
+
137
+ ## Performance tips
138
+
139
+ - **Index for `$match` and `$sort`**: the pipeline can use an index only for `$match` and `$sort` stages that appear before any `$group`/`$project`/`$unwind`.
140
+ - **Use `allowDiskUse: true`** for large aggregations whose stages exceed the 100 MB per-stage memory limit (for example, a large in-memory `$sort` or `$group`).
141
+ - **`$project` early** to reduce document size flowing through the pipeline.
142
+ - **Avoid `$lookup` on hot paths** — cache results or redesign schema to embed.
143
+ - **Use `$merge` to pre-compute** expensive aggregations into a results collection, then query that.
144
+
145
+ ```js
146
+ // Run explain on aggregation
147
+ db.orders.explain('executionStats').aggregate([...])
148
+ ```
149
+
150
+ ## Sources
151
+ - Aggregation pipeline: https://www.mongodb.com/docs/manual/aggregation/
152
+ - Pipeline stages reference: https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/
153
+ - Aggregation performance: https://www.mongodb.com/docs/manual/core/aggregation-pipeline-optimization/
@@ -1,5 +1,96 @@
1
- # MongoDB Modeling
1
+ # MongoDB Modeling, Indexing, and Pagination
2
2
 
3
- - Embed for bounded one-to-few relationships.
4
- - Reference for large or shared entities.
5
- - Avoid unbounded arrays in hot collections.
3
+ ## Data modeling philosophy
4
+
5
+ MongoDB is schema-flexible at the storage layer, but **your schema is defined by your queries, not by your data shape**. Model documents for the queries you actually run.
6
+
7
+ ### Embed vs reference
8
+
9
+ | Embed | Reference (DBRef / manual) |
10
+ | --- | --- |
11
+ | Data is always accessed together | Data is accessed independently |
12
+ | 1-to-1 or bounded 1-to-few | 1-to-many with unbounded growth |
13
+ | Child data has no standalone identity | Child data is queried on its own |
14
+ | Written/updated together | Updated at different rates |
15
+
16
+ ```js
17
+ // Embed: order with line items (always fetched together, bounded count)
18
+ { _id: ..., userId: ..., lineItems: [ { sku, qty, price }, ... ] }
19
+
20
+ // Reference: user with orders (orders accessed independently, unbounded)
21
+ { _id: ..., name: "Alice" } // users collection
22
+ { _id: ..., userId: <ref>, total: 99 } // orders collection
23
+ ```
24
+
25
+ Avoid embedding unbounded arrays — document size limit is 16MB and large docs slow query/update performance.
26
+
27
+ ## Index types and when to use them
28
+
29
+ | Type | Use for |
30
+ | --- | --- |
31
+ | **Single field** | Equality, range, sort on one field |
32
+ | **Compound** | Combined equality + sort + range — field order matters |
33
+ | **Multikey** | Fields that are arrays (auto-detected by MongoDB) |
34
+ | **Text** | Full-text search on string fields |
35
+ | **Wildcard** | Arbitrary field access patterns |
36
+ | **2dsphere** | Geospatial queries |
37
+ | **TTL** | Auto-delete documents after expiry |
38
+
39
+ ## Compound index design
40
+
41
+ Same leftmost-prefix rule as SQL — **equality fields first, then sort, then range** (MongoDB's ESR guideline):
42
+
43
+ ```js
44
+ // Query: find open orders for a user, sorted by date
45
+ db.orders.find({ userId: X, status: "open" }).sort({ createdAt: -1 })
46
+
47
+ // Correct index: equality fields lead, sort field last
48
+ db.orders.createIndex({ userId: 1, status: 1, createdAt: -1 })
49
+
50
+ // Wrong: sort field before filter — doesn't eliminate docs efficiently
51
+ db.orders.createIndex({ createdAt: -1, userId: 1, status: 1 })
52
+ ```
53
+
54
+ ## Reading query plans with explain()
55
+
56
+ ```js
57
+ db.orders.find({ userId: 1 }).explain("executionStats")
58
+ ```
59
+
60
+ Key things to check:
61
+ - `winningPlan.stage`: `COLLSCAN` = full scan (bad on large collections), `IXSCAN` = index scan (good).
62
+ - `executionStats.totalDocsExamined` vs `nReturned`: should be close. Large ratio = poor index coverage.
63
+ - `executionStats.executionTimeMillis`: baseline to compare before/after index changes.
64
+
65
+ ## Avoid deep skip() pagination
66
+
67
+ `skip(N)` makes MongoDB scan and discard N documents. On large collections this is O(N).
68
+
69
+ ```js
70
+ // BAD: skip-based pagination
71
+ db.orders.find().sort({ _id: 1 }).skip(10000).limit(20)
72
+
73
+ // GOOD: range-based pagination using last seen _id (or cursor field)
74
+ db.orders.find({ _id: { $gt: lastSeenId } }).sort({ _id: 1 }).limit(20)
75
+
76
+ // For composite sort keys
77
+ db.orders.find({
78
+ $or: [
79
+ { createdAt: { $lt: lastDate } },
80
+ { createdAt: lastDate, _id: { $lt: lastId } }
81
+ ]
82
+ }).sort({ createdAt: -1, _id: -1 }).limit(20)
83
+ ```
84
+
85
+ ## Common modeling mistakes
86
+
87
+ - **Storing growing arrays in a document**: the document grows without bound → move to a child collection.
88
+ - **Using `$lookup` as a substitute for embedding**: `$lookup` is expensive; redesign the schema if you always join.
89
+ - **Not projecting fields**: always project only needed fields to reduce document transfer size.
90
+ - **Missing index on high-cardinality filter fields**: every frequently executed `find()` path should be backed by an index.
91
+
92
+ ## Sources
93
+ - Explain plans: https://www.mongodb.com/docs/manual/reference/method/db.collection.explain/
94
+ - `skip()` caveats: https://www.mongodb.com/docs/manual/reference/method/cursor.skip/
95
+ - Query plans: https://www.mongodb.com/docs/manual/core/query-plans/
96
+ - Data modeling guide: https://www.mongodb.com/docs/manual/data-modeling/