npm - aigroup-workflow - Versions diffs - 2.2.0 → 2.2.2 - Mend

aigroup-workflow 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (645)

package/.claude/commands/fix-build.md +10 -5
package/.claude/commands/init-project.md +13 -8
package/.claude/commands/plan.md +15 -8
package/.claude/commands/review.md +12 -6
package/.claude/commands/tdd.md +11 -5
package/.claude/commands/workflow-start.md +20 -11
package/.claude/settings.json +28 -0
package/.codex/agents/architect.toml +207 -0
package/.codex/agents/build-error-resolver.toml +110 -0
package/.codex/agents/code-reviewer.toml +233 -0
package/.codex/agents/doc-updater.toml +103 -0
package/.codex/agents/e2e-runner.toml +103 -0
package/.codex/agents/get-current-datetime.toml +23 -0
package/.codex/agents/init-architect.toml +181 -0
package/.codex/agents/planner.toml +208 -0
package/.codex/agents/refactor-cleaner.toml +81 -0
package/.codex/agents/rust-reviewer.toml +90 -0
package/.codex/agents/security-reviewer.toml +104 -0
package/.codex/agents/tdd-guide.toml +87 -0
package/AGENTS.md +2 -2
package/CLAUDE.md +23 -1
package/LICENSE +20 -20
package/README.md +333 -333
package/agents/a11y-architect.md +141 -141
package/agents/architect.md +211 -211
package/agents/build-error-resolver.md +114 -114
package/agents/chief-of-staff.md +151 -151
package/agents/code-architect.md +71 -71
package/agents/code-explorer.md +69 -69
package/agents/code-reviewer.md +237 -237
package/agents/code-simplifier.md +47 -47
package/agents/comment-analyzer.md +45 -45
package/agents/conversation-analyzer.md +52 -52
package/agents/cpp-build-resolver.md +90 -90
package/agents/cpp-reviewer.md +72 -72
package/agents/csharp-reviewer.md +101 -101
package/agents/dart-build-resolver.md +201 -201
package/agents/database-reviewer.md +91 -91
package/agents/doc-updater.md +107 -107
package/agents/docs-lookup.md +68 -68
package/agents/e2e-runner.md +107 -107
package/agents/flutter-reviewer.md +243 -243
package/agents/gan-evaluator.md +209 -209
package/agents/gan-generator.md +131 -131
package/agents/gan-planner.md +99 -99
package/agents/get-current-datetime.md +26 -26
package/agents/go-build-resolver.md +94 -94
package/agents/go-reviewer.md +76 -76
package/agents/harness-optimizer.md +35 -35
package/agents/healthcare-reviewer.md +83 -83
package/agents/java-build-resolver.md +153 -153
package/agents/java-reviewer.md +92 -92
package/agents/kotlin-build-resolver.md +118 -118
package/agents/kotlin-reviewer.md +159 -159
package/agents/loop-operator.md +36 -36
package/agents/opensource-forker.md +198 -198
package/agents/opensource-packager.md +249 -249
package/agents/opensource-sanitizer.md +188 -188
package/agents/performance-optimizer.md +446 -446
package/agents/planner.md +212 -212
package/agents/pr-test-analyzer.md +45 -45
package/agents/python-reviewer.md +98 -98
package/agents/pytorch-build-resolver.md +120 -120
package/agents/refactor-cleaner.md +85 -85
package/agents/rust-build-resolver.md +148 -148
package/agents/rust-reviewer.md +94 -94
package/agents/security-reviewer.md +108 -108
package/agents/seo-specialist.md +59 -59
package/agents/silent-failure-hunter.md +50 -50
package/agents/tdd-guide.md +91 -91
package/agents/type-design-analyzer.md +41 -41
package/agents/typescript-reviewer.md +112 -112
package/cli/commands/update.mjs +1 -1
package/cli/utils/scaffold.mjs +53 -0
package/docs/rules/agents.md +166 -50
package/docs/rules/cpp/coding-style.md +44 -44
package/docs/rules/cpp/hooks.md +39 -39
package/docs/rules/cpp/patterns.md +51 -51
package/docs/rules/cpp/security.md +51 -51
package/docs/rules/cpp/testing.md +44 -44
package/docs/rules/csharp/coding-style.md +72 -72
package/docs/rules/csharp/hooks.md +25 -25
package/docs/rules/csharp/patterns.md +50 -50
package/docs/rules/csharp/security.md +58 -58
package/docs/rules/csharp/testing.md +46 -46
package/docs/rules/dart/coding-style.md +159 -159
package/docs/rules/dart/hooks.md +66 -66
package/docs/rules/dart/patterns.md +261 -261
package/docs/rules/dart/security.md +135 -135
package/docs/rules/dart/testing.md +215 -215
package/docs/rules/golang/coding-style.md +32 -32
package/docs/rules/golang/hooks.md +17 -17
package/docs/rules/golang/patterns.md +45 -45
package/docs/rules/golang/security.md +34 -34
package/docs/rules/golang/testing.md +31 -31
package/docs/rules/java/coding-style.md +114 -114
package/docs/rules/java/hooks.md +18 -18
package/docs/rules/java/patterns.md +146 -146
package/docs/rules/java/security.md +100 -100
package/docs/rules/java/testing.md +131 -131
package/docs/rules/kotlin/coding-style.md +86 -86
package/docs/rules/kotlin/hooks.md +17 -17
package/docs/rules/kotlin/patterns.md +146 -146
package/docs/rules/kotlin/security.md +82 -82
package/docs/rules/kotlin/testing.md +128 -128
package/docs/rules/perl/coding-style.md +46 -46
package/docs/rules/perl/hooks.md +22 -22
package/docs/rules/perl/patterns.md +76 -76
package/docs/rules/perl/security.md +69 -69
package/docs/rules/perl/testing.md +54 -54
package/docs/rules/php/coding-style.md +40 -40
package/docs/rules/php/hooks.md +24 -24
package/docs/rules/php/patterns.md +33 -33
package/docs/rules/php/security.md +37 -37
package/docs/rules/php/testing.md +39 -39
package/docs/rules/python/coding-style.md +42 -42
package/docs/rules/python/hooks.md +19 -19
package/docs/rules/python/patterns.md +39 -39
package/docs/rules/python/security.md +30 -30
package/docs/rules/python/testing.md +38 -38
package/docs/rules/rust/coding-style.md +151 -151
package/docs/rules/rust/hooks.md +16 -16
package/docs/rules/rust/patterns.md +168 -168
package/docs/rules/rust/security.md +141 -141
package/docs/rules/rust/testing.md +154 -154
package/docs/rules/swift/coding-style.md +47 -47
package/docs/rules/swift/hooks.md +20 -20
package/docs/rules/swift/patterns.md +66 -66
package/docs/rules/swift/security.md +33 -33
package/docs/rules/swift/testing.md +45 -45
package/docs/rules/typescript/coding-style.md +199 -199
package/docs/rules/typescript/hooks.md +22 -22
package/docs/rules/typescript/patterns.md +52 -52
package/docs/rules/typescript/security.md +28 -28
package/docs/rules/typescript/testing.md +18 -18
package/docs/rules/web/coding-style.md +96 -96
package/docs/rules/web/design-quality.md +62 -62
package/docs/rules/web/hooks.md +120 -120
package/docs/rules/web/patterns.md +79 -79
package/docs/rules/web/performance.md +64 -64
package/docs/rules/web/security.md +57 -57
package/docs/rules/web/testing.md +55 -55
package/docs/templates/README.md +36 -36
package/docs/templates/ai-project-final.md +124 -124
package/docs/templates/ai-project.md +105 -105
package/docs/templates/api.md +157 -157
package/docs/templates/bug.md +62 -62
package/docs/templates/code-review.md +87 -87
package/docs/templates/generic.md +116 -116
package/docs/templates/implementation-plan.md +1 -1
package/docs/templates/meeting.md +68 -68
package/docs/templates/prd.md +98 -98
package/docs/templates/ui.md +134 -134
package/docs/workflow-pipeline.md +11 -10
package/package.json +40 -39
package/scripts/hooks/checks/orchestration-artifacts.cjs +28 -23
package/scripts/hooks/checks/workflow-state.cjs +4 -5
package/scripts/orchestration/lib/orchestrator.cjs +344 -117
package/scripts/orchestration/lib/validate.cjs +145 -0
package/scripts/orchestration/session.cjs +88 -44
package/skills/SUPERPOWERS-LICENSE +21 -21
package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -162
package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -540
package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -673
package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -597
package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -565
package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -347
package/skills/ai-ml/ml-pipeline/SKILL.md +159 -159
package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -833
package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -631
package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -978
package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -907
package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -782
package/skills/ai-ml/rag-architect/SKILL.md +194 -194
package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -878
package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -561
package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -833
package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -795
package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -589
package/skills/ai-ml/spark-engineer/SKILL.md +148 -148
package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -543
package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -544
package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -599
package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -474
package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -786
package/skills/backend/api-designer/SKILL.md +217 -217
package/skills/backend/api-designer/references/error-handling.md +541 -541
package/skills/backend/api-designer/references/openapi.md +824 -824
package/skills/backend/api-designer/references/pagination.md +494 -494
package/skills/backend/api-designer/references/rest-patterns.md +335 -335
package/skills/backend/api-designer/references/versioning.md +391 -391
package/skills/backend/architecture-designer/SKILL.md +117 -117
package/skills/backend/architecture-designer/references/adr-template.md +116 -116
package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -111
package/skills/backend/architecture-designer/references/database-selection.md +102 -102
package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -112
package/skills/backend/architecture-designer/references/system-design.md +100 -100
package/skills/backend/code-documenter/SKILL.md +147 -147
package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -166
package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -220
package/skills/backend/code-documenter/references/coverage-reports.md +125 -125
package/skills/backend/code-documenter/references/documentation-systems.md +333 -333
package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -531
package/skills/backend/code-documenter/references/python-docstrings.md +121 -121
package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -145
package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -530
package/skills/backend/debugging-wizard/SKILL.md +105 -105
package/skills/backend/debugging-wizard/references/common-patterns.md +132 -132
package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -140
package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -177
package/skills/backend/debugging-wizard/references/strategies.md +142 -142
package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -367
package/skills/backend/feature-forge/SKILL.md +98 -98
package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -104
package/skills/backend/feature-forge/references/ears-syntax.md +99 -99
package/skills/backend/feature-forge/references/interview-questions.md +150 -150
package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -54
package/skills/backend/feature-forge/references/specification-template.md +103 -103
package/skills/backend/fullstack-guardian/SKILL.md +105 -105
package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -307
package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -350
package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -237
package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -134
package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -354
package/skills/backend/fullstack-guardian/references/design-template.md +91 -91
package/skills/backend/fullstack-guardian/references/error-handling.md +135 -135
package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -340
package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -333
package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -106
package/skills/backend/graphql-architect/SKILL.md +146 -146
package/skills/backend/graphql-architect/references/federation.md +418 -418
package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -1141
package/skills/backend/graphql-architect/references/resolvers.md +425 -425
package/skills/backend/graphql-architect/references/schema-design.md +393 -393
package/skills/backend/graphql-architect/references/security.md +569 -569
package/skills/backend/graphql-architect/references/subscriptions.md +510 -510
package/skills/backend/legacy-modernizer/SKILL.md +137 -137
package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -381
package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -423
package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -395
package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -281
package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -487
package/skills/backend/microservices-architect/SKILL.md +164 -164
package/skills/backend/microservices-architect/references/communication.md +499 -499
package/skills/backend/microservices-architect/references/data.md +721 -721
package/skills/backend/microservices-architect/references/decomposition.md +344 -344
package/skills/backend/microservices-architect/references/observability.md +805 -805
package/skills/backend/microservices-architect/references/patterns.md +603 -603
package/skills/database/database-optimizer/SKILL.md +147 -147
package/skills/database/database-optimizer/references/index-strategies.md +331 -331
package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -501
package/skills/database/database-optimizer/references/mysql-tuning.md +452 -452
package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -413
package/skills/database/database-optimizer/references/query-optimization.md +251 -251
package/skills/database/postgres-pro/SKILL.md +152 -152
package/skills/database/postgres-pro/references/extensions.md +404 -404
package/skills/database/postgres-pro/references/jsonb.md +321 -321
package/skills/database/postgres-pro/references/maintenance.md +481 -481
package/skills/database/postgres-pro/references/performance.md +265 -265
package/skills/database/postgres-pro/references/replication.md +446 -446
package/skills/database/sql-pro/SKILL.md +129 -129
package/skills/database/sql-pro/references/database-design.md +402 -402
package/skills/database/sql-pro/references/dialect-differences.md +419 -419
package/skills/database/sql-pro/references/optimization.md +384 -384
package/skills/database/sql-pro/references/query-patterns.md +285 -285
package/skills/database/sql-pro/references/window-functions.md +328 -328
package/skills/dotnet/csharp-developer/SKILL.md +125 -125
package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -394
package/skills/dotnet/csharp-developer/references/blazor.md +553 -553
package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -409
package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -248
package/skills/dotnet/csharp-developer/references/performance.md +498 -498
package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -138
package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -546
package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -455
package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -548
package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -440
package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -319
package/skills/frontend/angular-architect/SKILL.md +152 -152
package/skills/frontend/angular-architect/references/components.md +297 -297
package/skills/frontend/angular-architect/references/ngrx.md +401 -401
package/skills/frontend/angular-architect/references/routing.md +361 -361
package/skills/frontend/angular-architect/references/rxjs.md +319 -319
package/skills/frontend/angular-architect/references/testing.md +405 -405
package/skills/frontend/design-commands/design.md +91 -91
package/skills/frontend/design-commands/handoff.md +97 -97
package/skills/frontend/design-commands/prototype.md +120 -120
package/skills/frontend/design-commands/spec.md +160 -160
package/skills/frontend/design-commands/style.md +78 -78
package/skills/frontend/flutter-expert/SKILL.md +138 -138
package/skills/frontend/flutter-expert/references/bloc-state.md +259 -259
package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -119
package/skills/frontend/flutter-expert/references/performance.md +99 -99
package/skills/frontend/flutter-expert/references/project-structure.md +118 -118
package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -130
package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -123
package/skills/frontend/nextjs-developer/SKILL.md +143 -143
package/skills/frontend/nextjs-developer/references/app-router.md +311 -311
package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -482
package/skills/frontend/nextjs-developer/references/deployment.md +545 -545
package/skills/frontend/nextjs-developer/references/server-actions.md +462 -462
package/skills/frontend/nextjs-developer/references/server-components.md +384 -384
package/skills/frontend/react-expert/SKILL.md +149 -149
package/skills/frontend/react-expert/references/hooks-patterns.md +162 -162
package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -1119
package/skills/frontend/react-expert/references/performance.md +168 -168
package/skills/frontend/react-expert/references/react-19-features.md +174 -174
package/skills/frontend/react-expert/references/server-components.md +143 -143
package/skills/frontend/react-expert/references/state-management.md +171 -171
package/skills/frontend/react-expert/references/testing-react.md +174 -174
package/skills/frontend/react-native-expert/SKILL.md +185 -185
package/skills/frontend/react-native-expert/references/expo-router.md +187 -187
package/skills/frontend/react-native-expert/references/list-optimization.md +204 -204
package/skills/frontend/react-native-expert/references/platform-handling.md +188 -188
package/skills/frontend/react-native-expert/references/project-structure.md +171 -171
package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -173
package/skills/frontend/senior-frontend/SKILL.md +477 -477
package/skills/frontend/senior-frontend/references/frontend_best_practices.md +806 -806
package/skills/frontend/senior-frontend/references/nextjs_optimization_guide.md +724 -724
package/skills/frontend/senior-frontend/references/react_patterns.md +746 -746
package/skills/frontend/senior-frontend/scripts/bundle_analyzer.py +407 -407
package/skills/frontend/senior-frontend/scripts/component_generator.py +329 -329
package/skills/frontend/senior-frontend/scripts/frontend_scaffolder.py +1005 -1005
package/skills/frontend/ui-ux-pro-max/SKILL.md +386 -386
package/skills/frontend/ui-ux-pro-max/data/charts.csv +26 -26
package/skills/frontend/ui-ux-pro-max/data/colors.csv +97 -97
package/skills/frontend/ui-ux-pro-max/data/icons.csv +101 -101
package/skills/frontend/ui-ux-pro-max/data/landing.csv +31 -31
package/skills/frontend/ui-ux-pro-max/data/products.csv +96 -96
package/skills/frontend/ui-ux-pro-max/data/react-performance.csv +45 -45
package/skills/frontend/ui-ux-pro-max/data/stacks/astro.csv +54 -54
package/skills/frontend/ui-ux-pro-max/data/stacks/flutter.csv +53 -53
package/skills/frontend/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -56
package/skills/frontend/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -53
package/skills/frontend/ui-ux-pro-max/data/stacks/nextjs.csv +53 -53
package/skills/frontend/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -51
package/skills/frontend/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -59
package/skills/frontend/ui-ux-pro-max/data/stacks/react-native.csv +52 -52
package/skills/frontend/ui-ux-pro-max/data/stacks/react.csv +54 -54
package/skills/frontend/ui-ux-pro-max/data/stacks/shadcn.csv +61 -61
package/skills/frontend/ui-ux-pro-max/data/stacks/svelte.csv +54 -54
package/skills/frontend/ui-ux-pro-max/data/stacks/swiftui.csv +51 -51
package/skills/frontend/ui-ux-pro-max/data/stacks/vue.csv +50 -50
package/skills/frontend/ui-ux-pro-max/data/styles.csv +68 -68
package/skills/frontend/ui-ux-pro-max/data/typography.csv +57 -57
package/skills/frontend/ui-ux-pro-max/data/ui-reasoning.csv +101 -101
package/skills/frontend/ui-ux-pro-max/data/ux-guidelines.csv +99 -99
package/skills/frontend/ui-ux-pro-max/data/web-interface.csv +31 -31
package/skills/frontend/ui-ux-pro-max/scripts/core.py +253 -253
package/skills/frontend/ui-ux-pro-max/scripts/design_system.py +1067 -1067
package/skills/frontend/ui-ux-pro-max/scripts/search.py +114 -114
package/skills/frontend/vue-expert/SKILL.md +98 -98
package/skills/frontend/vue-expert/references/build-tooling.md +480 -480
package/skills/frontend/vue-expert/references/components.md +448 -448
package/skills/frontend/vue-expert/references/composition-api.md +299 -299
package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -636
package/skills/frontend/vue-expert/references/nuxt.md +669 -669
package/skills/frontend/vue-expert/references/state-management.md +449 -449
package/skills/frontend/vue-expert/references/typescript.md +584 -584
package/skills/frontend/vue-expert-js/SKILL.md +167 -167
package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -219
package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -183
package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -535
package/skills/frontend/vue-expert-js/references/state-management.md +249 -249
package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -237
package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -115
package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -440
package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -437
package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -397
package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -304
package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -357
package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -122
package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -329
package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -442
package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -432
package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -477
package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -451
package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -167
package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -458
package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -334
package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -278
package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -470
package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -413
package/skills/infra/cli-developer/SKILL.md +113 -113
package/skills/infra/cli-developer/references/design-patterns.md +221 -221
package/skills/infra/cli-developer/references/go-cli.md +540 -540
package/skills/infra/cli-developer/references/node-cli.md +383 -383
package/skills/infra/cli-developer/references/python-cli.md +422 -422
package/skills/infra/cli-developer/references/ux-patterns.md +448 -448
package/skills/infra/cloud-architect/SKILL.md +216 -216
package/skills/infra/cloud-architect/references/aws.md +394 -394
package/skills/infra/cloud-architect/references/azure.md +562 -562
package/skills/infra/cloud-architect/references/cost.md +582 -582
package/skills/infra/cloud-architect/references/gcp.md +633 -633
package/skills/infra/cloud-architect/references/multi-cloud.md +483 -483
package/skills/infra/devops-engineer/SKILL.md +144 -144
package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -241
package/skills/infra/devops-engineer/references/docker-patterns.md +113 -113
package/skills/infra/devops-engineer/references/github-actions.md +139 -139
package/skills/infra/devops-engineer/references/incident-response.md +331 -331
package/skills/infra/devops-engineer/references/kubernetes.md +154 -154
package/skills/infra/devops-engineer/references/platform-engineering.md +417 -417
package/skills/infra/devops-engineer/references/release-automation.md +527 -527
package/skills/infra/devops-engineer/references/terraform-iac.md +141 -141
package/skills/infra/kubernetes-specialist/SKILL.md +241 -241
package/skills/infra/kubernetes-specialist/references/configuration.md +452 -452
package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -458
package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -563
package/skills/infra/kubernetes-specialist/references/gitops.md +530 -530
package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -912
package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -507
package/skills/infra/kubernetes-specialist/references/networking.md +447 -447
package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -459
package/skills/infra/kubernetes-specialist/references/storage.md +535 -535
package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -414
package/skills/infra/kubernetes-specialist/references/workloads.md +377 -377
package/skills/infra/mcp-developer/SKILL.md +143 -143
package/skills/infra/mcp-developer/references/protocol.md +244 -244
package/skills/infra/mcp-developer/references/python-sdk.md +367 -367
package/skills/infra/mcp-developer/references/resources.md +554 -554
package/skills/infra/mcp-developer/references/tools.md +480 -480
package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -350
package/skills/infra/monitoring-expert/SKILL.md +176 -176
package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -141
package/skills/infra/monitoring-expert/references/application-profiling.md +331 -331
package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -344
package/skills/infra/monitoring-expert/references/dashboards.md +126 -126
package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -123
package/skills/infra/monitoring-expert/references/performance-testing.md +269 -269
package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -136
package/skills/infra/monitoring-expert/references/structured-logging.md +142 -142
package/skills/infra/sre-engineer/SKILL.md +181 -181
package/skills/infra/sre-engineer/references/automation-toil.md +492 -492
package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -334
package/skills/infra/sre-engineer/references/incident-chaos.md +576 -576
package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -424
package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -238
package/skills/infra/terraform-engineer/SKILL.md +143 -143
package/skills/infra/terraform-engineer/references/best-practices.md +583 -583
package/skills/infra/terraform-engineer/references/module-patterns.md +297 -297
package/skills/infra/terraform-engineer/references/providers.md +452 -452
package/skills/infra/terraform-engineer/references/state-management.md +371 -371
package/skills/infra/terraform-engineer/references/testing.md +486 -486
package/skills/infra/websocket-engineer/SKILL.md +168 -168
package/skills/infra/websocket-engineer/references/alternatives.md +391 -391
package/skills/infra/websocket-engineer/references/patterns.md +400 -400
package/skills/infra/websocket-engineer/references/protocol.md +195 -195
package/skills/infra/websocket-engineer/references/scaling.md +333 -333
package/skills/infra/websocket-engineer/references/security.md +474 -474
package/skills/java/java-architect/SKILL.md +132 -132
package/skills/java/java-architect/references/jpa-optimization.md +393 -393
package/skills/java/java-architect/references/reactive-webflux.md +356 -356
package/skills/java/java-architect/references/spring-boot-setup.md +269 -269
package/skills/java/java-architect/references/spring-security.md +445 -445
package/skills/java/java-architect/references/testing-patterns.md +500 -500
package/skills/java/kotlin-specialist/SKILL.md +147 -147
package/skills/java/kotlin-specialist/references/android-compose.md +419 -419
package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -276
package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -421
package/skills/java/kotlin-specialist/references/ktor-server.md +426 -426
package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -380
package/skills/java/spring-boot-engineer/SKILL.md +195 -195
package/skills/java/spring-boot-engineer/references/cloud.md +498 -498
package/skills/java/spring-boot-engineer/references/data.md +381 -381
package/skills/java/spring-boot-engineer/references/security.md +459 -459
package/skills/java/spring-boot-engineer/references/testing.md +545 -545
package/skills/java/spring-boot-engineer/references/web.md +295 -295
package/skills/javascript/javascript-pro/SKILL.md +132 -132
package/skills/javascript/javascript-pro/references/async-patterns.md +334 -334
package/skills/javascript/javascript-pro/references/browser-apis.md +398 -398
package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -272
package/skills/javascript/javascript-pro/references/modules.md +357 -357
package/skills/javascript/javascript-pro/references/node-essentials.md +471 -471
package/skills/javascript/nestjs-expert/SKILL.md +206 -206
package/skills/javascript/nestjs-expert/references/authentication.md +166 -166
package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -111
package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -153
package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -1237
package/skills/javascript/nestjs-expert/references/services-di.md +140 -140
package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -186
package/skills/javascript/typescript-pro/SKILL.md +145 -145
package/skills/javascript/typescript-pro/references/advanced-types.md +259 -259
package/skills/javascript/typescript-pro/references/configuration.md +445 -445
package/skills/javascript/typescript-pro/references/patterns.md +484 -484
package/skills/javascript/typescript-pro/references/type-guards.md +352 -352
package/skills/javascript/typescript-pro/references/utility-types.md +329 -329
package/skills/php/laravel-specialist/SKILL.md +262 -262
package/skills/php/laravel-specialist/references/eloquent.md +351 -351
package/skills/php/laravel-specialist/references/livewire.md +512 -512
package/skills/php/laravel-specialist/references/queues.md +423 -423
package/skills/php/laravel-specialist/references/routing.md +362 -362
package/skills/php/laravel-specialist/references/testing.md +522 -522
package/skills/php/php-pro/SKILL.md +206 -206
package/skills/php/php-pro/references/async-patterns.md +412 -412
package/skills/php/php-pro/references/laravel-patterns.md +377 -377
package/skills/php/php-pro/references/modern-php-features.md +323 -323
package/skills/php/php-pro/references/symfony-patterns.md +466 -466
package/skills/php/php-pro/references/testing-quality.md +466 -466
package/skills/product/competitive-analysis/SKILL.md +257 -257
package/skills/product/meeting-notes/SKILL.md +266 -266
package/skills/product/prd-template/SKILL.md +150 -150
package/skills/product/stakeholder-update/SKILL.md +225 -225
package/skills/product/user-research-synthesis/SKILL.md +235 -235
package/skills/python/django-expert/SKILL.md +162 -162
package/skills/python/django-expert/references/authentication.md +145 -145
package/skills/python/django-expert/references/drf-serializers.md +148 -148
package/skills/python/django-expert/references/models-orm.md +151 -151
package/skills/python/django-expert/references/testing-django.md +204 -204
package/skills/python/django-expert/references/viewsets-views.md +153 -153
package/skills/python/fastapi-expert/SKILL.md +185 -185
package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -146
package/skills/python/fastapi-expert/references/authentication.md +159 -159
package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -142
package/skills/python/fastapi-expert/references/migration-from-django.md +996 -996
package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -135
package/skills/python/fastapi-expert/references/testing-async.md +159 -159
package/skills/python/pandas-pro/SKILL.md +178 -178
package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -545
package/skills/python/pandas-pro/references/data-cleaning.md +500 -500
package/skills/python/pandas-pro/references/dataframe-operations.md +420 -420
package/skills/python/pandas-pro/references/merging-joining.md +596 -596
package/skills/python/pandas-pro/references/performance-optimization.md +597 -597
package/skills/python/python-pro/SKILL.md +177 -177
package/skills/python/python-pro/references/async-patterns.md +356 -356
package/skills/python/python-pro/references/packaging.md +460 -460
package/skills/python/python-pro/references/standard-library.md +378 -378
package/skills/python/python-pro/references/testing.md +404 -404
package/skills/python/python-pro/references/type-system.md +290 -290
package/skills/quality/chaos-engineer/SKILL.md +182 -182
package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -511
package/skills/quality/chaos-engineer/references/experiment-design.md +229 -229
package/skills/quality/chaos-engineer/references/game-days.md +434 -434
package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -348
package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -432
package/skills/quality/code-reviewer/SKILL.md +119 -119
package/skills/quality/code-reviewer/references/common-issues.md +142 -142
package/skills/quality/code-reviewer/references/feedback-examples.md +144 -144
package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -238
package/skills/quality/code-reviewer/references/report-template.md +109 -109
package/skills/quality/code-reviewer/references/review-checklist.md +88 -88
package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -258
package/skills/quality/playwright-expert/SKILL.md +169 -169
package/skills/quality/playwright-expert/references/api-mocking.md +140 -140
package/skills/quality/playwright-expert/references/configuration.md +155 -155
package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -150
package/skills/quality/playwright-expert/references/page-object-model.md +152 -152
package/skills/quality/playwright-expert/references/selectors-locators.md +119 -119
package/skills/quality/secure-code-guardian/SKILL.md +191 -191
package/skills/quality/secure-code-guardian/references/authentication.md +136 -136
package/skills/quality/secure-code-guardian/references/input-validation.md +146 -146
package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -135
package/skills/quality/secure-code-guardian/references/security-headers.md +133 -133
package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -157
package/skills/quality/security-reviewer/SKILL.md +103 -103
package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -268
package/skills/quality/security-reviewer/references/penetration-testing.md +268 -268
package/skills/quality/security-reviewer/references/report-template.md +170 -170
package/skills/quality/security-reviewer/references/sast-tools.md +117 -117
package/skills/quality/security-reviewer/references/secret-scanning.md +125 -125
package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -152
package/skills/quality/senior-qa/README.md +196 -196
package/skills/quality/senior-qa/SKILL.md +399 -399
package/skills/quality/senior-qa/references/qa_best_practices.md +964 -964
package/skills/quality/senior-qa/references/test_automation_patterns.md +1009 -1009
package/skills/quality/senior-qa/references/testing_strategies.md +649 -649
package/skills/quality/senior-qa/scripts/coverage_analyzer.py +836 -836
package/skills/quality/senior-qa/scripts/e2e_test_scaffolder.py +820 -820
package/skills/quality/senior-qa/scripts/test_suite_generator.py +605 -605
package/skills/quality/tdd-guide/HOW_TO_USE.md +313 -313
package/skills/quality/tdd-guide/README.md +680 -680
package/skills/quality/tdd-guide/SKILL.md +122 -122
package/skills/quality/tdd-guide/assets/expected_output.json +77 -77
package/skills/quality/tdd-guide/assets/sample_input_python.json +39 -39
package/skills/quality/tdd-guide/assets/sample_input_typescript.json +36 -36
package/skills/quality/tdd-guide/references/ci-integration.md +195 -195
package/skills/quality/tdd-guide/references/framework-guide.md +206 -206
package/skills/quality/tdd-guide/references/tdd-best-practices.md +128 -128
package/skills/quality/tdd-guide/scripts/coverage_analyzer.py +434 -434
package/skills/quality/tdd-guide/scripts/fixture_generator.py +440 -440
package/skills/quality/tdd-guide/scripts/format_detector.py +384 -384
package/skills/quality/tdd-guide/scripts/framework_adapter.py +428 -428
package/skills/quality/tdd-guide/scripts/metrics_calculator.py +456 -456
package/skills/quality/tdd-guide/scripts/output_formatter.py +354 -354
package/skills/quality/tdd-guide/scripts/tdd_workflow.py +474 -474
package/skills/quality/tdd-guide/scripts/test_generator.py +438 -438
package/skills/quality/test-master/SKILL.md +94 -94
package/skills/quality/test-master/references/automation-frameworks.md +294 -294
package/skills/quality/test-master/references/e2e-testing.md +128 -128
package/skills/quality/test-master/references/integration-testing.md +120 -120
package/skills/quality/test-master/references/performance-testing.md +118 -118
package/skills/quality/test-master/references/qa-methodology.md +247 -247
package/skills/quality/test-master/references/security-testing.md +127 -127
package/skills/quality/test-master/references/tdd-iron-laws.md +174 -174
package/skills/quality/test-master/references/test-reports.md +104 -104
package/skills/quality/test-master/references/testing-anti-patterns.md +231 -231
package/skills/quality/test-master/references/unit-testing.md +113 -113
package/skills/ruby/rails-expert/SKILL.md +154 -154
package/skills/ruby/rails-expert/references/active-record.md +244 -244
package/skills/ruby/rails-expert/references/api-development.md +401 -401
package/skills/ruby/rails-expert/references/background-jobs.md +272 -272
package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -228
package/skills/ruby/rails-expert/references/rspec-testing.md +367 -367
package/skills/swift/swift-expert/SKILL.md +163 -163
package/skills/swift/swift-expert/references/async-concurrency.md +360 -360
package/skills/swift/swift-expert/references/memory-performance.md +377 -377
package/skills/swift/swift-expert/references/protocol-oriented.md +354 -354
package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -291
package/skills/swift/swift-expert/references/testing-patterns.md +399 -399
package/skills/workflow/brainstorming/SKILL.md +164 -164
package/skills/workflow/brainstorming/scripts/frame-template.html +214 -214
package/skills/workflow/brainstorming/scripts/helper.js +88 -88
package/skills/workflow/brainstorming/scripts/server.cjs +354 -354
package/skills/workflow/brainstorming/scripts/start-server.sh +148 -148
package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -56
package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -49
package/skills/workflow/brainstorming/visual-companion.md +287 -287
package/skills/workflow/documentation/SKILL.md +45 -45
package/skills/workflow/entropy-management/SKILL.md +115 -115
package/skills/workflow/executing-plans/SKILL.md +70 -70
package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -200
package/skills/workflow/receiving-code-review/SKILL.md +213 -213
package/skills/workflow/requesting-code-review/SKILL.md +105 -105
package/skills/workflow/requesting-code-review/code-reviewer.md +146 -146
package/skills/workflow/requirement-engineering/SKILL.md +111 -111
package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -119
package/skills/workflow/systematic-debugging/SKILL.md +296 -296
package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -158
package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -115
package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -122
package/skills/workflow/systematic-debugging/find-polluter.sh +63 -63
package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -169
package/skills/workflow/systematic-debugging/test-academic.md +14 -14
package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -58
package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -68
package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -69
package/skills/workflow/using-git-worktrees/SKILL.md +218 -218
package/skills/workflow/verification-before-completion/SKILL.md +139 -139
package/skills/workflow/writing-plans/SKILL.md +151 -151
package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -49
package/skills/workflow/writing-skills/SKILL.md +655 -655
package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -1150
package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -189
package/skills/workflow/writing-skills/persuasion-principles.md +187 -187
package/skills/workflow/writing-skills/render-graphs.js +168 -168
package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -384

package/skills/ai-ml/rag-architect/references/chunking-strategies.md CHANGED Viewed

@@ -1,878 +1,878 @@
-# Chunking Strategies
----
-## Strategy Comparison Matrix
-| Strategy | Best For | Chunk Quality | Implementation Complexity |
-|----------|----------|---------------|---------------------------|
-| **Fixed-size** | Simple documents, logs | Low-Medium | Simple |
-| **Recursive character** | General text, articles | Medium | Simple |
-| **Sentence-based** | Conversational, Q&A | Medium-High | Medium |
-| **Semantic** | Technical docs, manuals | High | Medium |
-| **Document-aware** | Structured content (MD, HTML) | High | Medium |
-| **Agentic/Contextual** | Complex documents | Very High | Complex |
-| **Late chunking** | Long-context embeddings | High | Medium |
----
-## When to Use Each Strategy
-### Fixed-Size Chunking
-```
-Best For:
-- Log files and structured data
-- Quick prototyping
-- When content has no natural structure
-- Baseline comparison
-When to Avoid:
-- Technical documentation
-- Content with semantic units (paragraphs, sections)
-- When context preservation matters
-```
-### Recursive Character Splitting
-```
-Best For:
-- General articles and blog posts
-- Mixed content types
-- Default starting point for most RAG
-- LangChain/LlamaIndex default
-When to Avoid:
-- Highly structured documents
-- Code-heavy content
-- Tables and lists
-```
-### Semantic Chunking
-```
-Best For:
-- Technical documentation
-- Research papers
-- Content with natural topic boundaries
-- When retrieval precision is critical
-When to Avoid:
-- Real-time ingestion (slower)
-- Very short documents
-- Cost-sensitive pipelines (requires embeddings)
-```
-### Document-Aware Chunking
-```
-Best For:
-- Markdown documentation
-- HTML pages
-- LaTeX papers
-- Code files
-When to Avoid:
-- Plain text without structure
-- Inconsistent formatting
-```
----
-## Fixed-Size Chunking
-```python
-def fixed_size_chunk(
-    text: str,
-    chunk_size: int = 500,
-    overlap: int = 50
-) -> list[str]:
-    """Simple fixed-size chunking with overlap."""
-    chunks = []
-    start = 0
-    while start < len(text):
-        end = start + chunk_size
-        chunk = text[start:end]
-        # Try to break at word boundary
-        if end < len(text):
-            last_space = chunk.rfind(' ')
-            if last_space > chunk_size * 0.8:  # Only if reasonably far in
-                chunk = chunk[:last_space]
-                end = start + last_space
-        chunks.append(chunk.strip())
-        start = end - overlap
-    return chunks
-# Usage
-chunks = fixed_size_chunk(document_text, chunk_size=500, overlap=50)
-```
----
-## Recursive Character Splitting (LangChain Style)
-```python
-from typing import Callable
-class RecursiveCharacterSplitter:
-    """Split text recursively using multiple separators."""
-    def __init__(
-        self,
-        chunk_size: int = 1000,
-        chunk_overlap: int = 200,
-        separators: list[str] | None = None,
-        length_function: Callable[[str], int] = len
-    ):
-        self.chunk_size = chunk_size
-        self.chunk_overlap = chunk_overlap
-        self.separators = separators or ["\n\n", "\n", ". ", " ", ""]
-        self.length_function = length_function
-    def split_text(self, text: str) -> list[str]:
-        """Split text into chunks."""
-        return self._split_text(text, self.separators)
-    def _split_text(self, text: str, separators: list[str]) -> list[str]:
-        final_chunks = []
-        separator = separators[-1]
-        for i, sep in enumerate(separators):
-            if sep == "":
-                separator = sep
-                break
-            if sep in text:
-                separator = sep
-                break
-        splits = text.split(separator) if separator else list(text)
-        good_splits = []
-        for split in splits:
-            if self.length_function(split) < self.chunk_size:
-                good_splits.append(split)
-            else:
-                if good_splits:
-                    merged = self._merge_splits(good_splits, separator)
-                    final_chunks.extend(merged)
-                    good_splits = []
-                # Recursively split large chunks
-                other_chunks = self._split_text(split, separators[separators.index(separator) + 1:])
-                final_chunks.extend(other_chunks)
-        if good_splits:
-            merged = self._merge_splits(good_splits, separator)
-            final_chunks.extend(merged)
-        return final_chunks
-    def _merge_splits(self, splits: list[str], separator: str) -> list[str]:
-        """Merge splits into chunks respecting size limits."""
-        chunks = []
-        current_chunk = []
-        current_length = 0
-        for split in splits:
-            split_length = self.length_function(split)
-            if current_length + split_length > self.chunk_size:
-                if current_chunk:
-                    chunks.append(separator.join(current_chunk))
-                    # Keep overlap
-                    while current_length > self.chunk_overlap and current_chunk:
-                        current_length -= self.length_function(current_chunk[0])
-                        current_chunk = current_chunk[1:]
-            current_chunk.append(split)
-            current_length += split_length
-        if current_chunk:
-            chunks.append(separator.join(current_chunk))
-        return chunks
-# Usage
-splitter = RecursiveCharacterSplitter(
-    chunk_size=1000,
-    chunk_overlap=200,
-    separators=["\n\n", "\n", ". ", " "]
-)
-chunks = splitter.split_text(document_text)
-```
-### Token-Based Splitting
-```python
-import tiktoken
-def create_token_splitter(
-    model: str = "gpt-4",
-    chunk_size: int = 500,
-    chunk_overlap: int = 50
-):
-    """Create splitter that counts tokens instead of characters."""
-    encoding = tiktoken.encoding_for_model(model)
-    def token_length(text: str) -> int:
-        return len(encoding.encode(text))
-    return RecursiveCharacterSplitter(
-        chunk_size=chunk_size,
-        chunk_overlap=chunk_overlap,
-        length_function=token_length
-    )
-# Usage
-token_splitter = create_token_splitter(chunk_size=500, chunk_overlap=50)
-chunks = token_splitter.split_text(document_text)
-```
----
-## Sentence-Based Chunking
-```python
-import re
-from dataclasses import dataclass
-@dataclass
-class SentenceChunk:
-    text: str
-    sentences: list[str]
-    start_sentence: int
-    end_sentence: int
-def sentence_chunk(
-    text: str,
-    sentences_per_chunk: int = 5,
-    overlap_sentences: int = 1
-) -> list[SentenceChunk]:
-    """Chunk by sentence count with overlap."""
-    # Split into sentences
-    sentence_pattern = r'(?<=[.!?])\s+'
-    sentences = re.split(sentence_pattern, text)
-    sentences = [s.strip() for s in sentences if s.strip()]
-    chunks = []
-    i = 0
-    while i < len(sentences):
-        end = min(i + sentences_per_chunk, len(sentences))
-        chunk_sentences = sentences[i:end]
-        chunks.append(SentenceChunk(
-            text=" ".join(chunk_sentences),
-            sentences=chunk_sentences,
-            start_sentence=i,
-            end_sentence=end - 1
-        ))
-        i += sentences_per_chunk - overlap_sentences
-    return chunks
-# Better sentence splitting with NLTK
-import nltk
-nltk.download('punkt')
-from nltk.tokenize import sent_tokenize
-def sentence_chunk_nltk(
-    text: str,
-    max_chunk_size: int = 1000,
-    overlap_sentences: int = 2
-) -> list[str]:
-    """Chunk by sentences up to max size."""
-    sentences = sent_tokenize(text)
-    chunks = []
-    current_chunk = []
-    current_size = 0
-    for sentence in sentences:
-        sentence_size = len(sentence)
-        if current_size + sentence_size > max_chunk_size and current_chunk:
-            chunks.append(" ".join(current_chunk))
-            # Keep overlap sentences
-            current_chunk = current_chunk[-overlap_sentences:] if overlap_sentences else []
-            current_size = sum(len(s) for s in current_chunk)
-        current_chunk.append(sentence)
-        current_size += sentence_size
-    if current_chunk:
-        chunks.append(" ".join(current_chunk))
-    return chunks
-```
----
-## Semantic Chunking
-```python
-import numpy as np
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
-class SemanticChunker:
-    """Chunk based on semantic similarity between sentences."""
-    def __init__(
-        self,
-        model_name: str = "all-MiniLM-L6-v2",
-        similarity_threshold: float = 0.5,
-        min_chunk_size: int = 100,
-        max_chunk_size: int = 1500
-    ):
-        self.model = SentenceTransformer(model_name)
-        self.similarity_threshold = similarity_threshold
-        self.min_chunk_size = min_chunk_size
-        self.max_chunk_size = max_chunk_size
-    def chunk(self, text: str) -> list[str]:
-        """Split text at semantic boundaries."""
-        # Split into sentences
-        sentences = self._split_sentences(text)
-        if len(sentences) <= 1:
-            return [text]
-        # Get embeddings
-        embeddings = self.model.encode(sentences)
-        # Find breakpoints based on similarity drops
-        breakpoints = self._find_breakpoints(embeddings)
-        # Create chunks
-        chunks = []
-        start = 0
-        for bp in breakpoints:
-            chunk_text = " ".join(sentences[start:bp])
-            # Handle size constraints
-            if len(chunk_text) > self.max_chunk_size:
-                # Split large chunks
-                sub_chunks = self._split_large_chunk(sentences[start:bp])
-                chunks.extend(sub_chunks)
-            elif len(chunk_text) >= self.min_chunk_size:
-                chunks.append(chunk_text)
-            elif chunks:
-                # Merge small chunk with previous
-                chunks[-1] += " " + chunk_text
-            else:
-                chunks.append(chunk_text)
-            start = bp
-        # Handle remaining sentences
-        if start < len(sentences):
-            remaining = " ".join(sentences[start:])
-            if chunks and len(remaining) < self.min_chunk_size:
-                chunks[-1] += " " + remaining
-            else:
-                chunks.append(remaining)
-        return chunks
-    def _split_sentences(self, text: str) -> list[str]:
-        """Split text into sentences."""
-        import re
-        sentences = re.split(r'(?<=[.!?])\s+', text)
-        return [s.strip() for s in sentences if s.strip()]
-    def _find_breakpoints(self, embeddings: np.ndarray) -> list[int]:
-        """Find semantic breakpoints using similarity drops."""
-        breakpoints = []
-        for i in range(1, len(embeddings)):
-            similarity = cosine_similarity(
-                embeddings[i-1:i],
-                embeddings[i:i+1]
-            )[0][0]
-            if similarity < self.similarity_threshold:
-                breakpoints.append(i)
-        return breakpoints
-    def _split_large_chunk(self, sentences: list[str]) -> list[str]:
-        """Split oversized chunk at midpoint."""
-        mid = len(sentences) // 2
-        return [
-            " ".join(sentences[:mid]),
-            " ".join(sentences[mid:])
-        ]
-# Usage
-chunker = SemanticChunker(
-    similarity_threshold=0.5,
-    min_chunk_size=200,
-    max_chunk_size=1000
-)
-semantic_chunks = chunker.chunk(document_text)
-```
-### Percentile-Based Breakpoints
-```python
-def find_breakpoints_percentile(
-    embeddings: np.ndarray,
-    percentile: int = 25
-) -> list[int]:
-    """Find breakpoints at similarity drops below percentile threshold."""
-    similarities = []
-    for i in range(1, len(embeddings)):
-        sim = cosine_similarity(
-            embeddings[i-1:i],
-            embeddings[i:i+1]
-        )[0][0]
-        similarities.append((i, sim))
-    # Dynamic threshold based on distribution
-    sim_values = [s[1] for s in similarities]
-    threshold = np.percentile(sim_values, percentile)
-    return [i for i, sim in similarities if sim < threshold]
-```
----
-## Document-Aware Chunking
-### Markdown Chunking
-```python
-import re
-from dataclasses import dataclass
-@dataclass
-class MarkdownChunk:
-    text: str
-    heading: str | None
-    heading_level: int
-    metadata: dict
-def chunk_markdown(
-    text: str,
-    max_chunk_size: int = 1500,
-    include_heading_in_chunk: bool = True
-) -> list[MarkdownChunk]:
-    """Chunk markdown by headers while respecting structure."""
-    # Pattern to match headers
-    header_pattern = r'^(#{1,6})\s+(.+)$'
-    lines = text.split('\n')
-    chunks = []
-    current_chunk_lines = []
-    current_heading = None
-    current_level = 0
-    heading_stack = []  # For breadcrumb context
-    for line in lines:
-        header_match = re.match(header_pattern, line)
-        if header_match:
-            # Save current chunk if exists
-            if current_chunk_lines:
-                chunk_text = '\n'.join(current_chunk_lines)
-                if len(chunk_text.strip()) > 0:
-                    prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
-                    chunks.append(MarkdownChunk(
-                        text=prefix + chunk_text,
-                        heading=current_heading,
-                        heading_level=current_level,
-                        metadata={"breadcrumb": " > ".join(heading_stack)}
-                    ))
-            # Update heading context
-            level = len(header_match.group(1))
-            heading = header_match.group(2).strip()
-            # Maintain heading stack for breadcrumbs
-            while heading_stack and current_level >= level:
-                heading_stack.pop()
-                current_level -= 1
-            heading_stack.append(heading)
-            current_heading = heading
-            current_level = level
-            current_chunk_lines = []
-        else:
-            current_chunk_lines.append(line)
-            # Check chunk size
-            current_text = '\n'.join(current_chunk_lines)
-            if len(current_text) > max_chunk_size:
-                # Split at paragraph boundary
-                paragraphs = current_text.split('\n\n')
-                if len(paragraphs) > 1:
-                    split_point = len('\n\n'.join(paragraphs[:-1]))
-                    chunk_text = current_text[:split_point]
-                    prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
-                    chunks.append(MarkdownChunk(
-                        text=prefix + chunk_text,
-                        heading=current_heading,
-                        heading_level=current_level,
-                        metadata={"breadcrumb": " > ".join(heading_stack)}
-                    ))
-                    current_chunk_lines = [current_text[split_point:].strip()]
-    # Don't forget the last chunk
-    if current_chunk_lines:
-        chunk_text = '\n'.join(current_chunk_lines)
-        if len(chunk_text.strip()) > 0:
-            prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
-            chunks.append(MarkdownChunk(
-                text=prefix + chunk_text,
-                heading=current_heading,
-                heading_level=current_level,
-                metadata={"breadcrumb": " > ".join(heading_stack)}
-            ))
-    return chunks
-```
-### Code-Aware Chunking
-```python
-import re
-from dataclasses import dataclass
-@dataclass
-class CodeChunk:
-    text: str
-    language: str | None
-    chunk_type: str  # "code", "text", "mixed"
-def chunk_with_code_blocks(
-    text: str,
-    max_chunk_size: int = 1500
-) -> list[CodeChunk]:
-    """Chunk text while keeping code blocks intact."""
-    # Pattern to match code blocks
-    code_block_pattern = r'```(\w+)?\n(.*?)```'
-    chunks = []
-    last_end = 0
-    for match in re.finditer(code_block_pattern, text, re.DOTALL):
-        # Text before code block
-        text_before = text[last_end:match.start()].strip()
-        if text_before:
-            # Chunk the text portion
-            text_chunks = recursive_chunk(text_before, max_chunk_size)
-            chunks.extend([
-                CodeChunk(text=t, language=None, chunk_type="text")
-                for t in text_chunks
-            ])
-        # Code block (keep intact if possible)
-        language = match.group(1)
-        code_content = match.group(2)
-        full_block = match.group(0)
-        if len(full_block) <= max_chunk_size:
-            chunks.append(CodeChunk(
-                text=full_block,
-                language=language,
-                chunk_type="code"
-            ))
-        else:
-            # Split large code blocks by function/class
-            code_chunks = split_code_block(code_content, language, max_chunk_size)
-            chunks.extend(code_chunks)
-        last_end = match.end()
-    # Remaining text after last code block
-    remaining = text[last_end:].strip()
-    if remaining:
-        text_chunks = recursive_chunk(remaining, max_chunk_size)
-        chunks.extend([
-            CodeChunk(text=t, language=None, chunk_type="text")
-            for t in text_chunks
-        ])
-    return chunks
-def split_code_block(code: str, language: str, max_size: int) -> list[CodeChunk]:
-    """Split code block at logical boundaries."""
-    # Simple function/class boundary splitting for Python
-    if language == "python":
-        pattern = r'\n(?=def |class |async def )'
-    elif language in ["javascript", "typescript"]:
-        pattern = r'\n(?=function |class |const |export )'
-    else:
-        pattern = r'\n\n'
-    parts = re.split(pattern, code)
-    chunks = []
-    current = ""
-    for part in parts:
-        if len(current) + len(part) > max_size and current:
-            chunks.append(CodeChunk(
-                text=f"```{language}\n{current}```",
-                language=language,
-                chunk_type="code"
-            ))
-            current = part
-        else:
-            current += part
-    if current:
-        chunks.append(CodeChunk(
-            text=f"```{language}\n{current}```",
-            language=language,
-            chunk_type="code"
-        ))
-    return chunks
-```
----
-## Contextual/Agentic Chunking
-```python
-from openai import OpenAI
-def contextual_chunk(
-    document: str,
-    max_chunk_size: int = 1500
-) -> list[dict]:
-    """Use LLM to add context to each chunk."""
-    # First, do structural chunking
-    base_chunks = recursive_chunk(document, max_chunk_size)
-    client = OpenAI()
-    contextualized_chunks = []
-    for chunk in base_chunks:
-        # Generate contextual summary
-        response = client.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=[
-                {
-                    "role": "system",
-                    "content": """Provide a brief context for this document chunk.
-                    Include: what topic it covers, how it relates to the broader document,
-                    and key concepts mentioned. Keep it under 100 words."""
-                },
-                {
-                    "role": "user",
-                    "content": f"Document excerpt:\n\n{chunk}"
-                }
-            ],
-            max_tokens=150
-        )
-        context = response.choices[0].message.content
-        contextualized_chunks.append({
-            "text": chunk,
-            "context": context,
-            "text_with_context": f"Context: {context}\n\nContent: {chunk}"
-        })
-    return contextualized_chunks
-```
-### Propositions-Based Chunking
-```python
-def extract_propositions(text: str) -> list[str]:
-    """Extract atomic propositions from text using LLM."""
-    client = OpenAI()
-    response = client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "system",
-                "content": """Extract atomic propositions from the text.
-                Each proposition should:
-                - Be a single, complete fact
-                - Be self-contained (understandable without context)
-                - Include necessary entity references
-                Return as a JSON array of strings."""
-            },
-            {
-                "role": "user",
-                "content": text
-            }
-        ],
-        response_format={"type": "json_object"}
-    )
-    import json
-    result = json.loads(response.choices[0].message.content)
-    return result.get("propositions", [])
-# Usage: For very fine-grained retrieval
-propositions = extract_propositions(document_text)
-# Each proposition becomes its own retrievable unit
-```
----
-## Late Chunking (for Long-Context Embeddings)
-```python
-from transformers import AutoTokenizer, AutoModel
-import torch
-class LateChunker:
-    """
-    Late chunking: embed full document, then pool token embeddings into chunks.
-    Preserves full document context while creating retrievable chunks.
-    """
-    def __init__(self, model_name: str = "jinaai/jina-embeddings-v2-base-en"):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
-        self.model.eval()
-    def chunk_and_embed(
-        self,
-        text: str,
-        chunk_size: int = 512,
-        overlap: int = 64
-    ) -> list[dict]:
-        """
-        Embed full document, then create chunk embeddings via mean pooling.
-        """
-        # Tokenize full document
-        inputs = self.tokenizer(
-            text,
-            return_tensors="pt",
-            truncation=True,
-            max_length=8192  # Model's max context
-        )
-        # Get token-level embeddings
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            token_embeddings = outputs.last_hidden_state[0]  # [seq_len, hidden_dim]
-        # Get token-to-text mapping
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-        # Create chunks from token embeddings
-        chunks = []
-        seq_len = token_embeddings.shape[0]
-        start = 0
-        while start < seq_len:
-            end = min(start + chunk_size, seq_len)
-            # Mean pool token embeddings for this chunk
-            chunk_embedding = token_embeddings[start:end].mean(dim=0).numpy()
-            # Reconstruct text for this chunk
-            chunk_token_ids = inputs["input_ids"][0][start:end]
-            chunk_text = self.tokenizer.decode(chunk_token_ids, skip_special_tokens=True)
-            chunks.append({
-                "text": chunk_text,
-                "embedding": chunk_embedding,
-                "start_token": start,
-                "end_token": end
-            })
-            start = end - overlap
-        return chunks
-# Usage
-late_chunker = LateChunker()
-chunks_with_embeddings = late_chunker.chunk_and_embed(
-    long_document,
-    chunk_size=512,
-    overlap=64
-)
-```
----
-## Metadata Enrichment
-```python
-from dataclasses import dataclass
-from datetime import datetime
-import hashlib
-@dataclass
-class EnrichedChunk:
-    text: str
-    embedding: list[float] | None
-    metadata: dict
-def enrich_chunk(
-    text: str,
-    source_file: str,
-    chunk_index: int,
-    total_chunks: int,
-    additional_metadata: dict | None = None
-) -> EnrichedChunk:
-    """Add comprehensive metadata to chunk."""
-    metadata = {
-        # Source tracking
-        "source": source_file,
-        "chunk_index": chunk_index,
-        "total_chunks": total_chunks,
-        # Content characteristics
-        "char_count": len(text),
-        "word_count": len(text.split()),
-        "content_hash": hashlib.md5(text.encode()).hexdigest()[:12],
-        # Temporal
-        "indexed_at": datetime.utcnow().isoformat(),
-        # Position context
-        "position": "start" if chunk_index == 0 else (
-            "end" if chunk_index == total_chunks - 1 else "middle"
-        )
-    }
-    if additional_metadata:
-        metadata.update(additional_metadata)
-    return EnrichedChunk(text=text, embedding=None, metadata=metadata)
-```
----
-## Chunk Size Selection Guide
-| Document Type | Recommended Size | Overlap | Rationale |
-|--------------|------------------|---------|-----------|
-| FAQ/Q&A | 200-400 tokens | 20-50 | Keep Q&A pairs together |
-| Technical docs | 400-600 tokens | 50-100 | Balance context vs precision |
-| Legal/contracts | 600-800 tokens | 100-150 | Preserve clause context |
-| Code documentation | 300-500 tokens | 50-100 | Keep function docs together |
-| Chat transcripts | 150-300 tokens | 25-50 | Natural turn boundaries |
-| Research papers | 500-800 tokens | 100-200 | Section-level coherence |
----
-## Quick Reference
-| Strategy | Use Case | Code Pattern |
-|----------|----------|--------------|
-| Fixed-size | Logs, baseline | `text[i:i+chunk_size]` |
-| Recursive | General text | Split by `["\n\n", "\n", ". "]` |
-| Sentence | Q&A content | `sent_tokenize()` + merge |
-| Semantic | Technical docs | Similarity-based breaks |
-| Markdown | Documentation | Header-aware splitting |
-| Late chunking | Long-context models | Embed full, pool chunks |
-## Related Skills
-- **RAG Architect** - Integration with vector databases
-- **Python Pro** - Preprocessing pipelines
-- **NLP Engineer** - Tokenization and text processing
+# Chunking Strategies
+---
+## Strategy Comparison Matrix
+| Strategy | Best For | Chunk Quality | Implementation Complexity |
+|----------|----------|---------------|---------------------------|
+| **Fixed-size** | Simple documents, logs | Low-Medium | Simple |
+| **Recursive character** | General text, articles | Medium | Simple |
+| **Sentence-based** | Conversational, Q&A | Medium-High | Medium |
+| **Semantic** | Technical docs, manuals | High | Medium |
+| **Document-aware** | Structured content (MD, HTML) | High | Medium |
+| **Agentic/Contextual** | Complex documents | Very High | Complex |
+| **Late chunking** | Long-context embeddings | High | Medium |
+---
+## When to Use Each Strategy
+### Fixed-Size Chunking
+```
+Best For:
+- Log files and structured data
+- Quick prototyping
+- When content has no natural structure
+- Baseline comparison
+When to Avoid:
+- Technical documentation
+- Content with semantic units (paragraphs, sections)
+- When context preservation matters
+```
+### Recursive Character Splitting
+```
+Best For:
+- General articles and blog posts
+- Mixed content types
+- Default starting point for most RAG pipelines
+- LangChain/LlamaIndex default
+When to Avoid:
+- Highly structured documents
+- Code-heavy content
+- Tables and lists
+```
+### Semantic Chunking
+```
+Best For:
+- Technical documentation
+- Research papers
+- Content with natural topic boundaries
+- When retrieval precision is critical
+When to Avoid:
+- Real-time ingestion (slower)
+- Very short documents
+- Cost-sensitive pipelines (requires embeddings)
+```
+### Document-Aware Chunking
+```
+Best For:
+- Markdown documentation
+- HTML pages
+- LaTeX papers
+- Code files
+When to Avoid:
+- Plain text without structure
+- Inconsistent formatting
+```
+---
+## Fixed-Size Chunking
+```python
+def fixed_size_chunk(
+    text: str,
+    chunk_size: int = 500,
+    overlap: int = 50
+) -> list[str]:
+    """Simple fixed-size chunking with overlap."""
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + chunk_size
+        chunk = text[start:end]
+        # Try to break at word boundary
+        if end < len(text):
+            last_space = chunk.rfind(' ')
+            if last_space > chunk_size * 0.8:  # Only if reasonably far in
+                chunk = chunk[:last_space]
+                end = start + last_space
+        chunks.append(chunk.strip())
+        start = end - overlap
+    return chunks
+# Usage
+chunks = fixed_size_chunk(document_text, chunk_size=500, overlap=50)
+```
+---
+## Recursive Character Splitting (LangChain Style)
+```python
+from typing import Callable
+class RecursiveCharacterSplitter:
+    """Split text recursively using multiple separators."""
+    def __init__(
+        self,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 200,
+        separators: list[str] | None = None,
+        length_function: Callable[[str], int] = len
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.separators = separators or ["\n\n", "\n", ". ", " ", ""]
+        self.length_function = length_function
+    def split_text(self, text: str) -> list[str]:
+        """Split text into chunks."""
+        return self._split_text(text, self.separators)
+    def _split_text(self, text: str, separators: list[str]) -> list[str]:
+        final_chunks = []
+        separator = separators[-1]
+        for i, sep in enumerate(separators):
+            if sep == "":
+                separator = sep
+                break
+            if sep in text:
+                separator = sep
+                break
+        splits = text.split(separator) if separator else list(text)
+        good_splits = []
+        for split in splits:
+            if self.length_function(split) < self.chunk_size:
+                good_splits.append(split)
+            else:
+                if good_splits:
+                    merged = self._merge_splits(good_splits, separator)
+                    final_chunks.extend(merged)
+                    good_splits = []
+                # Recursively split large chunks
+                other_chunks = self._split_text(split, separators[separators.index(separator) + 1:])
+                final_chunks.extend(other_chunks)
+        if good_splits:
+            merged = self._merge_splits(good_splits, separator)
+            final_chunks.extend(merged)
+        return final_chunks
+    def _merge_splits(self, splits: list[str], separator: str) -> list[str]:
+        """Merge splits into chunks respecting size limits."""
+        chunks = []
+        current_chunk = []
+        current_length = 0
+        for split in splits:
+            split_length = self.length_function(split)
+            if current_length + split_length > self.chunk_size:
+                if current_chunk:
+                    chunks.append(separator.join(current_chunk))
+                    # Keep overlap
+                    while current_length > self.chunk_overlap and current_chunk:
+                        current_length -= self.length_function(current_chunk[0])
+                        current_chunk = current_chunk[1:]
+            current_chunk.append(split)
+            current_length += split_length
+        if current_chunk:
+            chunks.append(separator.join(current_chunk))
+        return chunks
+# Usage
+splitter = RecursiveCharacterSplitter(
+    chunk_size=1000,
+    chunk_overlap=200,
+    separators=["\n\n", "\n", ". ", " "]
+)
+chunks = splitter.split_text(document_text)
+```
+### Token-Based Splitting
+```python
+import tiktoken
+def create_token_splitter(
+    model: str = "gpt-4",
+    chunk_size: int = 500,
+    chunk_overlap: int = 50
+):
+    """Create splitter that counts tokens instead of characters."""
+    encoding = tiktoken.encoding_for_model(model)
+    def token_length(text: str) -> int:
+        return len(encoding.encode(text))
+    return RecursiveCharacterSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=token_length
+    )
+# Usage
+token_splitter = create_token_splitter(chunk_size=500, chunk_overlap=50)
+chunks = token_splitter.split_text(document_text)
+```
+---
+## Sentence-Based Chunking
+```python
+import re
+from dataclasses import dataclass
+@dataclass
+class SentenceChunk:
+    text: str
+    sentences: list[str]
+    start_sentence: int
+    end_sentence: int
+def sentence_chunk(
+    text: str,
+    sentences_per_chunk: int = 5,
+    overlap_sentences: int = 1
+) -> list[SentenceChunk]:
+    """Chunk by sentence count with overlap."""
+    # Split into sentences
+    sentence_pattern = r'(?<=[.!?])\s+'
+    sentences = re.split(sentence_pattern, text)
+    sentences = [s.strip() for s in sentences if s.strip()]
+    chunks = []
+    i = 0
+    while i < len(sentences):
+        end = min(i + sentences_per_chunk, len(sentences))
+        chunk_sentences = sentences[i:end]
+        chunks.append(SentenceChunk(
+            text=" ".join(chunk_sentences),
+            sentences=chunk_sentences,
+            start_sentence=i,
+            end_sentence=end - 1
+        ))
+        i += sentences_per_chunk - overlap_sentences
+    return chunks
+# Better sentence splitting with NLTK
+import nltk
+nltk.download('punkt')
+from nltk.tokenize import sent_tokenize
+def sentence_chunk_nltk(
+    text: str,
+    max_chunk_size: int = 1000,
+    overlap_sentences: int = 2
+) -> list[str]:
+    """Chunk by sentences up to max size."""
+    sentences = sent_tokenize(text)
+    chunks = []
+    current_chunk = []
+    current_size = 0
+    for sentence in sentences:
+        sentence_size = len(sentence)
+        if current_size + sentence_size > max_chunk_size and current_chunk:
+            chunks.append(" ".join(current_chunk))
+            # Keep overlap sentences
+            current_chunk = current_chunk[-overlap_sentences:] if overlap_sentences else []
+            current_size = sum(len(s) for s in current_chunk)
+        current_chunk.append(sentence)
+        current_size += sentence_size
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+    return chunks
+```
+---
+## Semantic Chunking
+```python
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+class SemanticChunker:
+    """Chunk based on semantic similarity between sentences."""
+    def __init__(
+        self,
+        model_name: str = "all-MiniLM-L6-v2",
+        similarity_threshold: float = 0.5,
+        min_chunk_size: int = 100,
+        max_chunk_size: int = 1500
+    ):
+        self.model = SentenceTransformer(model_name)
+        self.similarity_threshold = similarity_threshold
+        self.min_chunk_size = min_chunk_size
+        self.max_chunk_size = max_chunk_size
+    def chunk(self, text: str) -> list[str]:
+        """Split text at semantic boundaries."""
+        # Split into sentences
+        sentences = self._split_sentences(text)
+        if len(sentences) <= 1:
+            return [text]
+        # Get embeddings
+        embeddings = self.model.encode(sentences)
+        # Find breakpoints based on similarity drops
+        breakpoints = self._find_breakpoints(embeddings)
+        # Create chunks
+        chunks = []
+        start = 0
+        for bp in breakpoints:
+            chunk_text = " ".join(sentences[start:bp])
+            # Handle size constraints
+            if len(chunk_text) > self.max_chunk_size:
+                # Split large chunks
+                sub_chunks = self._split_large_chunk(sentences[start:bp])
+                chunks.extend(sub_chunks)
+            elif len(chunk_text) >= self.min_chunk_size:
+                chunks.append(chunk_text)
+            elif chunks:
+                # Merge small chunk with previous
+                chunks[-1] += " " + chunk_text
+            else:
+                chunks.append(chunk_text)
+            start = bp
+        # Handle remaining sentences
+        if start < len(sentences):
+            remaining = " ".join(sentences[start:])
+            if chunks and len(remaining) < self.min_chunk_size:
+                chunks[-1] += " " + remaining
+            else:
+                chunks.append(remaining)
+        return chunks
+    def _split_sentences(self, text: str) -> list[str]:
+        """Split text into sentences."""
+        import re
+        sentences = re.split(r'(?<=[.!?])\s+', text)
+        return [s.strip() for s in sentences if s.strip()]
+    def _find_breakpoints(self, embeddings: np.ndarray) -> list[int]:
+        """Find semantic breakpoints using similarity drops."""
+        breakpoints = []
+        for i in range(1, len(embeddings)):
+            similarity = cosine_similarity(
+                embeddings[i-1:i],
+                embeddings[i:i+1]
+            )[0][0]
+            if similarity < self.similarity_threshold:
+                breakpoints.append(i)
+        return breakpoints
+    def _split_large_chunk(self, sentences: list[str]) -> list[str]:
+        """Split oversized chunk at midpoint."""
+        mid = len(sentences) // 2
+        return [
+            " ".join(sentences[:mid]),
+            " ".join(sentences[mid:])
+        ]
+# Usage
+chunker = SemanticChunker(
+    similarity_threshold=0.5,
+    min_chunk_size=200,
+    max_chunk_size=1000
+)
+semantic_chunks = chunker.chunk(document_text)
+```
+### Percentile-Based Breakpoints
+```python
+def find_breakpoints_percentile(
+    embeddings: np.ndarray,
+    percentile: int = 25
+) -> list[int]:
+    """Find breakpoints at similarity drops below percentile threshold."""
+    similarities = []
+    for i in range(1, len(embeddings)):
+        sim = cosine_similarity(
+            embeddings[i-1:i],
+            embeddings[i:i+1]
+        )[0][0]
+        similarities.append((i, sim))
+    # Dynamic threshold based on distribution
+    sim_values = [s[1] for s in similarities]
+    threshold = np.percentile(sim_values, percentile)
+    return [i for i, sim in similarities if sim < threshold]
+```
+---
+## Document-Aware Chunking
+### Markdown Chunking
+```python
+import re
+from dataclasses import dataclass
+@dataclass
+class MarkdownChunk:
+    text: str
+    heading: str | None
+    heading_level: int
+    metadata: dict
+def chunk_markdown(
+    text: str,
+    max_chunk_size: int = 1500,
+    include_heading_in_chunk: bool = True
+) -> list[MarkdownChunk]:
+    """Chunk markdown by headers while respecting structure."""
+    # Pattern to match headers
+    header_pattern = r'^(#{1,6})\s+(.+)$'
+    lines = text.split('\n')
+    chunks = []
+    current_chunk_lines = []
+    current_heading = None
+    current_level = 0
+    heading_stack = []  # For breadcrumb context
+    for line in lines:
+        header_match = re.match(header_pattern, line)
+        if header_match:
+            # Save current chunk if exists
+            if current_chunk_lines:
+                chunk_text = '\n'.join(current_chunk_lines)
+                if len(chunk_text.strip()) > 0:
+                    prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
+                    chunks.append(MarkdownChunk(
+                        text=prefix + chunk_text,
+                        heading=current_heading,
+                        heading_level=current_level,
+                        metadata={"breadcrumb": " > ".join(heading_stack)}
+                    ))
+            # Update heading context
+            level = len(header_match.group(1))
+            heading = header_match.group(2).strip()
+            # Maintain heading stack for breadcrumbs
+            while heading_stack and current_level >= level:
+                heading_stack.pop()
+                current_level -= 1
+            heading_stack.append(heading)
+            current_heading = heading
+            current_level = level
+            current_chunk_lines = []
+        else:
+            current_chunk_lines.append(line)
+            # Check chunk size
+            current_text = '\n'.join(current_chunk_lines)
+            if len(current_text) > max_chunk_size:
+                # Split at paragraph boundary
+                paragraphs = current_text.split('\n\n')
+                if len(paragraphs) > 1:
+                    split_point = len('\n\n'.join(paragraphs[:-1]))
+                    chunk_text = current_text[:split_point]
+                    prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
+                    chunks.append(MarkdownChunk(
+                        text=prefix + chunk_text,
+                        heading=current_heading,
+                        heading_level=current_level,
+                        metadata={"breadcrumb": " > ".join(heading_stack)}
+                    ))
+                    current_chunk_lines = [current_text[split_point:].strip()]
+    # Don't forget the last chunk
+    if current_chunk_lines:
+        chunk_text = '\n'.join(current_chunk_lines)
+        if len(chunk_text.strip()) > 0:
+            prefix = f"# {current_heading}\n\n" if include_heading_in_chunk and current_heading else ""
+            chunks.append(MarkdownChunk(
+                text=prefix + chunk_text,
+                heading=current_heading,
+                heading_level=current_level,
+                metadata={"breadcrumb": " > ".join(heading_stack)}
+            ))
+    return chunks
+```
+### Code-Aware Chunking
+```python
+import re
+from dataclasses import dataclass
+@dataclass
+class CodeChunk:
+    text: str
+    language: str | None
+    chunk_type: str  # "code", "text", "mixed"
+def chunk_with_code_blocks(
+    text: str,
+    max_chunk_size: int = 1500
+) -> list[CodeChunk]:
+    """Chunk text while keeping code blocks intact."""
+    # Pattern to match code blocks
+    code_block_pattern = r'```(\w+)?\n(.*?)```'
+    chunks = []
+    last_end = 0
+    for match in re.finditer(code_block_pattern, text, re.DOTALL):
+        # Text before code block
+        text_before = text[last_end:match.start()].strip()
+        if text_before:
+            # Chunk the text portion
+            text_chunks = recursive_chunk(text_before, max_chunk_size)
+            chunks.extend([
+                CodeChunk(text=t, language=None, chunk_type="text")
+                for t in text_chunks
+            ])
+        # Code block (keep intact if possible)
+        language = match.group(1)
+        code_content = match.group(2)
+        full_block = match.group(0)
+        if len(full_block) <= max_chunk_size:
+            chunks.append(CodeChunk(
+                text=full_block,
+                language=language,
+                chunk_type="code"
+            ))
+        else:
+            # Split large code blocks by function/class
+            code_chunks = split_code_block(code_content, language, max_chunk_size)
+            chunks.extend(code_chunks)
+        last_end = match.end()
+    # Remaining text after last code block
+    remaining = text[last_end:].strip()
+    if remaining:
+        text_chunks = recursive_chunk(remaining, max_chunk_size)
+        chunks.extend([
+            CodeChunk(text=t, language=None, chunk_type="text")
+            for t in text_chunks
+        ])
+    return chunks
+def split_code_block(code: str, language: str, max_size: int) -> list[CodeChunk]:
+    """Split code block at logical boundaries."""
+    # Simple function/class boundary splitting for Python
+    if language == "python":
+        pattern = r'\n(?=def |class |async def )'
+    elif language in ["javascript", "typescript"]:
+        pattern = r'\n(?=function |class |const |export )'
+    else:
+        pattern = r'\n\n'
+    parts = re.split(pattern, code)
+    chunks = []
+    current = ""
+    for part in parts:
+        if len(current) + len(part) > max_size and current:
+            chunks.append(CodeChunk(
+                text=f"```{language}\n{current}```",
+                language=language,
+                chunk_type="code"
+            ))
+            current = part
+        else:
+            current += part
+    if current:
+        chunks.append(CodeChunk(
+            text=f"```{language}\n{current}```",
+            language=language,
+            chunk_type="code"
+        ))
+    return chunks
+```
+---
+## Contextual/Agentic Chunking
+```python
+from openai import OpenAI
+def contextual_chunk(
+    document: str,
+    max_chunk_size: int = 1500
+) -> list[dict]:
+    """Use LLM to add context to each chunk.
+    The document is first split with ``recursive_chunk``; one chat-completion
+    request is then sent per chunk to obtain a short context description.
+    Returns:
+        One dict per chunk with the keys "text" (the chunk), "context" (the
+        model's reply) and "text_with_context" (both combined).
+    """
+    # NOTE(review): recursive_chunk is not defined in this snippet; it must be
+    # supplied by the surrounding code -- confirm.
+    # First, do structural chunking
+    base_chunks = recursive_chunk(document, max_chunk_size)
+    client = OpenAI()
+    contextualized_chunks = []
+    for chunk in base_chunks:
+        # Generate contextual summary
+        # NOTE(review): only the chunk itself is sent, not the full document,
+        # so the model has to infer the broader context from the excerpt.
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {
+                    "role": "system",
+                    "content": """Provide a brief context for this document chunk.
+                    Include: what topic it covers, how it relates to the broader document,
+                    and key concepts mentioned. Keep it under 100 words."""
+                },
+                {
+                    "role": "user",
+                    "content": f"Document excerpt:\n\n{chunk}"
+                }
+            ],
+            max_tokens=150
+        )
+        context = response.choices[0].message.content
+        contextualized_chunks.append({
+            "text": chunk,
+            "context": context,
+            "text_with_context": f"Context: {context}\n\nContent: {chunk}"
+        })
+    return contextualized_chunks
+```
+### Propositions-Based Chunking
+```python
+def extract_propositions(text: str) -> list[str]:
+    """Extract atomic propositions from text using LLM."""
+    client = OpenAI()
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "system",
+                "content": """Extract atomic propositions from the text.
+                Each proposition should:
+                - Be a single, complete fact
+                - Be self-contained (understandable without context)
+                - Include necessary entity references
+                Return as a JSON array of strings."""
+            },
+            {
+                "role": "user",
+                "content": text
+            }
+        ],
+        response_format={"type": "json_object"}
+    )
+    import json
+    result = json.loads(response.choices[0].message.content)
+    return result.get("propositions", [])
+# Usage: For very fine-grained retrieval
+propositions = extract_propositions(document_text)
+# Each proposition becomes its own retrievable unit
+```
+---
+## Late Chunking (for Long-Context Embeddings)
+```python
+from transformers import AutoTokenizer, AutoModel
+import torch
+class LateChunker:
+    """
+    Late chunking: embed full document, then pool token embeddings into chunks.
+    Preserves full document context while creating retrievable chunks.
+    """
+    def __init__(self, model_name: str = "jinaai/jina-embeddings-v2-base-en"):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+        self.model.eval()
+    def chunk_and_embed(
+        self,
+        text: str,
+        chunk_size: int = 512,
+        overlap: int = 64
+    ) -> list[dict]:
+        """
+        Embed full document, then create chunk embeddings via mean pooling.
+        """
+        # Tokenize full document
+        inputs = self.tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=8192  # Model's max context
+        )
+        # Get token-level embeddings
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            token_embeddings = outputs.last_hidden_state[0]  # [seq_len, hidden_dim]
+        # Get token-to-text mapping
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        # Create chunks from token embeddings
+        chunks = []
+        seq_len = token_embeddings.shape[0]
+        start = 0
+        while start < seq_len:
+            end = min(start + chunk_size, seq_len)
+            # Mean pool token embeddings for this chunk
+            chunk_embedding = token_embeddings[start:end].mean(dim=0).numpy()
+            # Reconstruct text for this chunk
+            chunk_token_ids = inputs["input_ids"][0][start:end]
+            chunk_text = self.tokenizer.decode(chunk_token_ids, skip_special_tokens=True)
+            chunks.append({
+                "text": chunk_text,
+                "embedding": chunk_embedding,
+                "start_token": start,
+                "end_token": end
+            })
+            start = end - overlap
+        return chunks
+# Usage
+late_chunker = LateChunker()
+chunks_with_embeddings = late_chunker.chunk_and_embed(
+    long_document,
+    chunk_size=512,
+    overlap=64
+)
+```
+---
+## Metadata Enrichment
+```python
+from dataclasses import dataclass
+from datetime import datetime
+import hashlib
+@dataclass
+class EnrichedChunk:
+    text: str
+    embedding: list[float] | None
+    metadata: dict
+def enrich_chunk(
+    text: str,
+    source_file: str,
+    chunk_index: int,
+    total_chunks: int,
+    additional_metadata: dict | None = None
+) -> EnrichedChunk:
+    """Add comprehensive metadata to chunk."""
+    metadata = {
+        # Source tracking
+        "source": source_file,
+        "chunk_index": chunk_index,
+        "total_chunks": total_chunks,
+        # Content characteristics
+        "char_count": len(text),
+        "word_count": len(text.split()),
+        "content_hash": hashlib.md5(text.encode()).hexdigest()[:12],
+        # Temporal
+        "indexed_at": datetime.utcnow().isoformat(),
+        # Position context
+        "position": "start" if chunk_index == 0 else (
+            "end" if chunk_index == total_chunks - 1 else "middle"
+        )
+    }
+    if additional_metadata:
+        metadata.update(additional_metadata)
+    return EnrichedChunk(text=text, embedding=None, metadata=metadata)
+```
+---
+## Chunk Size Selection Guide
+| Document Type | Recommended Size | Overlap | Rationale |
+|--------------|------------------|---------|-----------|
+| FAQ/Q&A | 200-400 tokens | 20-50 | Keep Q&A pairs together |
+| Technical docs | 400-600 tokens | 50-100 | Balance context vs precision |
+| Legal/contracts | 600-800 tokens | 100-150 | Preserve clause context |
+| Code documentation | 300-500 tokens | 50-100 | Keep function docs together |
+| Chat transcripts | 150-300 tokens | 25-50 | Natural turn boundaries |
+| Research papers | 500-800 tokens | 100-200 | Section-level coherence |
+---
+## Quick Reference
+| Strategy | Use Case | Code Pattern |
+|----------|----------|--------------|
+| Fixed-size | Logs, baseline | `text[i:i+chunk_size]` |
+| Recursive | General text | Split by `["\n\n", "\n", ". "]` |
+| Sentence | Q&A content | `sent_tokenize()` + merge |
+| Semantic | Technical docs | Similarity-based breaks |
+| Markdown | Documentation | Header-aware splitting |
+| Late chunking | Long-context models | Embed full, pool chunks |
+## Related Skills
+- **RAG Architect** - Integration with vector databases
+- **Python Pro** - Preprocessing pipelines
+- **NLP Engineer** - Tokenization and text processing