maestro-bundle 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/bundle-ai-agents/skills/agent-orchestration/SKILL.md +107 -41
- package/templates/bundle-ai-agents/skills/agent-orchestration/references/graph-patterns.md +50 -0
- package/templates/bundle-ai-agents/skills/agent-orchestration/references/routing-strategies.md +47 -0
- package/templates/bundle-ai-agents/skills/api-design/SKILL.md +125 -16
- package/templates/bundle-ai-agents/skills/api-design/references/pydantic-patterns.md +72 -0
- package/templates/bundle-ai-agents/skills/api-design/references/rest-conventions.md +51 -0
- package/templates/bundle-ai-agents/skills/clean-architecture/SKILL.md +113 -21
- package/templates/bundle-ai-agents/skills/clean-architecture/references/dependency-injection.md +60 -0
- package/templates/bundle-ai-agents/skills/clean-architecture/references/layer-rules.md +56 -0
- package/templates/bundle-ai-agents/skills/context-engineering/SKILL.md +104 -36
- package/templates/bundle-ai-agents/skills/context-engineering/references/compression-techniques.md +76 -0
- package/templates/bundle-ai-agents/skills/context-engineering/references/context-budget-calculator.md +45 -0
- package/templates/bundle-ai-agents/skills/database-modeling/SKILL.md +146 -19
- package/templates/bundle-ai-agents/skills/database-modeling/references/index-strategies.md +48 -0
- package/templates/bundle-ai-agents/skills/database-modeling/references/naming-conventions.md +27 -0
- package/templates/bundle-ai-agents/skills/docker-containerization/SKILL.md +124 -15
- package/templates/bundle-ai-agents/skills/docker-containerization/references/compose-patterns.md +97 -0
- package/templates/bundle-ai-agents/skills/docker-containerization/references/dockerfile-checklist.md +37 -0
- package/templates/bundle-ai-agents/skills/eval-testing/SKILL.md +113 -25
- package/templates/bundle-ai-agents/skills/eval-testing/references/eval-types.md +52 -0
- package/templates/bundle-ai-agents/skills/eval-testing/references/golden-dataset-template.md +59 -0
- package/templates/bundle-ai-agents/skills/memory-management/SKILL.md +112 -28
- package/templates/bundle-ai-agents/skills/memory-management/references/memory-tiers.md +41 -0
- package/templates/bundle-ai-agents/skills/memory-management/references/namespace-conventions.md +41 -0
- package/templates/bundle-ai-agents/skills/prompt-engineering/SKILL.md +139 -47
- package/templates/bundle-ai-agents/skills/prompt-engineering/references/anti-patterns.md +59 -0
- package/templates/bundle-ai-agents/skills/prompt-engineering/references/prompt-templates.md +75 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/SKILL.md +104 -27
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/chunking-strategies.md +27 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/embedding-models.md +31 -0
- package/templates/bundle-ai-agents/skills/rag-pipeline/references/rag-evaluation.md +39 -0
- package/templates/bundle-ai-agents/skills/testing-strategy/SKILL.md +127 -18
- package/templates/bundle-ai-agents/skills/testing-strategy/references/fixture-patterns.md +81 -0
- package/templates/bundle-ai-agents/skills/testing-strategy/references/naming-conventions.md +69 -0
- package/templates/bundle-base/skills/branch-strategy/SKILL.md +134 -21
- package/templates/bundle-base/skills/branch-strategy/references/branch-rules.md +40 -0
- package/templates/bundle-base/skills/code-review/SKILL.md +123 -38
- package/templates/bundle-base/skills/code-review/references/review-checklist.md +45 -0
- package/templates/bundle-base/skills/commit-pattern/SKILL.md +98 -39
- package/templates/bundle-base/skills/commit-pattern/references/conventional-commits.md +40 -0
- package/templates/bundle-data-pipeline/skills/data-preprocessing/SKILL.md +110 -19
- package/templates/bundle-data-pipeline/skills/data-preprocessing/references/pandas-cheatsheet.md +63 -0
- package/templates/bundle-data-pipeline/skills/data-preprocessing/references/pandera-schemas.md +44 -0
- package/templates/bundle-data-pipeline/skills/docker-containerization/SKILL.md +132 -16
- package/templates/bundle-data-pipeline/skills/docker-containerization/references/compose-patterns.md +82 -0
- package/templates/bundle-data-pipeline/skills/docker-containerization/references/dockerfile-best-practices.md +57 -0
- package/templates/bundle-data-pipeline/skills/feature-engineering/SKILL.md +143 -45
- package/templates/bundle-data-pipeline/skills/feature-engineering/references/encoding-guide.md +41 -0
- package/templates/bundle-data-pipeline/skills/feature-engineering/references/scaling-guide.md +38 -0
- package/templates/bundle-data-pipeline/skills/mlops-pipeline/SKILL.md +156 -37
- package/templates/bundle-data-pipeline/skills/mlops-pipeline/references/mlflow-commands.md +69 -0
- package/templates/bundle-data-pipeline/skills/model-training/SKILL.md +152 -33
- package/templates/bundle-data-pipeline/skills/model-training/references/evaluation-metrics.md +52 -0
- package/templates/bundle-data-pipeline/skills/model-training/references/model-selection-guide.md +41 -0
- package/templates/bundle-data-pipeline/skills/rag-pipeline/SKILL.md +127 -39
- package/templates/bundle-data-pipeline/skills/rag-pipeline/references/chunking-strategies.md +51 -0
- package/templates/bundle-data-pipeline/skills/rag-pipeline/references/embedding-models.md +49 -0
- package/templates/bundle-frontend-spa/skills/authentication/SKILL.md +196 -13
- package/templates/bundle-frontend-spa/skills/authentication/references/jwt-security.md +41 -0
- package/templates/bundle-frontend-spa/skills/component-design/SKILL.md +191 -41
- package/templates/bundle-frontend-spa/skills/component-design/references/accessibility-checklist.md +41 -0
- package/templates/bundle-frontend-spa/skills/component-design/references/tailwind-patterns.md +65 -0
- package/templates/bundle-frontend-spa/skills/e2e-testing/SKILL.md +241 -79
- package/templates/bundle-frontend-spa/skills/e2e-testing/references/playwright-selectors.md +66 -0
- package/templates/bundle-frontend-spa/skills/e2e-testing/references/test-patterns.md +82 -0
- package/templates/bundle-frontend-spa/skills/integration-api/SKILL.md +221 -31
- package/templates/bundle-frontend-spa/skills/integration-api/references/api-patterns.md +81 -0
- package/templates/bundle-frontend-spa/skills/react-patterns/SKILL.md +195 -70
- package/templates/bundle-frontend-spa/skills/react-patterns/references/component-checklist.md +22 -0
- package/templates/bundle-frontend-spa/skills/react-patterns/references/hook-patterns.md +63 -0
- package/templates/bundle-frontend-spa/skills/responsive-layout/SKILL.md +162 -22
- package/templates/bundle-frontend-spa/skills/responsive-layout/references/breakpoint-guide.md +63 -0
- package/templates/bundle-frontend-spa/skills/state-management/SKILL.md +158 -30
- package/templates/bundle-frontend-spa/skills/state-management/references/react-query-config.md +64 -0
- package/templates/bundle-frontend-spa/skills/state-management/references/state-patterns.md +78 -0
- package/templates/bundle-jhipster-microservices/skills/ci-cd-pipeline/SKILL.md +135 -45
- package/templates/bundle-jhipster-microservices/skills/ci-cd-pipeline/references/gitlab-ci-templates.md +93 -0
- package/templates/bundle-jhipster-microservices/skills/clean-architecture/SKILL.md +87 -21
- package/templates/bundle-jhipster-microservices/skills/clean-architecture/references/layer-rules.md +78 -0
- package/templates/bundle-jhipster-microservices/skills/ddd-tactical/SKILL.md +94 -25
- package/templates/bundle-jhipster-microservices/skills/ddd-tactical/references/ddd-patterns.md +48 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/SKILL.md +63 -21
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/references/angular-microservices.md +40 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-angular/references/angular-structure.md +59 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-docker-k8s/SKILL.md +125 -91
- package/templates/bundle-jhipster-microservices/skills/jhipster-docker-k8s/references/docker-k8s-commands.md +68 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/SKILL.md +72 -20
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/references/cross-service-entities.md +36 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-entities/references/jdl-types.md +56 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-gateway/SKILL.md +80 -8
- package/templates/bundle-jhipster-microservices/skills/jhipster-gateway/references/gateway-config.md +43 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-kafka/SKILL.md +115 -22
- package/templates/bundle-jhipster-microservices/skills/jhipster-kafka/references/kafka-events.md +39 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-registry/SKILL.md +92 -23
- package/templates/bundle-jhipster-microservices/skills/jhipster-registry/references/consul-config.md +61 -0
- package/templates/bundle-jhipster-microservices/skills/jhipster-service/SKILL.md +81 -18
- package/templates/bundle-jhipster-microservices/skills/jhipster-service/references/service-patterns.md +40 -0
- package/templates/bundle-jhipster-microservices/skills/testing-strategy/SKILL.md +101 -20
- package/templates/bundle-jhipster-microservices/skills/testing-strategy/references/test-naming.md +55 -0
- package/templates/bundle-jhipster-monorepo/skills/clean-architecture/SKILL.md +87 -21
- package/templates/bundle-jhipster-monorepo/skills/clean-architecture/references/layer-rules.md +78 -0
- package/templates/bundle-jhipster-monorepo/skills/ddd-tactical/SKILL.md +94 -25
- package/templates/bundle-jhipster-monorepo/skills/ddd-tactical/references/ddd-patterns.md +48 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-angular/SKILL.md +99 -52
- package/templates/bundle-jhipster-monorepo/skills/jhipster-angular/references/angular-structure.md +59 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-entities/SKILL.md +89 -36
- package/templates/bundle-jhipster-monorepo/skills/jhipster-entities/references/jdl-types.md +56 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-liquibase/SKILL.md +123 -23
- package/templates/bundle-jhipster-monorepo/skills/jhipster-liquibase/references/liquibase-operations.md +95 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-security/SKILL.md +106 -19
- package/templates/bundle-jhipster-monorepo/skills/jhipster-security/references/security-checklist.md +47 -0
- package/templates/bundle-jhipster-monorepo/skills/jhipster-spring/SKILL.md +84 -16
- package/templates/bundle-jhipster-monorepo/skills/jhipster-spring/references/spring-layers.md +41 -0
- package/templates/bundle-jhipster-monorepo/skills/testing-strategy/SKILL.md +101 -20
- package/templates/bundle-jhipster-monorepo/skills/testing-strategy/references/test-naming.md +55 -0
|
@@ -1,54 +1,139 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: code-review
|
|
3
|
-
description:
|
|
3
|
+
description: Review code following organization standards for quality, security, and patterns. Use when reviewing a PR, evaluating code quality, or when a developer asks for a review.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Maestro
|
|
4
6
|
---
|
|
5
7
|
|
|
6
|
-
# Code Review
|
|
8
|
+
# Code Review
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Perform structured code reviews following the organization's quality, security, and pattern standards.
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
12
|
+
## When to Use
|
|
13
|
+
- When asked to review a pull request or merge request
|
|
14
|
+
- When evaluating code quality of a file or module
|
|
15
|
+
- When a developer asks for feedback on their code
|
|
16
|
+
- Before approving a merge
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
- Código duplicado? Extrair se repetir 3+ vezes.
|
|
21
|
-
- Código morto? Remover.
|
|
18
|
+
## Available Operations
|
|
19
|
+
1. Full PR review with categorized feedback
|
|
20
|
+
2. Focused review (security-only, quality-only, tests-only)
|
|
21
|
+
3. Quick review of a single file
|
|
22
|
+
4. Compliance check against organization patterns
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
- Inputs validados nas fronteiras?
|
|
25
|
-
- Secrets hardcoded? REJEITAR.
|
|
26
|
-
- SQL injection possível? Usar parameterized queries.
|
|
27
|
-
- Rate limiting presente nas APIs?
|
|
24
|
+
## Multi-Step Workflow
|
|
28
25
|
|
|
29
|
-
###
|
|
30
|
-
|
|
31
|
-
- Nomes de testes descritivos?
|
|
32
|
-
- Testes cobrem caminho feliz E fluxos alternativos?
|
|
26
|
+
### Step 1: Gather Context
|
|
27
|
+
Read the changed files and understand the purpose of the changes.
|
|
33
28
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
29
|
+
```bash
|
|
30
|
+
# If reviewing a PR/MR
|
|
31
|
+
git diff main...HEAD --stat
|
|
32
|
+
git diff main...HEAD
|
|
33
|
+
git log main..HEAD --oneline
|
|
39
34
|
|
|
40
|
-
|
|
35
|
+
# If reviewing staged (not yet committed) changes
|
|
36
|
+
git diff --cached
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Step 2: Run Automated Checks
|
|
40
|
+
Execute linters, tests, and static analysis before manual review.
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Backend (Java/Spring)
|
|
44
|
+
./mvnw checkstyle:check
|
|
45
|
+
./mvnw test
|
|
46
|
+
|
|
47
|
+
# Frontend (Angular)
|
|
48
|
+
npm run lint
|
|
49
|
+
npm run test
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Step 3: Apply Review Checklist
|
|
53
|
+
|
|
54
|
+
**Correctness:**
|
|
55
|
+
- Does the code do what the task/spec requires?
|
|
56
|
+
- Are edge cases covered?
|
|
57
|
+
- Are exception flows handled?
|
|
58
|
+
|
|
59
|
+
**Quality:**
|
|
60
|
+
- Files with more than 500 lines? Split them.
|
|
61
|
+
- Functions with more than 20 lines? Extract.
|
|
62
|
+
- Nested ifs (hadouken)? Use early return.
|
|
63
|
+
- Descriptive names? (`calculateComplianceScore` > `calc`)
|
|
64
|
+
- Duplicated code? Extract if repeated 3+ times.
|
|
65
|
+
- Dead code? Remove it.
|
|
66
|
+
|
|
67
|
+
**Security:**
|
|
68
|
+
- Inputs validated at boundaries?
|
|
69
|
+
- Hardcoded secrets? REJECT.
|
|
70
|
+
- SQL injection possible? Use parameterized queries.
|
|
71
|
+
- Rate limiting present on APIs?
|
|
41
72
|
|
|
42
|
-
|
|
73
|
+
**Tests:**
|
|
74
|
+
- Unit tests for business rules?
|
|
75
|
+
- Descriptive test names?
|
|
76
|
+
- Tests cover happy path AND alternative flows?
|
|
43
77
|
|
|
44
|
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
78
|
+
**Patterns:**
|
|
79
|
+
- Commit follows Conventional Commits?
|
|
80
|
+
- Branch follows naming convention?
|
|
81
|
+
- Directory structure by domain?
|
|
82
|
+
- Project ubiquitous language respected?
|
|
48
83
|
|
|
49
|
-
|
|
84
|
+
### Step 4: Categorize and Deliver Feedback
|
|
85
|
+
|
|
86
|
+
Use these categories for each comment:
|
|
87
|
+
|
|
88
|
+
- **[BLOCKER]** -- Must be fixed before merge
|
|
89
|
+
- **[SUGGESTION]** -- Recommended improvement, not mandatory
|
|
90
|
+
- **[QUESTION]** -- Question about code intent
|
|
91
|
+
- **[PRAISE]** -- Something well done worth highlighting
|
|
92
|
+
|
|
93
|
+
Format:
|
|
50
94
|
```
|
|
51
|
-
[BLOCKER]
|
|
52
|
-
[SUGGESTION]
|
|
53
|
-
[PRAISE]
|
|
95
|
+
[BLOCKER] line 45: API key hardcoded. Move to environment variable.
|
|
96
|
+
[SUGGESTION] line 120: This nested if would be more readable with early return.
|
|
97
|
+
[PRAISE] Good extraction of the ComplianceScore value object.
|
|
54
98
|
```
|
|
99
|
+
|
|
100
|
+
### Step 5: Provide Summary
|
|
101
|
+
End the review with an overall assessment:
|
|
102
|
+
- APPROVE: Ready to merge
|
|
103
|
+
- REQUEST CHANGES: Has blockers that must be fixed
|
|
104
|
+
- COMMENT: Has suggestions but no blockers
|
|
105
|
+
|
|
106
|
+
## Resources
|
|
107
|
+
- `references/review-checklist.md` - Complete review checklist with detailed criteria
|
|
108
|
+
|
|
109
|
+
## Examples
|
|
110
|
+
### Example 1: Full PR Review
|
|
111
|
+
User asks: "Review this PR"
|
|
112
|
+
Response approach:
|
|
113
|
+
1. Run `git diff main...HEAD` to see all changes
|
|
114
|
+
2. Read each changed file
|
|
115
|
+
3. Apply the checklist to each file
|
|
116
|
+
4. Deliver categorized feedback with line references
|
|
117
|
+
5. Provide overall assessment (APPROVE / REQUEST CHANGES / COMMENT)
|
|
118
|
+
|
|
119
|
+
### Example 2: Security-Focused Review
|
|
120
|
+
User asks: "Check this code for security issues"
|
|
121
|
+
Response approach:
|
|
122
|
+
1. Read the target files
|
|
123
|
+
2. Check for hardcoded secrets, SQL injection, missing input validation
|
|
124
|
+
3. Verify CORS, CSRF, rate limiting configuration
|
|
125
|
+
4. Report findings with severity
|
|
126
|
+
|
|
127
|
+
### Example 3: Quick File Review
|
|
128
|
+
User asks: "Is this service class well-structured?"
|
|
129
|
+
Response approach:
|
|
130
|
+
1. Read the file
|
|
131
|
+
2. Check method length, naming, separation of concerns
|
|
132
|
+
3. Verify dependency injection patterns
|
|
133
|
+
4. Provide focused feedback
|
|
134
|
+
|
|
135
|
+
## Notes
|
|
136
|
+
- Always read the full file context before commenting -- do not review snippets in isolation
|
|
137
|
+
- Be specific with line numbers and concrete suggestions
|
|
138
|
+
- Balance criticism with praise -- highlight what was done well
|
|
139
|
+
- For security issues, always mark as [BLOCKER]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Code Review Checklist
|
|
2
|
+
|
|
3
|
+
## Correctness
|
|
4
|
+
- [ ] Code implements the task/spec requirements
|
|
5
|
+
- [ ] Edge cases are handled
|
|
6
|
+
- [ ] Exception flows are treated properly
|
|
7
|
+
- [ ] No off-by-one errors
|
|
8
|
+
- [ ] Null/undefined values are handled
|
|
9
|
+
|
|
10
|
+
## Quality
|
|
11
|
+
- [ ] Files under 500 lines
|
|
12
|
+
- [ ] Functions under 20 lines
|
|
13
|
+
- [ ] No nested ifs (use early return)
|
|
14
|
+
- [ ] Descriptive variable and function names
|
|
15
|
+
- [ ] No duplicated code (DRY: extract if repeated 3+ times)
|
|
16
|
+
- [ ] No dead code or commented-out code
|
|
17
|
+
- [ ] Single Responsibility Principle followed
|
|
18
|
+
|
|
19
|
+
## Security
|
|
20
|
+
- [ ] Inputs validated at boundaries (controllers, API endpoints)
|
|
21
|
+
- [ ] No hardcoded secrets, API keys, or passwords
|
|
22
|
+
- [ ] Parameterized queries used (no SQL injection risk)
|
|
23
|
+
- [ ] Rate limiting on public endpoints
|
|
24
|
+
- [ ] CORS properly configured (not `*` in production)
|
|
25
|
+
- [ ] Authentication/authorization enforced on protected endpoints
|
|
26
|
+
|
|
27
|
+
## Tests
|
|
28
|
+
- [ ] Unit tests for business rules
|
|
29
|
+
- [ ] Descriptive test names (`test_should_X_when_Y`)
|
|
30
|
+
- [ ] Happy path covered
|
|
31
|
+
- [ ] Error/alternative flows covered
|
|
32
|
+
- [ ] No test interdependencies
|
|
33
|
+
|
|
34
|
+
## Patterns
|
|
35
|
+
- [ ] Commits follow Conventional Commits
|
|
36
|
+
- [ ] Branch follows naming convention
|
|
37
|
+
- [ ] Directory structure follows domain organization
|
|
38
|
+
- [ ] Ubiquitous language respected
|
|
39
|
+
- [ ] DTOs at API boundary, entities in domain
|
|
40
|
+
|
|
41
|
+
## Feedback Categories
|
|
42
|
+
- **[BLOCKER]** -- Must fix before merge
|
|
43
|
+
- **[SUGGESTION]** -- Recommended, not mandatory
|
|
44
|
+
- **[QUESTION]** -- Clarification needed
|
|
45
|
+
- **[PRAISE]** -- Well done, worth highlighting
|
|
@@ -1,58 +1,117 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: commit-pattern
|
|
3
|
-
description:
|
|
3
|
+
description: Generate commit messages following Conventional Commits standard. Use when committing changes, creating commit messages, staging files, or finishing a task.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Maestro
|
|
4
6
|
---
|
|
5
7
|
|
|
6
|
-
#
|
|
8
|
+
# Commit Pattern
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Generate structured commit messages following Conventional Commits and the organization's standards.
|
|
9
11
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
## When to Use
|
|
13
|
+
- When committing code changes
|
|
14
|
+
- When asked to generate a commit message
|
|
15
|
+
- When finishing a task and need to commit the result
|
|
16
|
+
- When reviewing commit messages for compliance
|
|
17
|
+
|
|
18
|
+
## Available Operations
|
|
19
|
+
1. Generate a commit message from staged changes
|
|
20
|
+
2. Validate an existing commit message
|
|
21
|
+
3. Create a multi-line commit with body and footer
|
|
22
|
+
4. Stage and commit changes in one workflow
|
|
23
|
+
|
|
24
|
+
## Multi-Step Workflow
|
|
25
|
+
|
|
26
|
+
### Step 1: Review Staged Changes
|
|
27
|
+
Inspect what has been changed to determine the appropriate commit type and scope.
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
git status
|
|
31
|
+
git diff --cached --stat
|
|
32
|
+
git diff --cached
|
|
12
33
|
```
|
|
13
34
|
|
|
14
|
-
|
|
35
|
+
### Step 2: Determine Commit Type
|
|
15
36
|
|
|
16
|
-
|
|
|
37
|
+
| Type | When to use |
|
|
17
38
|
|---|---|
|
|
18
|
-
| `feat` |
|
|
19
|
-
| `fix` |
|
|
20
|
-
| `refactor` |
|
|
21
|
-
| `docs` |
|
|
22
|
-
| `test` |
|
|
23
|
-
| `chore` | Build, deps, configs,
|
|
24
|
-
| `ci` |
|
|
25
|
-
| `perf` |
|
|
26
|
-
|
|
27
|
-
## Regras
|
|
28
|
-
|
|
29
|
-
1. Descrição no imperativo: "adicionar", não "adicionado" ou "adicionando"
|
|
30
|
-
2. Primeira letra minúscula
|
|
31
|
-
3. Sem ponto final
|
|
32
|
-
4. Máximo 72 caracteres na primeira linha
|
|
33
|
-
5. Escopo é o módulo/feature afetada
|
|
34
|
-
6. Se a mudança quebra compatibilidade, adicionar `BREAKING CHANGE:` no corpo
|
|
35
|
-
|
|
36
|
-
## Exemplos
|
|
39
|
+
| `feat` | New user-facing functionality |
|
|
40
|
+
| `fix` | Bug fix |
|
|
41
|
+
| `refactor` | Code change that does not alter behavior |
|
|
42
|
+
| `docs` | Documentation-only changes |
|
|
43
|
+
| `test` | Adding or fixing tests |
|
|
44
|
+
| `chore` | Build, deps, configs, maintenance tasks |
|
|
45
|
+
| `ci` | CI/CD changes (pipelines, workflows) |
|
|
46
|
+
| `perf` | Performance improvement |
|
|
37
47
|
|
|
48
|
+
### Step 3: Compose the Message
|
|
49
|
+
Follow the format:
|
|
38
50
|
```
|
|
39
|
-
|
|
40
|
-
fix(agents): corrigir timeout na execução de skill
|
|
41
|
-
refactor(bundles): extrair validação para value object
|
|
42
|
-
test(tracking): adicionar testes para eventos MCP
|
|
43
|
-
docs(readme): atualizar instruções de instalação
|
|
44
|
-
chore(deps): atualizar langchain para 0.3.x
|
|
45
|
-
ci(gitlab): adicionar stage de compliance check
|
|
51
|
+
<type>(<scope>): <imperative description in Portuguese>
|
|
46
52
|
```
|
|
47
53
|
|
|
48
|
-
|
|
54
|
+
Rules:
|
|
55
|
+
1. Imperative mood: "adicionar", not "adicionado" or "adicionando"
|
|
56
|
+
2. First letter lowercase
|
|
57
|
+
3. No period at the end
|
|
58
|
+
4. Maximum 72 characters on the first line
|
|
59
|
+
5. Scope is the affected module/feature
|
|
60
|
+
6. If the change breaks compatibility, add `BREAKING CHANGE:` in the footer
|
|
49
61
|
|
|
50
|
-
|
|
62
|
+
### Step 4: Commit
|
|
51
63
|
|
|
52
|
-
```
|
|
53
|
-
|
|
64
|
+
```bash
|
|
65
|
+
# Simple commit
|
|
66
|
+
git commit -m "feat(demands): adicionar decomposicao automatica de tasks"
|
|
67
|
+
|
|
68
|
+
# Commit with body for complex changes
|
|
69
|
+
git commit -m "refactor(orchestrator): simplificar alocacao de agentes
|
|
54
70
|
|
|
55
|
-
A
|
|
71
|
+
A alocacao anterior usava um loop O(n^2) comparando todos os agentes
|
|
56
72
|
com todas as tasks. Agora usa um mapa indexado por tipo de agente,
|
|
57
|
-
reduzindo para O(n).
|
|
73
|
+
reduzindo para O(n)."
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Step 5: Verify
|
|
77
|
+
```bash
|
|
78
|
+
git log --oneline -1
|
|
58
79
|
```
|
|
80
|
+
|
|
81
|
+
## Resources
|
|
82
|
+
- `references/conventional-commits.md` - Full Conventional Commits specification and examples
|
|
83
|
+
|
|
84
|
+
## Examples
|
|
85
|
+
### Example 1: New Feature
|
|
86
|
+
User asks: "Commit these changes that add automatic task decomposition"
|
|
87
|
+
Response approach:
|
|
88
|
+
1. Run `git diff --cached --stat` to see changed files
|
|
89
|
+
2. Identify scope from file paths (e.g., `demands/`)
|
|
90
|
+
3. Generate: `feat(demands): adicionar decomposicao automatica de tasks`
|
|
91
|
+
4. Run `git commit -m "..."` with the message
|
|
92
|
+
|
|
93
|
+
### Example 2: Bug Fix with Context
|
|
94
|
+
User asks: "Commit the timeout fix"
|
|
95
|
+
Response approach:
|
|
96
|
+
1. Run `git diff --cached` to understand the fix
|
|
97
|
+
2. Generate: `fix(agents): corrigir timeout na execucao de skill`
|
|
98
|
+
3. If the fix is complex, add a body explaining why it happened
|
|
99
|
+
|
|
100
|
+
### Example 3: Breaking Change
|
|
101
|
+
User asks: "Commit these API changes that break backward compatibility"
|
|
102
|
+
Response approach:
|
|
103
|
+
1. Review changes to confirm breaking nature
|
|
104
|
+
2. Generate message with BREAKING CHANGE footer:
|
|
105
|
+
```
|
|
106
|
+
feat(api): alterar formato de resposta do endpoint de demands
|
|
107
|
+
|
|
108
|
+
BREAKING CHANGE: o campo 'tasks' agora retorna objetos completos
|
|
109
|
+
ao inves de apenas IDs. Clientes que dependem do formato antigo
|
|
110
|
+
precisam atualizar.
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Notes
|
|
114
|
+
- Always check `git diff --cached` before composing the message to ensure accuracy
|
|
115
|
+
- When in doubt about scope, use the top-level directory name of the changed files
|
|
116
|
+
- For commits touching multiple scopes, use the most significant one or omit the scope
|
|
117
|
+
- Never commit secrets, credentials, or `.env` files
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Conventional Commits Reference
|
|
2
|
+
|
|
3
|
+
## Format
|
|
4
|
+
```
|
|
5
|
+
<type>(<scope>): <description>
|
|
6
|
+
|
|
7
|
+
[optional body]
|
|
8
|
+
|
|
9
|
+
[optional footer(s)]
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Types
|
|
13
|
+
| Type | Description | Example |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| `feat` | New feature | `feat(demands): adicionar decomposicao automatica de tasks` |
|
|
16
|
+
| `fix` | Bug fix | `fix(agents): corrigir timeout na execucao de skill` |
|
|
17
|
+
| `refactor` | Code restructuring | `refactor(bundles): extrair validacao para value object` |
|
|
18
|
+
| `docs` | Documentation | `docs(readme): atualizar instrucoes de instalacao` |
|
|
19
|
+
| `test` | Tests | `test(tracking): adicionar testes para eventos MCP` |
|
|
20
|
+
| `chore` | Maintenance | `chore(deps): atualizar langchain para 0.3.x` |
|
|
21
|
+
| `ci` | CI/CD | `ci(gitlab): adicionar stage de compliance check` |
|
|
22
|
+
| `perf` | Performance | `perf(queries): otimizar consulta de demands ativas` |
|
|
23
|
+
|
|
24
|
+
## Rules
|
|
25
|
+
1. Imperative mood in Portuguese: "adicionar" not "adicionado"
|
|
26
|
+
2. First letter lowercase
|
|
27
|
+
3. No period at end
|
|
28
|
+
4. Max 72 characters on first line
|
|
29
|
+
5. Scope = affected module/feature
|
|
30
|
+
6. Breaking changes: add `BREAKING CHANGE:` in footer
|
|
31
|
+
|
|
32
|
+
## Body Guidelines
|
|
33
|
+
- Explain **why**, not what (the diff shows what)
|
|
34
|
+
- Wrap at 72 characters
|
|
35
|
+
- Separate from subject with blank line
|
|
36
|
+
|
|
37
|
+
## Footer Guidelines
|
|
38
|
+
- `BREAKING CHANGE: <description>` for incompatible changes
|
|
39
|
+
- `Refs: #123` for issue references
|
|
40
|
+
- `Co-authored-by: Name <email>` for pair programming
|
|
@@ -1,22 +1,60 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: data-preprocessing
|
|
3
|
-
description:
|
|
3
|
+
description: Preprocess data with Pandas and NumPy including cleaning, transformation, and exploratory analysis. Use when you need to clean data, run EDA, validate schemas, or prepare datasets for ML pipelines.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Maestro
|
|
4
6
|
---
|
|
5
7
|
|
|
6
8
|
# Data Preprocessing
|
|
7
9
|
|
|
8
|
-
|
|
10
|
+
Build data cleaning and preparation pipelines using Pandas, NumPy, and Pandera.
|
|
9
11
|
|
|
12
|
+
## When to Use
|
|
13
|
+
- User needs to clean a raw CSV/Parquet/JSON dataset
|
|
14
|
+
- User asks for exploratory data analysis (EDA)
|
|
15
|
+
- User needs to handle missing values, duplicates, or type conversions
|
|
16
|
+
- User wants to validate data against a schema
|
|
17
|
+
- User needs to prepare data before feature engineering or model training
|
|
18
|
+
|
|
19
|
+
## Available Operations
|
|
20
|
+
1. Run exploratory data analysis (EDA) on a dataset
|
|
21
|
+
2. Build a cleaning pipeline (dedup, nulls, types, normalization)
|
|
22
|
+
3. Validate data with Pandera schemas
|
|
23
|
+
4. Profile data quality and generate reports
|
|
24
|
+
5. Export cleaned data to Parquet/CSV
|
|
25
|
+
|
|
26
|
+
## Multi-Step Workflow
|
|
27
|
+
|
|
28
|
+
### Step 1: Install Dependencies
|
|
29
|
+
```bash
|
|
30
|
+
pip install pandas numpy pandera pyarrow openpyxl
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Step 2: Load and Inspect Data
|
|
10
34
|
```python
|
|
11
35
|
import pandas as pd
|
|
12
36
|
import numpy as np
|
|
13
37
|
|
|
38
|
+
# Load data (adjust path/format as needed)
|
|
39
|
+
df = pd.read_csv("data/raw/dataset.csv")
|
|
40
|
+
# or: df = pd.read_parquet("data/raw/dataset.parquet")
|
|
41
|
+
# or: df = pd.read_json("data/raw/dataset.json")
|
|
42
|
+
|
|
43
|
+
# Quick inspection
|
|
44
|
+
print(f"Shape: {df.shape}")
|
|
45
|
+
print(f"Columns: {list(df.columns)}")
|
|
46
|
+
print(df.dtypes)
|
|
47
|
+
print(df.head())
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Step 3: Run Exploratory Data Analysis
|
|
51
|
+
```python
|
|
14
52
|
def eda_report(df: pd.DataFrame) -> dict:
|
|
15
53
|
return {
|
|
16
54
|
"shape": df.shape,
|
|
17
55
|
"dtypes": df.dtypes.to_dict(),
|
|
18
56
|
"nulls": df.isnull().sum().to_dict(),
|
|
19
|
-
"null_pct": (df.isnull().sum() / len(df) * 100).to_dict(),
|
|
57
|
+
"null_pct": (df.isnull().sum() / len(df) * 100).round(2).to_dict(),
|
|
20
58
|
"duplicates": df.duplicated().sum(),
|
|
21
59
|
"numeric_stats": df.describe().to_dict(),
|
|
22
60
|
"categorical_counts": {
|
|
@@ -24,52 +62,105 @@ def eda_report(df: pd.DataFrame) -> dict:
|
|
|
24
62
|
for col in df.select_dtypes(include='object').columns
|
|
25
63
|
}
|
|
26
64
|
}
|
|
27
|
-
```
|
|
28
65
|
|
|
29
|
-
|
|
66
|
+
report = eda_report(df)
|
|
67
|
+
for key, value in report.items():
|
|
68
|
+
print(f"\n--- {key} ---")
|
|
69
|
+
print(value)
|
|
70
|
+
```
|
|
30
71
|
|
|
72
|
+
### Step 4: Build and Run Cleaning Pipeline
|
|
31
73
|
```python
|
|
32
74
|
def clean_pipeline(df: pd.DataFrame) -> pd.DataFrame:
|
|
33
75
|
df = df.copy()
|
|
34
76
|
|
|
35
|
-
# 1.
|
|
77
|
+
# 1. Remove duplicates
|
|
78
|
+
before = len(df)
|
|
36
79
|
df = df.drop_duplicates()
|
|
80
|
+
print(f"Removed {before - len(df)} duplicate rows")
|
|
37
81
|
|
|
38
|
-
# 2.
|
|
39
|
-
date_cols = [c for c in df.columns if 'date' in c.lower() or
|
|
82
|
+
# 2. Fix date columns
|
|
83
|
+
date_cols = [c for c in df.columns if 'date' in c.lower() or c.endswith('_at')]
|
|
40
84
|
for col in date_cols:
|
|
41
85
|
df[col] = pd.to_datetime(df[col], errors='coerce')
|
|
42
86
|
|
|
43
|
-
# 3.
|
|
87
|
+
# 3. Handle numeric nulls (<5% null: fill with median; >50% null: drop column; otherwise leave as-is)
|
|
44
88
|
for col in df.select_dtypes(include=[np.number]).columns:
|
|
45
|
-
|
|
89
|
+
null_pct = df[col].isnull().sum() / len(df)
|
|
90
|
+
if null_pct < 0.05:
|
|
46
91
|
df[col] = df[col].fillna(df[col].median())
|
|
47
|
-
|
|
48
|
-
|
|
92
|
+
elif null_pct > 0.5:
|
|
93
|
+
print(f"Dropping column '{col}' ({null_pct:.0%} nulls)")
|
|
94
|
+
df = df.drop(columns=[col])
|
|
49
95
|
|
|
50
|
-
# 4.
|
|
96
|
+
# 4. Handle categorical nulls
|
|
51
97
|
for col in df.select_dtypes(include='object').columns:
|
|
52
98
|
df[col] = df[col].fillna('unknown')
|
|
53
99
|
|
|
54
|
-
# 5.
|
|
100
|
+
# 5. Normalize strings
|
|
55
101
|
for col in df.select_dtypes(include='object').columns:
|
|
56
102
|
df[col] = df[col].str.strip().str.lower()
|
|
57
103
|
|
|
58
104
|
return df
|
|
59
|
-
```
|
|
60
105
|
|
|
61
|
-
|
|
106
|
+
df_clean = clean_pipeline(df)
|
|
107
|
+
```
|
|
62
108
|
|
|
109
|
+
### Step 5: Validate with Pandera
|
|
63
110
|
```python
|
|
64
111
|
import pandera as pa
|
|
65
112
|
|
|
66
113
|
schema = pa.DataFrameSchema({
|
|
67
|
-
"
|
|
114
|
+
"id": pa.Column(str, nullable=False, unique=True),
|
|
68
115
|
"description": pa.Column(str, nullable=False),
|
|
69
116
|
"status": pa.Column(str, pa.Check.isin(["created", "planned", "completed"])),
|
|
70
|
-
"
|
|
117
|
+
"score": pa.Column(float, pa.Check.between(0, 100), nullable=True),
|
|
71
118
|
"created_at": pa.Column("datetime64[ns]", nullable=False),
|
|
72
119
|
})
|
|
73
120
|
|
|
74
|
-
validated_df = schema.validate(
|
|
121
|
+
validated_df = schema.validate(df_clean)
|
|
122
|
+
print("Validation passed!")
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Step 6: Export Cleaned Data
|
|
126
|
+
```bash
|
|
127
|
+
mkdir -p data/processed
|
|
75
128
|
```
|
|
129
|
+
```python
|
|
130
|
+
df_clean.to_parquet("data/processed/dataset_clean.parquet", index=False)
|
|
131
|
+
# or: df_clean.to_csv("data/processed/dataset_clean.csv", index=False)
|
|
132
|
+
print(f"Exported {len(df_clean)} rows to data/processed/")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Step 7: Verify Output
|
|
136
|
+
```bash
|
|
137
|
+
python -c "import pandas as pd; df = pd.read_parquet('data/processed/dataset_clean.parquet'); print(f'Rows: {len(df)}, Cols: {len(df.columns)}'); print(df.dtypes)"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Resources
|
|
141
|
+
- `references/pandas-cheatsheet.md` - Common Pandas operations and patterns
|
|
142
|
+
- `references/pandera-schemas.md` - Schema validation examples
|
|
143
|
+
|
|
144
|
+
## Examples
|
|
145
|
+
### Example 1: Clean a CSV for Analysis
|
|
146
|
+
User asks: "Clean this sales data CSV and remove duplicates"
|
|
147
|
+
Response approach:
|
|
148
|
+
1. Load CSV with `pd.read_csv()`
|
|
149
|
+
2. Run `eda_report()` to understand the data
|
|
150
|
+
3. Apply `clean_pipeline()` to remove duplicates and handle nulls
|
|
151
|
+
4. Export cleaned data to Parquet
|
|
152
|
+
5. Print before/after comparison of row counts and null percentages
|
|
153
|
+
|
|
154
|
+
### Example 2: Validate Data Before Training
|
|
155
|
+
User asks: "Make sure this dataset matches our expected schema before training"
|
|
156
|
+
Response approach:
|
|
157
|
+
1. Load dataset and inspect dtypes
|
|
158
|
+
2. Define Pandera schema matching expected columns and constraints
|
|
159
|
+
3. Run `schema.validate(df)` and fix any failures
|
|
160
|
+
4. Export validated dataset
|
|
161
|
+
|
|
162
|
+
## Notes
|
|
163
|
+
- Always run EDA before cleaning to understand data distribution
|
|
164
|
+
- Use Parquet over CSV for production pipelines (preserves column dtypes and has built-in compression)
|
|
165
|
+
- Log the number of rows removed at each cleaning step
|
|
166
|
+
- Never modify the original data file -- write to a separate output path
|
package/templates/bundle-data-pipeline/skills/data-preprocessing/references/pandas-cheatsheet.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Pandas Cheatsheet
|
|
2
|
+
|
|
3
|
+
## Loading Data
|
|
4
|
+
```python
|
|
5
|
+
pd.read_csv("file.csv")
|
|
6
|
+
pd.read_parquet("file.parquet")
|
|
7
|
+
pd.read_json("file.json")
|
|
8
|
+
pd.read_excel("file.xlsx", sheet_name="Sheet1")
|
|
9
|
+
pd.read_sql("SELECT * FROM table", connection)
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Inspection
|
|
13
|
+
```python
|
|
14
|
+
df.shape # (rows, cols)
|
|
15
|
+
df.dtypes # column types
|
|
16
|
+
df.info() # memory usage + types
|
|
17
|
+
df.describe() # numeric statistics
|
|
18
|
+
df.head(10) # first 10 rows
|
|
19
|
+
df.sample(5) # random 5 rows
|
|
20
|
+
df.nunique() # unique values per column
|
|
21
|
+
df.isnull().sum() # null counts per column
|
|
22
|
+
df.duplicated().sum() # total duplicate rows
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Filtering
|
|
26
|
+
```python
|
|
27
|
+
df[df['col'] > 10]
|
|
28
|
+
df[df['col'].isin(['a', 'b'])]
|
|
29
|
+
df.query("col > 10 and status == 'active'")
|
|
30
|
+
df[df['col'].between(5, 15)]
|
|
31
|
+
df[df['col'].str.contains('pattern', na=False)]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Transformations
|
|
35
|
+
```python
|
|
36
|
+
df['col'] = df['col'].astype(int)
|
|
37
|
+
df['date'] = pd.to_datetime(df['date'])
|
|
38
|
+
df['col'] = df['col'].str.strip().str.lower()
|
|
39
|
+
df['new'] = df['a'] + df['b']
|
|
40
|
+
df['binned'] = pd.cut(df['value'], bins=5)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Aggregations
|
|
44
|
+
```python
|
|
45
|
+
df.groupby('category')['value'].mean()
|
|
46
|
+
df.groupby('category').agg({'value': ['mean', 'std', 'count']})
|
|
47
|
+
df.pivot_table(values='value', index='category', columns='type', aggfunc='mean')
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Handling Nulls
|
|
51
|
+
```python
|
|
52
|
+
df.dropna(subset=['critical_col'])
|
|
53
|
+
df['col'].fillna(df['col'].median())
|
|
54
|
+
df['col'].ffill()  # forward-fill; fillna(method='ffill') is deprecated in pandas 2.1+
|
|
55
|
+
df.interpolate()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Export
|
|
59
|
+
```python
|
|
60
|
+
df.to_csv("output.csv", index=False)
|
|
61
|
+
df.to_parquet("output.parquet", index=False)
|
|
62
|
+
df.to_json("output.json", orient="records")
|
|
63
|
+
```
|