agentic-qe 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/brutal-honesty-review/README.md +218 -0
- package/.claude/skills/brutal-honesty-review/SKILL.md +725 -0
- package/.claude/skills/brutal-honesty-review/resources/assessment-rubrics.md +295 -0
- package/.claude/skills/brutal-honesty-review/resources/review-template.md +102 -0
- package/.claude/skills/brutal-honesty-review/scripts/assess-code.sh +179 -0
- package/.claude/skills/brutal-honesty-review/scripts/assess-tests.sh +223 -0
- package/.claude/skills/cicd-pipeline-qe-orchestrator/README.md +301 -0
- package/.claude/skills/cicd-pipeline-qe-orchestrator/SKILL.md +510 -0
- package/.claude/skills/cicd-pipeline-qe-orchestrator/resources/workflows/microservice-pipeline.md +239 -0
- package/.claude/skills/cicd-pipeline-qe-orchestrator/resources/workflows/mobile-pipeline.md +375 -0
- package/.claude/skills/cicd-pipeline-qe-orchestrator/resources/workflows/monolith-pipeline.md +268 -0
- package/.claude/skills/six-thinking-hats/README.md +190 -0
- package/.claude/skills/six-thinking-hats/SKILL.md +1215 -0
- package/.claude/skills/six-thinking-hats/resources/examples/api-testing-example.md +345 -0
- package/.claude/skills/six-thinking-hats/resources/templates/solo-session-template.md +167 -0
- package/.claude/skills/six-thinking-hats/resources/templates/team-session-template.md +336 -0
- package/CHANGELOG.md +2359 -2157
- package/README.md +41 -7
- package/dist/agents/BaseAgent.d.ts +22 -0
- package/dist/agents/BaseAgent.d.ts.map +1 -1
- package/dist/agents/BaseAgent.js +74 -0
- package/dist/agents/BaseAgent.js.map +1 -1
- package/dist/agents/FleetCommanderAgent.d.ts +16 -0
- package/dist/agents/FleetCommanderAgent.d.ts.map +1 -1
- package/dist/agents/FleetCommanderAgent.js +35 -20
- package/dist/agents/FleetCommanderAgent.js.map +1 -1
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +0 -2
- package/dist/agents/index.js.map +1 -1
- package/dist/agents/lifecycle/AgentLifecycleManager.d.ts +5 -0
- package/dist/agents/lifecycle/AgentLifecycleManager.d.ts.map +1 -1
- package/dist/agents/lifecycle/AgentLifecycleManager.js +10 -0
- package/dist/agents/lifecycle/AgentLifecycleManager.js.map +1 -1
- package/dist/cli/commands/agentdb/learn.d.ts.map +1 -1
- package/dist/cli/commands/agentdb/learn.js +190 -71
- package/dist/cli/commands/agentdb/learn.js.map +1 -1
- package/dist/cli/commands/debug/agent.d.ts.map +1 -1
- package/dist/cli/commands/debug/agent.js +40 -13
- package/dist/cli/commands/debug/agent.js.map +1 -1
- package/dist/cli/commands/debug/diagnostics.js +38 -11
- package/dist/cli/commands/debug/diagnostics.js.map +1 -1
- package/dist/cli/commands/debug/health-check.js +47 -12
- package/dist/cli/commands/debug/health-check.js.map +1 -1
- package/dist/cli/commands/debug/profile.js +7 -7
- package/dist/cli/commands/debug/profile.js.map +1 -1
- package/dist/cli/commands/debug/trace.js +4 -4
- package/dist/cli/commands/debug/trace.js.map +1 -1
- package/dist/cli/commands/debug/troubleshoot.js +41 -27
- package/dist/cli/commands/debug/troubleshoot.js.map +1 -1
- package/dist/cli/commands/init-claude-md-template.d.ts +16 -0
- package/dist/cli/commands/init-claude-md-template.d.ts.map +1 -0
- package/dist/cli/commands/init-claude-md-template.js +69 -0
- package/dist/cli/commands/init-claude-md-template.js.map +1 -0
- package/dist/cli/commands/init.d.ts +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +499 -469
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/test/clean.d.ts.map +1 -1
- package/dist/cli/commands/test/clean.js +26 -9
- package/dist/cli/commands/test/clean.js.map +1 -1
- package/dist/cli/commands/test/debug.js +6 -7
- package/dist/cli/commands/test/debug.js.map +1 -1
- package/dist/cli/commands/test/diff.js +4 -37
- package/dist/cli/commands/test/diff.js.map +1 -1
- package/dist/cli/commands/test/profile.js +7 -40
- package/dist/cli/commands/test/profile.js.map +1 -1
- package/dist/cli/commands/test/trace.js +4 -37
- package/dist/cli/commands/test/trace.js.map +1 -1
- package/dist/core/ArtifactWorkflow.d.ts +4 -0
- package/dist/core/ArtifactWorkflow.d.ts.map +1 -1
- package/dist/core/ArtifactWorkflow.js +34 -13
- package/dist/core/ArtifactWorkflow.js.map +1 -1
- package/dist/core/coordination/BlackboardCoordination.d.ts +4 -0
- package/dist/core/coordination/BlackboardCoordination.d.ts.map +1 -1
- package/dist/core/coordination/BlackboardCoordination.js +28 -22
- package/dist/core/coordination/BlackboardCoordination.js.map +1 -1
- package/dist/core/coordination/ConsensusGating.d.ts +4 -0
- package/dist/core/coordination/ConsensusGating.d.ts.map +1 -1
- package/dist/core/coordination/ConsensusGating.js +25 -18
- package/dist/core/coordination/ConsensusGating.js.map +1 -1
- package/dist/core/memory/AgentDBService.d.ts.map +1 -1
- package/dist/core/memory/AgentDBService.js +6 -3
- package/dist/core/memory/AgentDBService.js.map +1 -1
- package/dist/learning/LearningEngine.js +1 -1
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/learning/StateExtractor.d.ts +1 -1
- package/dist/learning/StateExtractor.d.ts.map +1 -1
- package/dist/learning/StateExtractor.js +62 -13
- package/dist/learning/StateExtractor.js.map +1 -1
- package/dist/utils/Config.d.ts.map +1 -1
- package/dist/utils/Config.js +14 -5
- package/dist/utils/Config.js.map +1 -1
- package/dist/utils/Database.d.ts.map +1 -1
- package/dist/utils/Database.js +5 -2
- package/dist/utils/Database.js.map +1 -1
- package/dist/utils/Logger.d.ts +1 -1
- package/dist/utils/Logger.d.ts.map +1 -1
- package/dist/utils/Logger.js +4 -4
- package/dist/utils/Logger.js.map +1 -1
- package/dist/utils/SecurityScanner.js +1 -1
- package/dist/utils/SecurityScanner.js.map +1 -1
- package/package.json +2 -2
- package/.claude/agents/.claude-flow/metrics/agent-metrics.json +0 -1
- package/.claude/agents/.claude-flow/metrics/performance.json +0 -87
- package/.claude/agents/.claude-flow/metrics/task-metrics.json +0 -10
- package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +0 -54
- package/.claude/commands/analysis/performance-bottlenecks.md +0 -59
- package/.claude/commands/flow-nexus/app-store.md +0 -124
- package/.claude/commands/flow-nexus/challenges.md +0 -120
- package/.claude/commands/flow-nexus/login-registration.md +0 -65
- package/.claude/commands/flow-nexus/neural-network.md +0 -134
- package/.claude/commands/flow-nexus/payments.md +0 -116
- package/.claude/commands/flow-nexus/sandbox.md +0 -83
- package/.claude/commands/flow-nexus/swarm.md +0 -87
- package/.claude/commands/flow-nexus/user-tools.md +0 -152
- package/.claude/commands/flow-nexus/workflow.md +0 -115
- package/.claude/commands/memory/usage.md +0 -46
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
# Assessment Rubrics for Brutal Honesty Reviews
|
|
2
|
+
|
|
3
|
+
## Code Quality Rubric (Linus Mode)
|
|
4
|
+
|
|
5
|
+
### Correctness
|
|
6
|
+
| Level | Criteria | Example |
|
|
7
|
+
|-------|----------|---------|
|
|
8
|
+
| ๐ด **Failing** | Wrong algorithm, logic errors, crashes | `null` pointer dereference, off-by-one errors |
|
|
9
|
+
| ๐ก **Passing** | Works in tested cases, no obvious bugs | Handles expected inputs correctly |
|
|
10
|
+
| ๐ข **Excellent** | Proven correct across edge cases | Property-based tests, formal verification |
|
|
11
|
+
|
|
12
|
+
### Performance
|
|
13
|
+
| Level | Criteria | Example |
|
|
14
|
+
|-------|----------|---------|
|
|
15
|
+
| 🔴 **Failing** | Naive O(n²) where O(n) exists | Nested loops for searchable data |
|
|
16
|
+
| ๐ก **Passing** | Acceptable complexity for scale | O(n log n) for reasonable n |
|
|
17
|
+
| ๐ข **Excellent** | Optimal algorithm + profiled | Cached, indexed, benchmarked |
|
|
18
|
+
|
|
19
|
+
### Error Handling
|
|
20
|
+
| Level | Criteria | Example |
|
|
21
|
+
|-------|----------|---------|
|
|
22
|
+
| ๐ด **Failing** | Crashes on invalid input | Uncaught exceptions, panics |
|
|
23
|
+
| ๐ก **Passing** | Returns error codes/exceptions | `try/catch`, error returns |
|
|
24
|
+
| ๐ข **Excellent** | Graceful degradation + logging | Circuit breakers, retry logic |
|
|
25
|
+
|
|
26
|
+
### Concurrency Safety
|
|
27
|
+
| Level | Criteria | Example |
|
|
28
|
+
|-------|----------|---------|
|
|
29
|
+
| ๐ด **Failing** | Race conditions, deadlocks | Shared mutable state, no locks |
|
|
30
|
+
| ๐ก **Passing** | Thread-safe with locks | Proper mutex usage |
|
|
31
|
+
| ๐ข **Excellent** | Lock-free or proven safe | Immutable data, atomic operations |
|
|
32
|
+
|
|
33
|
+
### Testability
|
|
34
|
+
| Level | Criteria | Example |
|
|
35
|
+
|-------|----------|---------|
|
|
36
|
+
| ๐ด **Failing** | Impossible to unit test | Hard-coded dependencies, global state |
|
|
37
|
+
| ๐ก **Passing** | Can be tested with mocks | Dependency injection |
|
|
38
|
+
| ๐ข **Excellent** | Self-testing design | Pure functions, property-based |
|
|
39
|
+
|
|
40
|
+
### Maintainability
|
|
41
|
+
| Level | Criteria | Example |
|
|
42
|
+
|-------|----------|---------|
|
|
43
|
+
| ๐ด **Failing** | "Clever" code, unclear intent | Obfuscated logic, magic numbers |
|
|
44
|
+
| ๐ก **Passing** | Clear intent, reasonable | Named variables, comments |
|
|
45
|
+
| ๐ข **Excellent** | Self-documenting + simple | Obvious code, minimal complexity |
|
|
46
|
+
|
|
47
|
+
**Passing Threshold**: Minimum ๐ก on ALL criteria
|
|
48
|
+
**Ship-Ready**: Minimum ๐ข on Correctness, Performance, Error Handling
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Test Quality Rubric (Ramsay Mode)
|
|
53
|
+
|
|
54
|
+
### Coverage
|
|
55
|
+
| Level | Criteria | Acceptable % |
|
|
56
|
+
|-------|----------|--------------|
|
|
57
|
+
| ๐ด **Raw** | Only happy path | <50% branch |
|
|
58
|
+
| ๐ก **Acceptable** | Common failures covered | 80%+ branch |
|
|
59
|
+
| ๐ข **Michelin Star** | Complete boundary analysis | 95%+ branch + mutation tested |
|
|
60
|
+
|
|
61
|
+
### Edge Case Testing
|
|
62
|
+
| Level | Criteria | Example |
|
|
63
|
+
|-------|----------|---------|
|
|
64
|
+
| ๐ด **Raw** | Only happy path tested | `test('adds 2+2')` |
|
|
65
|
+
| ๐ก **Acceptable** | Common failures tested | Null, empty, invalid input |
|
|
66
|
+
| ๐ข **Michelin Star** | Boundary analysis complete | Min/max values, overflow, underflow |
|
|
67
|
+
|
|
68
|
+
### Test Clarity
|
|
69
|
+
| Level | Criteria | Example |
|
|
70
|
+
|-------|----------|---------|
|
|
71
|
+
| ๐ด **Raw** | Unclear what's being tested | `test('test1')` |
|
|
72
|
+
| ๐ก **Acceptable** | Clear test names | `test('handles null input')` |
|
|
73
|
+
| ๐ข **Michelin Star** | Self-documenting test pyramid | Given-When-Then, BDD style |
|
|
74
|
+
|
|
75
|
+
### Speed
|
|
76
|
+
| Level | Criteria | Example |
|
|
77
|
+
|-------|----------|---------|
|
|
78
|
+
| ๐ด **Raw** | Minutes to run unit tests | Calls real database/network |
|
|
79
|
+
| ๐ก **Acceptable** | <10s for unit tests | Mocked dependencies |
|
|
80
|
+
| ๐ข **Michelin Star** | <1s, parallelized | Pure functions, in-memory |
|
|
81
|
+
|
|
82
|
+
### Stability
|
|
83
|
+
| Level | Criteria | Flake Rate |
|
|
84
|
+
|-------|----------|------------|
|
|
85
|
+
| ๐ด **Raw** | Flaky, timing-dependent | >1% failure rate |
|
|
86
|
+
| ๐ก **Acceptable** | Stable but potentially slow | 0% flake, deterministic |
|
|
87
|
+
| ๐ข **Michelin Star** | Deterministic + fast | 0% flake, <100ms per test |
|
|
88
|
+
|
|
89
|
+
### Isolation
|
|
90
|
+
| Level | Criteria | Example |
|
|
91
|
+
|-------|----------|---------|
|
|
92
|
+
| ๐ด **Raw** | Tests depend on each other | Shared state, execution order matters |
|
|
93
|
+
| ๐ก **Acceptable** | Independent tests | Each test sets up own state |
|
|
94
|
+
| ๐ข **Michelin Star** | Pure functions, no shared state | Immutable, stateless |
|
|
95
|
+
|
|
96
|
+
**Merge Threshold**: Minimum ๐ก on ALL criteria
|
|
97
|
+
**Production-Ready**: Minimum ๐ข on Coverage, Stability, Isolation
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## BS Detection Rubric (Bach Mode)
|
|
102
|
+
|
|
103
|
+
### Red Flags in Testing Practices
|
|
104
|
+
|
|
105
|
+
| Red Flag | Evidence | Impact | Harshness Level |
|
|
106
|
+
|----------|----------|--------|-----------------|
|
|
107
|
+
| **Cargo Cult Practice** | "Best practice" with no context | Wasted effort, false confidence | ๐ก Harsh |
|
|
108
|
+
| **Certification Theater** | Required cert unrelated to actual skills | Filters out critical thinkers | 🔴 Brutal |
|
|
109
|
+
| **Vendor Lock-In** | Tool solves problem it created | Expensive dependency | ๐ก Harsh |
|
|
110
|
+
| **False Automation** | "AI testing" still needs human verification | Automation debt | ๐ก Harsh |
|
|
111
|
+
| **Checkbox Quality** | Compliance without outcome measurement | Audit passes, customers suffer | 🔴 Brutal |
|
|
112
|
+
| **Hype Cycle** | Promises 10x improvement without evidence | Budget waste, disillusionment | ๐ก Harsh |
|
|
113
|
+
| **Coverage Theater** | 100% coverage of trivial code | False sense of quality | ๐ก Harsh |
|
|
114
|
+
| **Test Script Slavery** | Following test cases without thinking | Misses actual bugs | 🔴 Brutal |
|
|
115
|
+
| **Magic Tool Thinking** | Tool will solve all problems | Dependency without skill growth | ๐ก Harsh |
|
|
116
|
+
| **Certification Over Competence** | Hiring based on credentials, not ability | Weak team, strong resumes | 🔴 Brutal |
|
|
117
|
+
|
|
118
|
+
### Green Flag Test
|
|
119
|
+
|
|
120
|
+
Ask these questions about any practice/tool/certification:
|
|
121
|
+
|
|
122
|
+
1. **Does this help testers/developers do better work in THIS context?**
|
|
123
|
+
- If yes → Worth considering
|
|
124
|
+
- If no → BS alert
|
|
125
|
+
|
|
126
|
+
2. **Who benefits economically from this?**
|
|
127
|
+
- Vendor/Consultant more than users → BS alert
|
|
128
|
+
- Users demonstrably benefit → Potentially useful
|
|
129
|
+
|
|
130
|
+
3. **Can you measure the impact?**
|
|
131
|
+
- Measurable outcomes → Worth evaluating
|
|
132
|
+
- Vague claims → BS alert
|
|
133
|
+
|
|
134
|
+
4. **Does this promote thinking or compliance?**
|
|
135
|
+
- Critical thinking → Good
|
|
136
|
+
- Checkbox compliance → BS alert
|
|
137
|
+
|
|
138
|
+
5. **What happens if you don't adopt this?**
|
|
139
|
+
- Concrete negative consequence → Worth considering
|
|
140
|
+
- FOMO, vendor says so → BS alert
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Calibration Matrix
|
|
145
|
+
|
|
146
|
+
### When to Be Brutal
|
|
147
|
+
|
|
148
|
+
| Scenario | Linus | Ramsay | Bach | Notes |
|
|
149
|
+
|----------|-------|--------|------|-------|
|
|
150
|
+
| **Senior engineer, repeated mistake** | ✅ | ✅ | ✅ | They should know better |
|
|
151
|
+
| **Critical security bug** | ✅ | ✅ | ❌ | Technical precision needed |
|
|
152
|
+
| **Production incident** | ✅ | ✅ | ❌ | No time for sugar-coating |
|
|
153
|
+
| **Evaluating vendor claims** | ❌ | ❌ | ✅ | BS detection prevents waste |
|
|
154
|
+
| **Team explicitly requests no-BS** | ✅ | ✅ | ✅ | Permission granted |
|
|
155
|
+
| **Certification/process evaluation** | ❌ | ❌ | ✅ | Bach's specialty |
|
|
156
|
+
|
|
157
|
+
### When to Dial Back
|
|
158
|
+
|
|
159
|
+
| Scenario | Instead Use | Reason |
|
|
160
|
+
|----------|-------------|--------|
|
|
161
|
+
| **Junior dev, first PR** | Constructive mentoring | Build confidence |
|
|
162
|
+
| **Demoralized team** | Supportive guidance | Rebuild trust |
|
|
163
|
+
| **Public forum** | Private feedback | Avoid humiliation |
|
|
164
|
+
| **Unclear if fixable** | Collaborative problem-solving | Avoid frustration |
|
|
165
|
+
| **Personal, not technical** | Empathy + support | Not a code issue |
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Scoring Guide
|
|
170
|
+
|
|
171
|
+
### Overall Code Review Score (Linus Mode)
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
Score = (Correctness × 3) + (Performance × 2) + (Error Handling × 3) +
|
|
175
|
+
(Concurrency × 2) + (Testability × 1) + (Maintainability × 1)
|
|
176
|
+
|
|
177
|
+
Maximum: 60 points (all Excellent)
|
|
178
|
+
Passing: 36 points (all Passing)
|
|
179
|
+
Failing: <36 points
|
|
180
|
+
|
|
181
|
+
Harshness Level:
|
|
182
|
+
- 0-24 points: ๐ด Brutal ("This is fundamentally broken")
|
|
183
|
+
- 25-35 points: ๐ก Harsh ("Multiple issues need addressing")
|
|
184
|
+
- 36-48 points: ๐ข Direct ("Some improvements needed")
|
|
185
|
+
- 49-60 points: โช Professional ("Minor suggestions")
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Test Suite Score (Ramsay Mode)
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
Score = (Coverage × 3) + (Edge Cases × 3) + (Clarity × 1) +
|
|
192
|
+
(Speed × 1) + (Stability × 3) + (Isolation × 1)
|
|
193
|
+
|
|
194
|
+
Maximum: 60 points (all Michelin Star)
|
|
195
|
+
Merge Threshold: 36 points (all Acceptable)
|
|
196
|
+
Failing: <36 points
|
|
197
|
+
|
|
198
|
+
Harshness Level:
|
|
199
|
+
- 0-24 points: ๐ด Brutal ("This is RAW. Don't serve it.")
|
|
200
|
+
- 25-35 points: ๐ก Harsh ("You know what good looks like.")
|
|
201
|
+
- 36-48 points: ๐ข Direct ("Close, but needs refinement.")
|
|
202
|
+
- 49-60 points: โช Professional ("Well done, minor polish.")
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### BS Detection Score (Bach Mode)
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
Red Flags: Count from BS Detection Rubric
|
|
209
|
+
Green Flags: Passes all 5 Green Flag Tests
|
|
210
|
+
|
|
211
|
+
Score = (Green Flags × 20) - (Red Flags × 10)
|
|
212
|
+
|
|
213
|
+
Maximum: 100 (all green flags, no red flags)
|
|
214
|
+
Acceptable: 50+ (more green than red)
|
|
215
|
+
BS Alert: <50 (more red than green)
|
|
216
|
+
|
|
217
|
+
Harshness Level:
|
|
218
|
+
- Negative score: ๐ด Brutal ("This is harmful")
|
|
219
|
+
- 0-40: ๐ก Harsh ("This is questionable")
|
|
220
|
+
- 41-70: ๐ข Direct ("Some concerns")
|
|
221
|
+
- 71-100: โช Professional ("Reasonable approach")
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Example Assessments
|
|
227
|
+
|
|
228
|
+
### Code Review Example (Linus Mode)
|
|
229
|
+
|
|
230
|
+
**Code**: Database query in HTTP handler without connection pooling
|
|
231
|
+
|
|
232
|
+
**Assessment**:
|
|
233
|
+
- Correctness: ๐ด Failing (connection leak)
|
|
234
|
+
- Performance: ๐ด Failing (O(n) connections)
|
|
235
|
+
- Error Handling: ๐ก Passing (has try/catch)
|
|
236
|
+
- Concurrency: ๐ด Failing (connection exhaustion)
|
|
237
|
+
- Testability: ๐ก Passing (can mock)
|
|
238
|
+
- Maintainability: ๐ก Passing (clear intent)
|
|
239
|
+
|
|
240
|
+
**Score**: (0×3) + (0×2) + (1×3) + (0×2) + (1×1) + (1×1) = 5/60
|
|
241
|
+
|
|
242
|
+
**Harshness**: ๐ด Brutal
|
|
243
|
+
|
|
244
|
+
**Feedback**:
|
|
245
|
+
> "This is fundamentally broken. You're creating a new database connection
|
|
246
|
+
> for every HTTP request without pooling. Under load, you'll exhaust
|
|
247
|
+
> connections in seconds. Did you even test this with concurrent users?
|
|
248
|
+
>
|
|
249
|
+
> Use a connection pool. This is Database 101."
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
### Test Suite Example (Ramsay Mode)
|
|
254
|
+
|
|
255
|
+
**Tests**: 15 tests, all happy path, no edge cases
|
|
256
|
+
|
|
257
|
+
**Assessment**:
|
|
258
|
+
- Coverage: ๐ด Raw (35% branch)
|
|
259
|
+
- Edge Cases: ๐ด Raw (none tested)
|
|
260
|
+
- Clarity: ๐ก Acceptable (clear names)
|
|
261
|
+
- Speed: ๐ข Michelin (fast)
|
|
262
|
+
- Stability: ๐ข Michelin (no flakes)
|
|
263
|
+
- Isolation: ๐ก Acceptable (independent)
|
|
264
|
+
|
|
265
|
+
**Score**: (0×3) + (0×3) + (1×1) + (2×1) + (2×3) + (1×1) = 10/60
|
|
266
|
+
|
|
267
|
+
**Harshness**: ๐ด Brutal
|
|
268
|
+
|
|
269
|
+
**Feedback**:
|
|
270
|
+
> "Look at this. You've got 15 tests and every one of them is happy path.
|
|
271
|
+
> Where's the validation testing? Where's the error handling?
|
|
272
|
+
>
|
|
273
|
+
> This is RAW. You're testing if the code runs, not if it's correct.
|
|
274
|
+
> Don't merge this."
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
### BS Detection Example (Bach Mode)
|
|
279
|
+
|
|
280
|
+
**Claim**: "Our AI-powered test automation eliminates manual testing"
|
|
281
|
+
|
|
282
|
+
**Assessment**:
|
|
283
|
+
- Red Flags: Hype Cycle, Magic Tool Thinking, False Automation
|
|
284
|
+
- Green Flags: 0/5 (fails all tests)
|
|
285
|
+
|
|
286
|
+
**Score**: (0×20) - (3×10) = -30/100
|
|
287
|
+
|
|
288
|
+
**Harshness**: ๐ด Brutal
|
|
289
|
+
|
|
290
|
+
**Feedback**:
|
|
291
|
+
> "This is vendor hype. 'AI-powered' doesn't eliminate the need for humans
|
|
292
|
+
> to define test oracles, handle edge cases, or investigate failures.
|
|
293
|
+
>
|
|
294
|
+
> The real question: Does this tool help YOUR testers on YOUR product
|
|
295
|
+
> in YOUR context? If you can't answer specifically, you're buying hype."
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Brutal Honesty Review Template
|
|
2
|
+
|
|
3
|
+
## Review Type
|
|
4
|
+
- [ ] Linus Mode (Technical Precision)
|
|
5
|
+
- [ ] Ramsay Mode (Standards-Driven Quality)
|
|
6
|
+
- [ ] Bach Mode (BS Detection)
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## What's Broken
|
|
11
|
+
|
|
12
|
+
[Surgical description of the problem - be specific, not vague]
|
|
13
|
+
|
|
14
|
+
**Evidence**:
|
|
15
|
+
```
|
|
16
|
+
[Code snippet, claim, or practice being reviewed]
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Why It's Wrong
|
|
22
|
+
|
|
23
|
+
[Technical/logical explanation - facts, not opinions]
|
|
24
|
+
|
|
25
|
+
**Fundamental Issue**:
|
|
26
|
+
- Root cause:
|
|
27
|
+
- Why this matters:
|
|
28
|
+
- When it breaks:
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## What Correct Looks Like
|
|
33
|
+
|
|
34
|
+
[Clear model of excellence - show, don't just tell]
|
|
35
|
+
|
|
36
|
+
**Best Practice**:
|
|
37
|
+
```
|
|
38
|
+
[Example of correct approach]
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Why This Works**:
|
|
42
|
+
- Reason 1:
|
|
43
|
+
- Reason 2:
|
|
44
|
+
- Reason 3:
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## How to Fix It
|
|
49
|
+
|
|
50
|
+
[Actionable steps - specific to context]
|
|
51
|
+
|
|
52
|
+
1. **Immediate Fix**:
|
|
53
|
+
```bash
|
|
54
|
+
[Command or code change]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
2. **Verify Fix**:
|
|
58
|
+
```bash
|
|
59
|
+
[How to test the fix]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
3. **Prevent Recurrence**:
|
|
63
|
+
- [ ] Add test coverage
|
|
64
|
+
- [ ] Update documentation
|
|
65
|
+
- [ ] Review similar code
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Why This Matters
|
|
70
|
+
|
|
71
|
+
**Impact if Not Fixed**:
|
|
72
|
+
- [ ] Security vulnerability
|
|
73
|
+
- [ ] Performance degradation
|
|
74
|
+
- [ ] Data corruption
|
|
75
|
+
- [ ] User experience failure
|
|
76
|
+
- [ ] Technical debt
|
|
77
|
+
- [ ] Team velocity reduction
|
|
78
|
+
|
|
79
|
+
**Priority**: ๐ด Critical | ๐ก High | ๐ข Medium | โช Low
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Harshness Calibration
|
|
84
|
+
|
|
85
|
+
**Audience**: [Junior/Mid/Senior]
|
|
86
|
+
**Context**: [First offense/Repeated mistake/Critical bug]
|
|
87
|
+
**Delivery**: [Private/Team review/Public]
|
|
88
|
+
|
|
89
|
+
**Tone Level**:
|
|
90
|
+
- [ ] Level 1 - Direct (experienced engineers)
|
|
91
|
+
- [ ] Level 2 - Harsh (repeated mistakes)
|
|
92
|
+
- [ ] Level 3 - Brutal (critical issues/willful ignorance)
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Follow-Up
|
|
97
|
+
|
|
98
|
+
- [ ] Reviewed by recipient
|
|
99
|
+
- [ ] Fix implemented
|
|
100
|
+
- [ ] Tests added
|
|
101
|
+
- [ ] Documentation updated
|
|
102
|
+
- [ ] Knowledge shared with team
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#!/bin/bash
# Brutal Honesty Code Assessment Script (Linus Mode)
#
# Usage: <script> <file-or-directory>
#
# Prints a banner, validates the single path argument and stores it in
# TARGET for the assess_* checks that follow.
# Exits 1 (message on stderr) when the argument is missing or the path does
# not exist.

# NOTE: `pipefail` is deliberately NOT enabled. The checks use
# `grep ... | wc -l` pipelines in which grep legitimately exits 1 when it
# finds nothing; with pipefail + errexit that would abort the script.
set -e

# Colors (ANSI escapes, interpreted later by `echo -e`)
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

echo "๐ฅ BRUTAL HONESTY CODE ASSESSMENT (Linus Mode)"
echo "================================================"
echo ""

# Check if file argument provided. ${1:-} keeps the test safe even if
# `set -u` is ever switched on.
if [ -z "${1:-}" ]; then
  echo "Usage: $0 <file-or-directory>" >&2
  exit 1
fi

TARGET="$1"

# Fail fast on a bad path: the checks silence grep's stderr, so a missing
# target would otherwise yield a report that looks valid but is meaningless.
if [ ! -e "$TARGET" ]; then
  echo "Error: '$TARGET' does not exist" >&2
  exit 1
fi
|
|
23
|
+
|
|
24
|
+
# Function to assess correctness
|
|
25
|
+
#######################################
# Heuristic correctness check.
# Globals:   TARGET (read) - file or directory to scan
#            RED, YELLOW, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   matching TODO/FIXME/BUG/HACK lines and a verdict on stdout
# Returns:   always 0; a finding is reported through output only, so a
#            caller running under `set -e` is not aborted
#######################################
assess_correctness() {
  local flagged=0

  echo "๐ CORRECTNESS CHECK"
  echo "-------------------"

  # Check for common bug patterns.
  # -w matches whole words only, so identifiers such as DEBUG no longer trip
  # the BUG marker; -E replaces the GNU-only "\|" alternation.
  if grep -rwE 'TODO|FIXME|BUG|HACK' "$TARGET" 2>/dev/null; then
    echo -e "${RED}๐ด FAILING: Found TODO/FIXME/BUG/HACK comments${NC}"
    echo " โ This code admits it's broken. Fix it before review."
    return 0
  fi

  # Check for error-prone patterns: any line that mentions null/undefined
  # without a strict-inequality guard on it.
  if grep -rE 'null|undefined' "$TARGET" 2>/dev/null \
    | grep -Ev '!== (null|undefined)' >/dev/null; then
    echo -e "${YELLOW}๐ก WARNING: Potential null/undefined issues${NC}"
    echo " โ Are you handling null cases properly?"
    flagged=1
  fi

  # Only claim a clean result when nothing above fired.
  if [ "$flagged" -eq 0 ]; then
    echo -e "${GREEN}โ No obvious correctness issues${NC}"
  fi
  return 0
}
|
|
44
|
+
|
|
45
|
+
# Function to assess performance
|
|
46
|
+
#######################################
# Heuristic performance check.
# Globals:   TARGET (read) - file or directory to scan
#            RED, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   matching synchronous-I/O lines and a verdict on stdout
# Returns:   0
#######################################
assess_performance() {
  local loop_count
  local flagged=0

  echo ""
  echo "โก PERFORMANCE CHECK"
  echo "-------------------"

  # Count loop headers as a rough proxy for nested loops (potential O(nยฒ)).
  # The `for` keyword must stand alone and be followed by whitespace or "(",
  # so words like "format" or "before" followed by "{" are not counted.
  loop_count=$(grep -rE '(^|[^[:alnum:]_])for[[:space:](].*[{]' "$TARGET" 2>/dev/null | wc -l)
  loop_count=$((loop_count)) # strip the padding some wc implementations add
  if [ "$loop_count" -gt 5 ]; then
    echo -e "${RED}๐ด FAILING: Found $loop_count loops${NC}"
    echo " โ Are you creating O(nยฒ) complexity where O(n) exists?"
    echo " โ Use hash maps, sets, or better algorithms."
    flagged=1
  fi

  # Check for synchronous I/O in hot paths
  if grep -rE 'readFileSync|writeFileSync' "$TARGET" 2>/dev/null; then
    echo -e "${RED}๐ด FAILING: Synchronous file I/O detected${NC}"
    echo " โ You're blocking the event loop. Use async operations."
    flagged=1
  fi

  # Only claim a clean result when neither check fired.
  if [ "$flagged" -eq 0 ]; then
    echo -e "${GREEN}โ No obvious performance issues${NC}"
  fi
}
|
|
67
|
+
|
|
68
|
+
# Function to assess error handling
|
|
69
|
+
#######################################
# Heuristic error-handling check.
# Globals:   TARGET (read) - file or directory to scan
#            RED, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   verdict lines on stdout
# Returns:   0
#######################################
assess_error_handling() {
  local try_count
  local empty_catches

  echo ""
  echo "๐ก๏ธ ERROR HANDLING CHECK"
  echo "----------------------"

  # Check for try/catch usage. -w keeps words such as "retry", "entry" or
  # "country" from being counted as error handling.
  try_count=$(grep -rwE 'try|catch' "$TARGET" 2>/dev/null | wc -l)
  try_count=$((try_count)) # strip the padding some wc implementations add
  if [ "$try_count" -eq 0 ]; then
    echo -e "${RED}๐ด FAILING: No error handling found${NC}"
    echo " โ What happens when this code fails? It crashes."
  else
    echo -e "${GREEN}โ Found error handling (verify it's sufficient)${NC}"
  fi

  # Check for empty catch blocks. A catch counts as empty when its "{" is
  # closed on the same line, or when the next non-blank line starts with "}".
  # The catch line itself is never tested for a leading "}", so the usual
  # "} catch (e) {" form is not reported. find feeds every regular file to
  # awk, which makes the check work for directory targets too.
  # `|| true`: a find/awk failure must not abort a caller using `set -e`.
  empty_catches=$(find "$TARGET" -type f -exec awk '
    FNR == 1 { open_catch = 0 }
    /^[ \t]*$/ { next }
    open_catch && /^[ \t]*[}]/ { print FILENAME ":" FNR }
    /catch[ \t]*([(][^)]*[)])?[ \t]*[{][ \t]*[}]/ { print FILENAME ":" FNR }
    { open_catch = ($0 ~ /catch[ \t]*([(][^)]*[)])?[ \t]*[{][ \t]*$/) }
  ' {} + 2>/dev/null) || true
  if [ -n "$empty_catches" ]; then
    echo -e "${RED}๐ด FAILING: Empty catch blocks detected${NC}"
    echo " โ Swallowing errors silently is worse than crashing."
  fi
}
|
|
89
|
+
|
|
90
|
+
# Function to assess concurrency
|
|
91
|
+
#######################################
# Heuristic concurrency check.
# Globals:   TARGET (read) - file or directory to scan
#            YELLOW, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   matching lines, warnings and a manual-review reminder on stdout
# Returns:   0
#######################################
assess_concurrency() {
  echo ""
  echo "๐ CONCURRENCY CHECK"
  echo "-------------------"

  # Check for global state mutations. `global.` / `window.` must start an
  # identifier, so names that merely end in them (e.g. "subwindow.") are
  # ignored.
  if grep -rE '(^|[^[:alnum:]_$])(global|window)\.' "$TARGET" 2>/dev/null; then
    echo -e "${YELLOW}๐ก WARNING: Global state mutations detected${NC}"
    echo " โ Are you handling concurrent access safely?"
  fi

  # Check for race condition patterns. -w keeps identifiers that only
  # contain the name (e.g. "resetTimeout") from matching.
  if grep -rwE 'setTimeout|setInterval' "$TARGET" 2>/dev/null; then
    echo -e "${YELLOW}๐ก WARNING: Timing-based code detected${NC}"
    echo " โ Are you creating race conditions?"
  fi

  # Printed unconditionally: these greps cannot establish safety.
  echo -e "${GREEN}โ Review concurrency manually${NC}"
}
|
|
110
|
+
|
|
111
|
+
# Function to assess testability
|
|
112
|
+
#######################################
# Heuristic testability check.
# Globals:   TARGET (read) - file or directory to scan
#            RED, YELLOW, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   verdict lines on stdout
# Returns:   0
#######################################
assess_testability() {
  local base_dir
  local root
  local dir_name
  local has_tests=0

  echo ""
  echo "๐งช TESTABILITY CHECK"
  echo "-------------------"

  # Check if tests exist. Look in the current directory and also next to the
  # target, so the result does not depend on where the script is started.
  if [ -d "$TARGET" ]; then
    base_dir=$TARGET
  else
    base_dir=$(dirname -- "$TARGET")
  fi
  for root in . "$base_dir"; do
    for dir_name in tests test __tests__; do
      if [ -d "$root/$dir_name" ]; then
        has_tests=1
      fi
    done
  done

  if [ "$has_tests" -eq 1 ]; then
    echo -e "${GREEN}โ Test directory exists${NC}"
  else
    echo -e "${RED}๐ด FAILING: No test directory found${NC}"
    echo " โ Where are the tests? Did you even test this?"
  fi

  # Check for dependency injection. -o emits each `new Name(` match by
  # itself (-h drops file names), so the Error/Date exclusion applies to the
  # constructor name only, not to unrelated text elsewhere on the line.
  if grep -rhoE '(^|[^[:alnum:]_])new[[:space:]]+[[:alnum:]_]+\(' "$TARGET" 2>/dev/null \
    | grep -vE 'Error|Date' >/dev/null; then
    echo -e "${YELLOW}๐ก WARNING: Hard-coded dependencies detected${NC}"
    echo " โ Use dependency injection for testability."
  fi
}
|
|
131
|
+
|
|
132
|
+
# Function to assess maintainability
|
|
133
|
+
#######################################
# Heuristic maintainability check.
# Globals:   TARGET (read) - file or directory to scan
#            YELLOW, GREEN, NC (read) - colour escapes
# Arguments: none
# Outputs:   warnings and a manual-review reminder on stdout
# Returns:   0
#######################################
assess_maintainability() {
  local long_functions

  echo ""
  echo "๐ง MAINTAINABILITY CHECK"
  echo "-----------------------"

  # Check function length (should be <50 lines).
  # A column-0 `function` / `const ... =>` line opens a candidate and the
  # next column-0 "}" closes it. `start` is cleared on every close and at the
  # top of each file, so a later unrelated "}" (object literal, class, ...)
  # is not measured against an earlier function's start line.
  # find feeds every regular file to awk, so directory targets are covered.
  long_functions=$(find "$TARGET" -type f -exec awk '
    FNR == 1 { start = 0 }
    /^function|^const.*=>/ { start = FNR }
    /^[}]/ {
      if (start && FNR - start > 50) print FILENAME ":" start
      start = 0
    }
  ' {} + 2>/dev/null | wc -l)
  long_functions=$((long_functions)) # strip the padding some wc implementations add
  if [ "$long_functions" -gt 0 ]; then
    echo -e "${YELLOW}๐ก WARNING: Found $long_functions functions >50 lines${NC}"
    echo " โ Break down complex functions."
  fi

  # Check for magic numbers: whitespace followed by 3+ digits, ignoring
  # lines that contain 1000 or 2000.
  if grep -rE '[[:space:]][0-9]{3,}' "$TARGET" 2>/dev/null \
    | grep -vE '1000|2000' >/dev/null; then
    echo -e "${YELLOW}๐ก WARNING: Magic numbers detected${NC}"
    echo " โ Use named constants."
  fi

  echo -e "${GREEN}โ Review code clarity manually${NC}"
}
|
|
155
|
+
|
|
156
|
+
# Run all assessments
|
|
157
|
+
# Execute each assessment in its fixed order; every check prints its own
# section to stdout.
for check in \
  assess_correctness \
  assess_performance \
  assess_error_handling \
  assess_concurrency \
  assess_testability \
  assess_maintainability; do
  "$check"
done

# Final verdict: a static closing banner. The quoted delimiter keeps the
# text literal (no expansion).
cat <<'EOF'

================================================
๐ฏ FINAL VERDICT
================================================

Review the findings above. If you see multiple ๐ด FAILING marks,
this code is NOT ready for review.

Expected standards:
 - All error paths handled
 - No obvious performance issues
 - Tests exist and pass
 - Code is clear and maintainable

If you wouldn't deploy this to production, don't submit it for review.
EOF
|