@cakemail-org/cakemail-cli 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +12 -0
- package/.env.example +40 -0
- package/.env.test.example +45 -0
- package/CHANGELOG.md +1031 -0
- package/README.md +319 -15
- package/audit-formats.js +128 -0
- package/cakemail.rb +20 -0
- package/dist/cli.js +27 -10
- package/dist/cli.js.map +1 -1
- package/dist/client.d.ts +2 -0
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +16 -6
- package/dist/client.js.map +1 -1
- package/dist/commands/account.js +1 -1
- package/dist/commands/account.js.map +1 -1
- package/dist/commands/attributes.js +1 -1
- package/dist/commands/attributes.js.map +1 -1
- package/dist/commands/campaigns.d.ts.map +1 -1
- package/dist/commands/campaigns.js +103 -8
- package/dist/commands/campaigns.js.map +1 -1
- package/dist/commands/config.d.ts.map +1 -1
- package/dist/commands/config.js +63 -4
- package/dist/commands/config.js.map +1 -1
- package/dist/commands/contacts.d.ts.map +1 -1
- package/dist/commands/contacts.js +91 -12
- package/dist/commands/contacts.js.map +1 -1
- package/dist/commands/emails.js +1 -1
- package/dist/commands/emails.js.map +1 -1
- package/dist/commands/interests.d.ts +5 -0
- package/dist/commands/interests.d.ts.map +1 -0
- package/dist/commands/interests.js +172 -0
- package/dist/commands/interests.js.map +1 -0
- package/dist/commands/lists.d.ts.map +1 -1
- package/dist/commands/lists.js +6 -8
- package/dist/commands/lists.js.map +1 -1
- package/dist/commands/logs.d.ts +5 -0
- package/dist/commands/logs.d.ts.map +1 -0
- package/dist/commands/logs.js +237 -0
- package/dist/commands/logs.js.map +1 -0
- package/dist/commands/reports.js +1 -1
- package/dist/commands/reports.js.map +1 -1
- package/dist/commands/segments.js +1 -1
- package/dist/commands/segments.js.map +1 -1
- package/dist/commands/senders.d.ts.map +1 -1
- package/dist/commands/senders.js +11 -8
- package/dist/commands/senders.js.map +1 -1
- package/dist/commands/suppressed.js +1 -1
- package/dist/commands/suppressed.js.map +1 -1
- package/dist/commands/tags.d.ts +5 -0
- package/dist/commands/tags.d.ts.map +1 -0
- package/dist/commands/tags.js +124 -0
- package/dist/commands/tags.js.map +1 -0
- package/dist/commands/templates.js +1 -1
- package/dist/commands/templates.js.map +1 -1
- package/dist/commands/transactional-templates.d.ts +5 -0
- package/dist/commands/transactional-templates.d.ts.map +1 -0
- package/dist/commands/transactional-templates.js +354 -0
- package/dist/commands/transactional-templates.js.map +1 -0
- package/dist/commands/webhooks.js +1 -1
- package/dist/commands/webhooks.js.map +1 -1
- package/dist/utils/auth.d.ts +8 -1
- package/dist/utils/auth.d.ts.map +1 -1
- package/dist/utils/auth.js +39 -11
- package/dist/utils/auth.js.map +1 -1
- package/dist/utils/config-file.d.ts +7 -0
- package/dist/utils/config-file.d.ts.map +1 -1
- package/dist/utils/config-file.js +15 -0
- package/dist/utils/config-file.js.map +1 -1
- package/dist/utils/config.d.ts +2 -0
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +12 -4
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/errors.js +1 -1
- package/dist/utils/errors.js.map +1 -1
- package/dist/utils/list-defaults.d.ts +33 -0
- package/dist/utils/list-defaults.d.ts.map +1 -0
- package/dist/utils/list-defaults.js +52 -0
- package/dist/utils/list-defaults.js.map +1 -0
- package/dist/utils/output.d.ts.map +1 -1
- package/dist/utils/output.js +36 -13
- package/dist/utils/output.js.map +1 -1
- package/dist/utils/progress.d.ts.map +1 -1
- package/dist/utils/progress.js +32 -4
- package/dist/utils/progress.js.map +1 -1
- package/dist/utils/spinner.d.ts +17 -0
- package/dist/utils/spinner.d.ts.map +1 -0
- package/dist/utils/spinner.js +43 -0
- package/dist/utils/spinner.js.map +1 -0
- package/docs/DOCUMENTATION-STANDARD.md +1068 -0
- package/docs/README.md +161 -0
- package/docs/developer/ARCHITECTURE.md +516 -0
- package/docs/developer/AUTH.md +204 -0
- package/docs/developer/CONTRIBUTING.md +227 -0
- package/docs/developer/DOCUMENTATION_SUMMARY.md +346 -0
- package/docs/developer/PROJECT_INDEX.md +365 -0
- package/docs/planning/API_COVERAGE.md +1045 -0
- package/docs/planning/BACKLOG.md +1159 -0
- package/docs/planning/PROFILE_SYSTEM_TASKS.md +287 -0
- package/docs/planning/UX_IMPLEMENTATION_PLAN.md +691 -0
- package/docs/planning/archive/RELEASE_CHECKLIST_v1.3.0.md +332 -0
- package/docs/planning/archive/RELEASE_v1.3.0.md +428 -0
- package/docs/planning/archive/cakemail-cli-ux-improvements.md +438 -0
- package/docs/planning/cakemail-profile-system-plan.md +1121 -0
- package/docs/testing/AI_USER_SIMULATION_DESIGN.md +1342 -0
- package/docs/testing/KENOGAMI_BIDIRECTIONAL_FLOW.md +1517 -0
- package/docs/testing/KENOGAMI_TRUTH_RECONCILIATION_SYSTEM.md +1369 -0
- package/docs/user-manual/.obsidian/app.json +1 -0
- package/docs/user-manual/.obsidian/appearance.json +1 -0
- package/docs/user-manual/.obsidian/core-plugins.json +33 -0
- package/docs/user-manual/.obsidian/workspace.json +167 -0
- package/docs/user-manual/01-getting-started/01-installation.md +214 -0
- package/docs/user-manual/01-getting-started/02-quick-start.md +432 -0
- package/docs/user-manual/01-getting-started/03-authentication.md +448 -0
- package/docs/user-manual/01-getting-started/04-configuration.md +430 -0
- package/docs/user-manual/01-getting-started/05-output-formats.md +447 -0
- package/docs/user-manual/02-core-concepts/01-accounts.md +514 -0
- package/docs/user-manual/02-core-concepts/02-profile-system.md +771 -0
- package/docs/user-manual/02-core-concepts/03-smart-defaults.md +485 -0
- package/docs/user-manual/02-core-concepts/04-authentication-methods.md +435 -0
- package/docs/user-manual/02-core-concepts/05-pagination-filtering.md +600 -0
- package/docs/user-manual/02-core-concepts/06-error-handling.md +718 -0
- package/docs/user-manual/02-core-concepts/07-api-coverage.md +483 -0
- package/docs/user-manual/03-email-operations/01-senders.md +490 -0
- package/docs/user-manual/03-email-operations/02-templates.md +444 -0
- package/docs/user-manual/03-email-operations/03-transactional-emails.md +706 -0
- package/docs/user-manual/03-email-operations/04-email-tracking.md +407 -0
- package/docs/user-manual/04-campaign-management/01-campaigns-basics.md +394 -0
- package/docs/user-manual/04-campaign-management/02-campaign-scheduling.md +630 -0
- package/docs/user-manual/04-campaign-management/03-campaign-testing.md +997 -0
- package/docs/user-manual/04-campaign-management/04-campaign-lifecycle.md +709 -0
- package/docs/user-manual/04-campaign-management/05-campaign-links.md +934 -0
- package/docs/user-manual/05-contact-management/01-lists.md +836 -0
- package/docs/user-manual/05-contact-management/02-contacts.md +1035 -0
- package/docs/user-manual/05-contact-management/03-custom-attributes.md +788 -0
- package/docs/user-manual/05-contact-management/04-segments.md +1028 -0
- package/docs/user-manual/05-contact-management/05-contact-import-export.md +1031 -0
- package/docs/user-manual/06-analytics-reporting/01-campaign-analytics.md +867 -0
- package/docs/user-manual/06-analytics-reporting/02-account-reports.md +227 -0
- package/docs/user-manual/07-integrations/01-webhooks-integration.md +259 -0
- package/docs/user-manual/07-integrations/02-automation.md +326 -0
- package/docs/user-manual/08-advanced-usage/01-scripting-patterns.md +672 -0
- package/docs/user-manual/08-advanced-usage/02-bulk-operations.md +932 -0
- package/docs/user-manual/08-advanced-usage/03-ci-cd-integration.md +892 -0
- package/docs/user-manual/08-advanced-usage/04-performance-optimization.md +766 -0
- package/docs/user-manual/09-command-reference/01-config.md +776 -0
- package/docs/user-manual/09-command-reference/02-account.md +652 -0
- package/docs/user-manual/09-command-reference/03-lists.md +958 -0
- package/docs/user-manual/09-command-reference/04-contacts.md +1408 -0
- package/docs/user-manual/09-command-reference/05-attributes.md +617 -0
- package/docs/user-manual/09-command-reference/06-segments.md +894 -0
- package/docs/user-manual/09-command-reference/07-senders.md +803 -0
- package/docs/user-manual/09-command-reference/08-templates.md +818 -0
- package/docs/user-manual/09-command-reference/09-campaigns.md +1250 -0
- package/docs/user-manual/09-command-reference/10-emails.md +807 -0
- package/docs/user-manual/09-command-reference/11-reports.md +1135 -0
- package/docs/user-manual/09-command-reference/12-webhooks.md +773 -0
- package/docs/user-manual/09-command-reference/13-suppressed.md +797 -0
- package/docs/user-manual/09-command-reference/14-interests.md +630 -0
- package/docs/user-manual/09-command-reference/15-tags.md +584 -0
- package/docs/user-manual/09-command-reference/16-logs.md +656 -0
- package/docs/user-manual/09-command-reference/17-transactional-templates.md +850 -0
- package/docs/user-manual/10-troubleshooting/01-common-errors.md +457 -0
- package/docs/user-manual/10-troubleshooting/02-authentication-issues.md +558 -0
- package/docs/user-manual/10-troubleshooting/03-connection-problems.md +634 -0
- package/docs/user-manual/10-troubleshooting/04-debugging.md +725 -0
- package/docs/user-manual/11-appendix/04-faq.md +484 -0
- package/docs/user-manual/11-appendix/05-glossary.md +250 -0
- package/docs/user-manual/README.md +0 -0
- package/package.json +13 -47
- package/src/cli.ts +125 -0
- package/src/client.ts +16 -0
- package/src/commands/account.ts +267 -0
- package/src/commands/accounts.ts +78 -0
- package/src/commands/actions.ts +249 -0
- package/src/commands/attributes.ts +139 -0
- package/src/commands/campaign-blueprints.ts +106 -0
- package/src/commands/campaigns.ts +469 -0
- package/src/commands/config.ts +77 -0
- package/src/commands/contacts.ts +612 -0
- package/src/commands/custom-attributes.ts +127 -0
- package/src/commands/dkims.ts +117 -0
- package/src/commands/domains.ts +82 -0
- package/src/commands/email-apis.ts +569 -0
- package/src/commands/emails.ts +197 -0
- package/src/commands/forms.ts +283 -0
- package/src/commands/interests.ts +155 -0
- package/src/commands/links.ts +38 -0
- package/src/commands/lists.ts +406 -0
- package/src/commands/logos.ts +71 -0
- package/src/commands/logs.ts +386 -0
- package/src/commands/reports.ts +306 -0
- package/src/commands/segments.ts +158 -0
- package/src/commands/senders.ts +204 -0
- package/src/commands/sub-accounts.ts +271 -0
- package/src/commands/suppressed-emails.ts +234 -0
- package/src/commands/suppressed.ts +198 -0
- package/src/commands/system-emails.ts +85 -0
- package/src/commands/tags.ts +146 -0
- package/src/commands/tasks.ts +116 -0
- package/src/commands/templates.ts +189 -0
- package/src/commands/tokens.ts +83 -0
- package/src/commands/transactional-emails.ts +374 -0
- package/src/commands/transactional-templates.ts +385 -0
- package/src/commands/users.ts +506 -0
- package/src/commands/webhooks.ts +172 -0
- package/src/commands/workflow-blueprints.ts +123 -0
- package/src/commands/workflows.ts +265 -0
- package/src/types/profile.ts +93 -0
- package/src/utils/auth.ts +272 -0
- package/src/utils/config-file.ts +96 -0
- package/src/utils/config.ts +134 -0
- package/src/utils/confirm.ts +32 -0
- package/src/utils/defaults.ts +99 -0
- package/src/utils/errors.ts +116 -0
- package/src/utils/interactive.ts +91 -0
- package/src/utils/list-defaults.ts +74 -0
- package/src/utils/output.ts +190 -0
- package/src/utils/progress.ts +320 -0
- package/src/utils/spinner.ts +22 -0
- package/tests/IMPLEMENTATION_STATUS.md +258 -0
- package/tests/PTY_SETUP.md +118 -0
- package/tests/PTY_TESTING_GUIDE.md +507 -0
- package/tests/README.md +244 -0
- package/tests/fixtures/api-responses/campaigns.json +34 -0
- package/tests/fixtures/test-config.json +13 -0
- package/tests/helpers/cli-runner.ts +128 -0
- package/tests/helpers/mock-server.ts +301 -0
- package/tests/helpers/pty-runner.ts +181 -0
- package/tests/integration/campaigns-real-api.test.ts +196 -0
- package/tests/integration/setup-integration.ts +50 -0
- package/tests/pty/campaigns.test.ts +241 -0
- package/tests/setup.ts +34 -0
- package/tsconfig.json +15 -0
- package/vitest.config.ts +28 -0
|
@@ -0,0 +1,1342 @@
|
|
|
1
|
+
# AI-Based User Simulation Testing Framework
|
|
2
|
+
|
|
3
|
+
## Executive Summary
|
|
4
|
+
|
|
5
|
+
This document outlines a comprehensive AI-based user simulation testing framework designed to validate software systems (CLI, web, mobile) against their documentation. The framework uses AI agents to act as users, execute commands/actions based on documentation, and validate outcomes against expected behavior.
|
|
6
|
+
|
|
7
|
+
**Core Principle:** The AI reads documentation like a human user would, attempts to perform tasks, and reports discrepancies between documented behavior and actual behavior.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Table of Contents
|
|
12
|
+
|
|
13
|
+
1. [Architecture Overview](#architecture-overview)
|
|
14
|
+
2. [Core Components](#core-components)
|
|
15
|
+
3. [Testing Workflow](#testing-workflow)
|
|
16
|
+
4. [Platform Adaptations](#platform-adaptations)
|
|
17
|
+
5. [Implementation Design](#implementation-design)
|
|
18
|
+
6. [Quality Metrics](#quality-metrics)
|
|
19
|
+
7. [Example Scenarios](#example-scenarios)
|
|
20
|
+
8. [Future Enhancements](#future-enhancements)
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Architecture Overview
|
|
25
|
+
|
|
26
|
+
### High-Level Architecture
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
30
|
+
│ AI User Simulation System │
|
|
31
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
32
|
+
│
|
|
33
|
+
┌────────────────────────┼────────────────────────┐
|
|
34
|
+
│ │ │
|
|
35
|
+
▼ ▼ ▼
|
|
36
|
+
┌───────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
37
|
+
│ Knowledge │ │ Execution │ │ Validation │
|
|
38
|
+
│ Ingestion │───────▶│ Engine │───────▶│ Engine │
|
|
39
|
+
│ Layer │ │ │ │ │
|
|
40
|
+
└───────────────┘ └──────────────┘ └──────────────┘
|
|
41
|
+
│ │ │
|
|
42
|
+
│ │ │
|
|
43
|
+
▼ ▼ ▼
|
|
44
|
+
┌───────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
45
|
+
│ Documentation │ │ Target │ │ Test Results │
|
|
46
|
+
│ Repository │ │ System │ │ Database │
|
|
47
|
+
│ (Markdown, │ │ (CLI, Web, │ │ (Pass/Fail, │
|
|
48
|
+
│ API specs) │ │ Mobile) │ │ Evidence) │
|
|
49
|
+
└───────────────┘ └──────────────┘ └──────────────┘
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Key Principles
|
|
53
|
+
|
|
54
|
+
1. **Documentation-Driven:** AI agents derive test cases exclusively from documentation
|
|
55
|
+
2. **Platform-Agnostic:** Core framework adapts to CLI, web, and mobile interfaces
|
|
56
|
+
3. **Autonomous Learning:** AI learns expected behaviors from examples in documentation
|
|
57
|
+
4. **Self-Healing:** AI can adapt to minor UI/output changes without manual updates
|
|
58
|
+
5. **Evidence-Based:** Every assertion is backed by screenshots, logs, or output captures
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Core Components
|
|
63
|
+
|
|
64
|
+
### 1. Knowledge Ingestion Layer
|
|
65
|
+
|
|
66
|
+
**Purpose:** Parse and understand documentation to build a knowledge graph of system capabilities.
|
|
67
|
+
|
|
68
|
+
#### Components:
|
|
69
|
+
|
|
70
|
+
**1.1 Documentation Parser**
|
|
71
|
+
- **Input:** Markdown files, API specifications, command references, tutorials
|
|
72
|
+
- **Output:** Structured knowledge graph of commands, parameters, expected outputs, examples
|
|
73
|
+
- **Technology:** LLM with retrieval-augmented generation (RAG)
|
|
74
|
+
|
|
75
|
+
**Example Structure:**
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"command": "cakemail campaigns list",
|
|
79
|
+
"category": "campaigns",
|
|
80
|
+
"documentation_source": "docs/user-manual/09-command-reference/09-campaigns.md",
|
|
81
|
+
"description": "List all campaigns with optional filtering",
|
|
82
|
+
"parameters": {
|
|
83
|
+
"status": {
|
|
84
|
+
"type": "option",
|
|
85
|
+
"flag": "-s, --status",
|
|
86
|
+
"description": "Filter by status",
|
|
87
|
+
"values": ["draft", "scheduled", "sent", "failed"],
|
|
88
|
+
"required": false
|
|
89
|
+
},
|
|
90
|
+
"format": {
|
|
91
|
+
"type": "global_option",
|
|
92
|
+
"flag": "-f, --format",
|
|
93
|
+
"values": ["json", "table", "compact"],
|
|
94
|
+
"default_behavior": "profile-dependent"
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
"expected_outputs": {
|
|
98
|
+
"json": {
|
|
99
|
+
"structure": "array of campaign objects",
|
|
100
|
+
"sample": "{\"data\":[{\"id\":123,\"name\":\"Newsletter\",...}]}"
|
|
101
|
+
},
|
|
102
|
+
"table": {
|
|
103
|
+
"description": "Formatted table with key fields",
|
|
104
|
+
"sample": "ASCII table with columns: ID, Name, Status, Created"
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
"examples": [
|
|
108
|
+
{
|
|
109
|
+
"command": "cakemail campaigns list --status sent",
|
|
110
|
+
"expected_behavior": "Shows only sent campaigns",
|
|
111
|
+
"success_criteria": ["All returned campaigns have status=sent", "Exit code 0"]
|
|
112
|
+
}
|
|
113
|
+
],
|
|
114
|
+
"edge_cases": [
|
|
115
|
+
"Empty list returns empty array/table",
|
|
116
|
+
"Invalid status returns error with suggestion"
|
|
117
|
+
]
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**1.2 Example Extractor**
|
|
122
|
+
- Parses code blocks in documentation
|
|
123
|
+
- Identifies input commands and expected outputs
|
|
124
|
+
- Builds test case templates from examples
|
|
125
|
+
|
|
126
|
+
**1.3 Context Builder**
|
|
127
|
+
- Understands relationships between commands (e.g., creating a campaign requires that a list already exists, so a list must be created or looked up first)
|
|
128
|
+
- Builds dependency graphs (must create list before adding contacts)
|
|
129
|
+
- Identifies authentication requirements
|
|
130
|
+
|
|
131
|
+
### 2. Execution Engine
|
|
132
|
+
|
|
133
|
+
**Purpose:** Execute commands/actions against the target system and capture results.
|
|
134
|
+
|
|
135
|
+
#### Platform-Specific Adapters:
|
|
136
|
+
|
|
137
|
+
**2.1 CLI Adapter**
|
|
138
|
+
```typescript
|
|
139
|
+
interface CLIAdapter {
|
|
140
|
+
executeCommand(command: string, env: Environment): ExecutionResult;
|
|
141
|
+
captureOutput(): { stdout: string, stderr: string, exitCode: number };
|
|
142
|
+
captureScreenshot(): string; // For terminal output capture
|
|
143
|
+
getEnvironmentState(): EnvironmentState; // Auth tokens, config files
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**2.2 Web Adapter**
|
|
148
|
+
```typescript
|
|
149
|
+
interface WebAdapter {
|
|
150
|
+
navigate(url: string): void;
|
|
151
|
+
findElement(selector: string): Element;
|
|
152
|
+
click(element: Element): void;
|
|
153
|
+
type(element: Element, text: string): void;
|
|
154
|
+
captureScreenshot(): Buffer;
|
|
155
|
+
getPageSource(): string;
|
|
156
|
+
waitForElement(selector: string, timeout: number): Element;
|
|
157
|
+
evaluateJavaScript(script: string): any;
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**2.3 Mobile Adapter**
|
|
162
|
+
```typescript
|
|
163
|
+
interface MobileAdapter {
|
|
164
|
+
tap(x: number, y: number): void;
|
|
165
|
+
swipe(direction: 'up' | 'down' | 'left' | 'right'): void;
|
|
166
|
+
enterText(text: string): void;
|
|
167
|
+
captureScreenshot(): Buffer;
|
|
168
|
+
getViewHierarchy(): ViewTree;
|
|
169
|
+
waitForElement(accessibilityId: string): Element;
|
|
170
|
+
}
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
#### Execution Context Manager
|
|
174
|
+
- Manages test data isolation (separate test accounts, sandboxed environments)
|
|
175
|
+
- Handles authentication and session management
|
|
176
|
+
- Cleans up after test runs (deletes test data)
|
|
177
|
+
- Verifies state between test cases
|
|
178
|
+
|
|
179
|
+
### 3. Validation Engine
|
|
180
|
+
|
|
181
|
+
**Purpose:** Compare actual results against documented expected behavior.
|
|
182
|
+
|
|
183
|
+
#### Validation Strategies:
|
|
184
|
+
|
|
185
|
+
**3.1 Output Structure Validation**
|
|
186
|
+
```typescript
|
|
187
|
+
interface OutputValidator {
|
|
188
|
+
validateJSON(actual: object, expected: JSONSchema): ValidationResult;
|
|
189
|
+
validateTable(actual: string, expectedColumns: string[]): ValidationResult;
|
|
190
|
+
validateText(actual: string, expectedPatterns: RegExp[]): ValidationResult;
|
|
191
|
+
validateStatusCode(actual: number, expected: number): ValidationResult;
|
|
192
|
+
}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
**3.2 Semantic Validation**
|
|
196
|
+
- Uses LLM to understand if output semantically matches documentation
|
|
197
|
+
- Example: "Shows only sent campaigns" → Validates all items have `status: "sent"`
|
|
198
|
+
- Handles variations in formatting (dates, numbers, etc.)
|
|
199
|
+
|
|
200
|
+
**3.3 Visual Validation** (Web/Mobile)
|
|
201
|
+
- Screenshot comparison using image diffing
|
|
202
|
+
- AI-based visual assertion ("button should be green", "table has 5 rows")
|
|
203
|
+
- Accessibility validation (screen reader compatibility)
|
|
204
|
+
|
|
205
|
+
**3.4 Behavioral Validation**
|
|
206
|
+
- Validates workflows: "Create → List → Delete" sequence
|
|
207
|
+
- Validates error handling: "Invalid input shows helpful error message"
|
|
208
|
+
- Validates state changes: "After delete, item no longer appears in list"
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Testing Workflow
|
|
213
|
+
|
|
214
|
+
### Phase 1: Test Plan Generation
|
|
215
|
+
|
|
216
|
+
```
|
|
217
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
218
|
+
│ 1. AI reads documentation │
|
|
219
|
+
│ - Command reference pages │
|
|
220
|
+
│ - User guides and tutorials │
|
|
221
|
+
│ - API specifications │
|
|
222
|
+
└──────────────────────────────────────────────────────────────┘
|
|
223
|
+
│
|
|
224
|
+
▼
|
|
225
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
226
|
+
│ 2. AI generates test plan │
|
|
227
|
+
│ - Identifies all testable commands/features │
|
|
228
|
+
│ - Extracts examples from documentation │
|
|
229
|
+
│ - Builds dependency graph (order of operations) │
|
|
230
|
+
│ - Identifies edge cases and error scenarios │
|
|
231
|
+
└──────────────────────────────────────────────────────────────┘
|
|
232
|
+
│
|
|
233
|
+
▼
|
|
234
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
235
|
+
│ 3. Test plan review (optional human-in-the-loop) │
|
|
236
|
+
│ - Human reviews generated test cases │
|
|
237
|
+
│ - Adds missing scenarios │
|
|
238
|
+
│ - Approves or refines plan │
|
|
239
|
+
└──────────────────────────────────────────────────────────────┘
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
**Example Test Plan:**
|
|
243
|
+
```yaml
|
|
244
|
+
test_suite: "Campaigns Management"
|
|
245
|
+
prerequisites:
|
|
246
|
+
- authenticated: true
|
|
247
|
+
- minimum_lists: 1
|
|
248
|
+
- minimum_senders: 1
|
|
249
|
+
|
|
250
|
+
test_cases:
|
|
251
|
+
- id: "CAMP-001"
|
|
252
|
+
name: "List all campaigns"
|
|
253
|
+
command: "cakemail campaigns list"
|
|
254
|
+
expected_behavior:
|
|
255
|
+
- exit_code: 0
|
|
256
|
+
- output_format: "json by default (developer profile)"
|
|
257
|
+
- contains_fields: ["id", "name", "status", "created_on"]
|
|
258
|
+
- validates_against_schema: true
|
|
259
|
+
|
|
260
|
+
- id: "CAMP-002"
|
|
261
|
+
name: "List campaigns with status filter"
|
|
262
|
+
command: "cakemail campaigns list --status sent"
|
|
263
|
+
expected_behavior:
|
|
264
|
+
- exit_code: 0
|
|
265
|
+
- all_items_match: "status == 'sent'"
|
|
266
|
+
- error_if_no_matches: false  # returns an empty array instead of an error
|
|
267
|
+
|
|
268
|
+
- id: "CAMP-003"
|
|
269
|
+
name: "Create campaign interactively"
|
|
270
|
+
profile: "marketer"
|
|
271
|
+
command: "cakemail campaigns create"
|
|
272
|
+
interactions:
|
|
273
|
+
- prompt: "Campaign name:"
|
|
274
|
+
input: "Test Campaign {{timestamp}}"
|
|
275
|
+
- prompt: "Select a list:"
|
|
276
|
+
action: "select_first"
|
|
277
|
+
- prompt: "Select a sender:"
|
|
278
|
+
action: "select_first"
|
|
279
|
+
expected_behavior:
|
|
280
|
+
- exit_code: 0
|
|
281
|
+
- output_contains: "Campaign created successfully"
|
|
282
|
+
- new_campaign_appears_in_list: true
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### Phase 2: Test Execution
|
|
286
|
+
|
|
287
|
+
```
|
|
288
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
289
|
+
│ 1. Environment setup │
|
|
290
|
+
│ - Create isolated test account/environment │
|
|
291
|
+
│ - Configure authentication │
|
|
292
|
+
│ - Seed test data (lists, contacts, senders) │
|
|
293
|
+
└──────────────────────────────────────────────────────────────┘
|
|
294
|
+
│
|
|
295
|
+
▼
|
|
296
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
297
|
+
│ 2. Execute test cases │
|
|
298
|
+
│ For each test case: │
|
|
299
|
+
│ a) Set up prerequisites │
|
|
300
|
+
│ b) Execute command/action │
|
|
301
|
+
│ c) Capture output/screenshots │
|
|
302
|
+
│ d) Validate against expected behavior │
|
|
303
|
+
│ e) Record results with evidence │
|
|
304
|
+
└──────────────────────────────────────────────────────────────┘
|
|
305
|
+
│
|
|
306
|
+
▼
|
|
307
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
308
|
+
│ 3. Cleanup │
|
|
309
|
+
│ - Delete test data │
|
|
310
|
+
│ - Reset environment state │
|
|
311
|
+
│ - Archive test artifacts (logs, screenshots) │
|
|
312
|
+
└──────────────────────────────────────────────────────────────┘
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
### Phase 3: Results Analysis & Reporting
|
|
316
|
+
|
|
317
|
+
```
|
|
318
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
319
|
+
│ 1. AI analyzes failures │
|
|
320
|
+
│ - Categorizes failures (bug, documentation issue, flaky) │
|
|
321
|
+
│ - Identifies root causes │
|
|
322
|
+
│ - Suggests fixes │
|
|
323
|
+
└──────────────────────────────────────────────────────────────┘
|
|
324
|
+
│
|
|
325
|
+
▼
|
|
326
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
327
|
+
│ 2. Generate comprehensive report │
|
|
328
|
+
│ - Test coverage metrics │
|
|
329
|
+
│ - Pass/fail breakdown │
|
|
330
|
+
│ - Failed test details with evidence │
|
|
331
|
+
│ - Recommendations for documentation/code fixes │
|
|
332
|
+
└──────────────────────────────────────────────────────────────┘
|
|
333
|
+
│
|
|
334
|
+
▼
|
|
335
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
336
|
+
│ 3. Create GitHub issues (optional automation) │
|
|
337
|
+
│ - Bug reports with reproduction steps │
|
|
338
|
+
│ - Documentation improvement suggestions │
|
|
339
|
+
│ - Links to test run artifacts │
|
|
340
|
+
└──────────────────────────────────────────────────────────────┘
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Platform Adaptations
|
|
346
|
+
|
|
347
|
+
### CLI Testing (Current: Cakemail CLI)
|
|
348
|
+
|
|
349
|
+
**Execution Strategy:**
|
|
350
|
+
```typescript
|
|
351
|
+
class CLIUserSimulation {
|
|
352
|
+
async testCommand(testCase: TestCase): Promise<TestResult> {
|
|
353
|
+
// 1. Set up environment
|
|
354
|
+
const env = await this.setupEnvironment(testCase.prerequisites);
|
|
355
|
+
|
|
356
|
+
// 2. Execute command
|
|
357
|
+
const result = await this.executeCommand(testCase.command, env);
|
|
358
|
+
|
|
359
|
+
// 3. Capture output
|
|
360
|
+
const evidence = {
|
|
361
|
+
stdout: result.stdout,
|
|
362
|
+
stderr: result.stderr,
|
|
363
|
+
exitCode: result.exitCode,
|
|
364
|
+
terminalScreenshot: await this.captureTerminal()
|
|
365
|
+
};
|
|
366
|
+
|
|
367
|
+
// 4. Validate
|
|
368
|
+
const validations = await this.validateOutput(
|
|
369
|
+
result,
|
|
370
|
+
testCase.expectedBehavior
|
|
371
|
+
);
|
|
372
|
+
|
|
373
|
+
// 5. Return result
|
|
374
|
+
return {
|
|
375
|
+
testCaseId: testCase.id,
|
|
376
|
+
passed: validations.every(v => v.passed),
|
|
377
|
+
evidence,
|
|
378
|
+
validations
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
**CLI-Specific Validations:**
|
|
385
|
+
- Exit code validation (0 = success, non-zero = error)
|
|
386
|
+
- stdout/stderr content validation
|
|
387
|
+
- JSON structure validation
|
|
388
|
+
- Table format validation (column headers, alignment)
|
|
389
|
+
- Color output validation (ANSI codes)
|
|
390
|
+
- Interactive prompt handling (PTY simulation)
|
|
391
|
+
- Configuration file state changes
|
|
392
|
+
|
|
393
|
+
### Web Testing (Adaptable to Cakemail Web App)
|
|
394
|
+
|
|
395
|
+
**Execution Strategy:**
|
|
396
|
+
```typescript
|
|
397
|
+
class WebUserSimulation {
|
|
398
|
+
async testUserFlow(testCase: TestCase): Promise<TestResult> {
|
|
399
|
+
// 1. Navigate to page
|
|
400
|
+
await this.browser.navigate(testCase.url);
|
|
401
|
+
|
|
402
|
+
// 2. Execute user actions
|
|
403
|
+
for (const step of testCase.steps) {
|
|
404
|
+
await this.executeStep(step);
|
|
405
|
+
await this.captureScreenshot(step.name);
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// 3. Validate final state
|
|
409
|
+
const validations = await this.validatePageState(
|
|
410
|
+
testCase.expectedState
|
|
411
|
+
);
|
|
412
|
+
|
|
413
|
+
// 4. Return result
|
|
414
|
+
return {
|
|
415
|
+
testCaseId: testCase.id,
|
|
416
|
+
passed: validations.every(v => v.passed),
|
|
417
|
+
screenshots: this.screenshots,
|
|
418
|
+
validations
|
|
419
|
+
};
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
async executeStep(step: UIStep): Promise<void> {
|
|
423
|
+
switch (step.type) {
|
|
424
|
+
case 'click':
|
|
425
|
+
const element = await this.findElement(step.selector);
|
|
426
|
+
await element.click();
|
|
427
|
+
break;
|
|
428
|
+
case 'type':
|
|
429
|
+
const input = await this.findElement(step.selector);
|
|
430
|
+
await input.type(step.text);
|
|
431
|
+
break;
|
|
432
|
+
case 'verify':
|
|
433
|
+
await this.verifyElementText(step.selector, step.expectedText);
|
|
434
|
+
break;
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
**Web-Specific Validations:**
|
|
441
|
+
- Page title and URL validation
|
|
442
|
+
- Element presence/absence validation
|
|
443
|
+
- Text content validation
|
|
444
|
+
- Form submission validation
|
|
445
|
+
- JavaScript state validation
|
|
446
|
+
- Network request validation (API calls)
|
|
447
|
+
- Visual regression testing (screenshot comparison)
|
|
448
|
+
- Accessibility validation (WCAG compliance)
|
|
449
|
+
|
|
450
|
+
**Example Test Case (Web):**
|
|
451
|
+
```yaml
|
|
452
|
+
test_case:
|
|
453
|
+
id: "WEB-CAMP-001"
|
|
454
|
+
name: "Create campaign via web UI"
|
|
455
|
+
url: "https://app.cakemail.com/campaigns"
|
|
456
|
+
steps:
|
|
457
|
+
- type: "click"
|
|
458
|
+
selector: "button[data-testid='create-campaign']"
|
|
459
|
+
description: "Click Create Campaign button"
|
|
460
|
+
|
|
461
|
+
- type: "wait"
|
|
462
|
+
selector: "input[name='campaign-name']"
|
|
463
|
+
description: "Wait for modal to appear"
|
|
464
|
+
|
|
465
|
+
- type: "type"
|
|
466
|
+
selector: "input[name='campaign-name']"
|
|
467
|
+
text: "Test Campaign {{timestamp}}"
|
|
468
|
+
description: "Enter campaign name"
|
|
469
|
+
|
|
470
|
+
- type: "click"
|
|
471
|
+
selector: "select[name='list-id']"
|
|
472
|
+
description: "Open list dropdown"
|
|
473
|
+
|
|
474
|
+
- type: "click"
|
|
475
|
+
selector: "select[name='list-id'] option:first-child"
|
|
476
|
+
description: "Select first list"
|
|
477
|
+
|
|
478
|
+
- type: "click"
|
|
479
|
+
selector: "button[type='submit']"
|
|
480
|
+
description: "Submit form"
|
|
481
|
+
|
|
482
|
+
- type: "verify"
|
|
483
|
+
selector: ".success-message"
|
|
484
|
+
expectedText: "Campaign created successfully"
|
|
485
|
+
description: "Verify success message"
|
|
486
|
+
|
|
487
|
+
expected_state:
|
|
488
|
+
- url_contains: "/campaigns/"
|
|
489
|
+
- element_exists: ".campaign-details"
|
|
490
|
+
- api_called: "POST /campaigns"
|
|
491
|
+
- api_response_status: 201
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
### Mobile Testing (Adaptable to Cakemail Mobile App)
|
|
495
|
+
|
|
496
|
+
**Execution Strategy:**
|
|
497
|
+
```typescript
|
|
498
|
+
class MobileUserSimulation {
|
|
499
|
+
async testMobileFlow(testCase: TestCase): Promise<TestResult> {
|
|
500
|
+
// 1. Launch app
|
|
501
|
+
await this.app.launch();
|
|
502
|
+
|
|
503
|
+
// 2. Navigate to screen
|
|
504
|
+
await this.navigateToScreen(testCase.screen);
|
|
505
|
+
|
|
506
|
+
// 3. Execute gestures
|
|
507
|
+
for (const gesture of testCase.gestures) {
|
|
508
|
+
await this.executeGesture(gesture);
|
|
509
|
+
await this.captureScreenshot(gesture.name);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// 4. Validate screen state
|
|
513
|
+
const validations = await this.validateScreenState(
|
|
514
|
+
testCase.expectedState
|
|
515
|
+
);
|
|
516
|
+
|
|
517
|
+
return {
|
|
518
|
+
testCaseId: testCase.id,
|
|
519
|
+
passed: validations.every(v => v.passed),
|
|
520
|
+
screenshots: this.screenshots,
|
|
521
|
+
validations
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
**Mobile-Specific Validations:**
|
|
528
|
+
- Screen title validation
|
|
529
|
+
- Element hierarchy validation
|
|
530
|
+
- Gesture response validation (swipe, tap, long-press)
|
|
531
|
+
- Orientation changes
|
|
532
|
+
- Push notification handling
|
|
533
|
+
- Offline mode behavior
|
|
534
|
+
- Native API integration (camera, contacts, etc.)
|
|
535
|
+
|
|
536
|
+
---
|
|
537
|
+
|
|
538
|
+
## Implementation Design
|
|
539
|
+
|
|
540
|
+
### Technology Stack
|
|
541
|
+
|
|
542
|
+
**AI/LLM Layer:**
|
|
543
|
+
- **LLM Provider:** OpenAI GPT-4 or Anthropic Claude (for reasoning and validation)
|
|
544
|
+
- **RAG System:** LangChain or LlamaIndex for documentation ingestion
|
|
545
|
+
- **Vector Store:** Pinecone or Weaviate for semantic search
|
|
546
|
+
|
|
547
|
+
**Execution Layer:**
|
|
548
|
+
- **CLI:** Node.js with `execa` for process execution, `node-pty` for interactive terminals
|
|
549
|
+
- **Web:** Playwright or Selenium for browser automation
|
|
550
|
+
- **Mobile:** Appium for iOS/Android testing
|
|
551
|
+
|
|
552
|
+
**Validation Layer:**
|
|
553
|
+
- **JSON Schema:** Ajv for JSON validation
|
|
554
|
+
- **Visual Diff:** Pixelmatch or Percy for screenshot comparison
|
|
555
|
+
- **Semantic Analysis:** LLM-based natural language validation
|
|
556
|
+
|
|
557
|
+
**Reporting:**
|
|
558
|
+
- **Test Results:** PostgreSQL or MongoDB
|
|
559
|
+
- **Artifacts:** S3 or local filesystem
|
|
560
|
+
- **Dashboard:** Custom React app or Allure Reports
|
|
561
|
+
|
|
562
|
+
### Core Modules
|
|
563
|
+
|
|
564
|
+
#### Module 1: Documentation Analyzer
|
|
565
|
+
|
|
566
|
+
```typescript
|
|
567
|
+
class DocumentationAnalyzer {
|
|
568
|
+
constructor(
|
|
569
|
+
private llm: LLMProvider,
|
|
570
|
+
private vectorStore: VectorStore
|
|
571
|
+
) {}
|
|
572
|
+
|
|
573
|
+
async ingestDocumentation(docsPath: string): Promise<KnowledgeGraph> {
|
|
574
|
+
// 1. Read all markdown files
|
|
575
|
+
const files = await this.readDocumentationFiles(docsPath);
|
|
576
|
+
|
|
577
|
+
// 2. Parse and chunk
|
|
578
|
+
const chunks = await this.chunkDocuments(files);
|
|
579
|
+
|
|
580
|
+
// 3. Generate embeddings
|
|
581
|
+
await this.vectorStore.addDocuments(chunks);
|
|
582
|
+
|
|
583
|
+
// 4. Extract structured knowledge
|
|
584
|
+
const knowledge = await this.extractKnowledge(chunks);
|
|
585
|
+
|
|
586
|
+
return knowledge;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
async extractKnowledge(chunks: Document[]): Promise<KnowledgeGraph> {
|
|
590
|
+
const prompt = `
|
|
591
|
+
Extract structured information from this documentation:
|
|
592
|
+
|
|
593
|
+
For each command or feature, extract:
|
|
594
|
+
1. Name and description
|
|
595
|
+
2. Parameters (required, optional, types, defaults)
|
|
596
|
+
3. Expected outputs (format, structure, examples)
|
|
597
|
+
4. Example commands from code blocks
|
|
598
|
+
5. Error scenarios and expected error messages
|
|
599
|
+
6. Dependencies (prerequisites, related commands)
|
|
600
|
+
|
|
601
|
+
${chunks.map(c => c.content).join('\n\n')}
|
|
602
|
+
`;
|
|
603
|
+
|
|
604
|
+
const response = await this.llm.complete(prompt);
|
|
605
|
+
return this.parseKnowledgeGraph(response);
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
async searchDocumentation(query: string): Promise<Document[]> {
|
|
609
|
+
return this.vectorStore.similaritySearch(query, 5);
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
#### Module 2: Test Case Generator
|
|
615
|
+
|
|
616
|
+
```typescript
|
|
617
|
+
class TestCaseGenerator {
|
|
618
|
+
constructor(
|
|
619
|
+
private analyzer: DocumentationAnalyzer,
|
|
620
|
+
private llm: LLMProvider
|
|
621
|
+
) {}
|
|
622
|
+
|
|
623
|
+
async generateTestPlan(knowledge: KnowledgeGraph): Promise<TestPlan> {
|
|
624
|
+
const testCases: TestCase[] = [];
|
|
625
|
+
|
|
626
|
+
// Generate test cases for each command
|
|
627
|
+
for (const command of knowledge.commands) {
|
|
628
|
+
// 1. Basic happy path test
|
|
629
|
+
testCases.push(await this.generateHappyPathTest(command));
|
|
630
|
+
|
|
631
|
+
// 2. Parameter variation tests
|
|
632
|
+
testCases.push(...await this.generateParameterTests(command));
|
|
633
|
+
|
|
634
|
+
// 3. Error scenario tests
|
|
635
|
+
testCases.push(...await this.generateErrorTests(command));
|
|
636
|
+
|
|
637
|
+
// 4. Example-based tests
|
|
638
|
+
testCases.push(...await this.generateExampleTests(command));
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
// Generate workflow tests (multi-step)
|
|
642
|
+
testCases.push(...await this.generateWorkflowTests(knowledge));
|
|
643
|
+
|
|
644
|
+
return {
|
|
645
|
+
testCases,
|
|
646
|
+
totalCommands: knowledge.commands.length,
|
|
647
|
+
coverage: this.calculateCoverage(testCases, knowledge)
|
|
648
|
+
};
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
async generateHappyPathTest(command: Command): Promise<TestCase> {
|
|
652
|
+
const prompt = `
|
|
653
|
+
Generate a test case for the command: ${command.name}
|
|
654
|
+
|
|
655
|
+
Documentation says: ${command.description}
|
|
656
|
+
Parameters: ${JSON.stringify(command.parameters)}
|
|
657
|
+
Expected output: ${JSON.stringify(command.expectedOutputs)}
|
|
658
|
+
|
|
659
|
+
Create a test case that validates the basic happy path:
|
|
660
|
+
- Command with required parameters only
|
|
661
|
+
- Expected successful execution
|
|
662
|
+
- Output validation criteria
|
|
663
|
+
`;
|
|
664
|
+
|
|
665
|
+
const response = await this.llm.complete(prompt);
|
|
666
|
+
return this.parseTestCase(response);
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
#### Module 3: Execution Orchestrator
|
|
672
|
+
|
|
673
|
+
```typescript
|
|
674
|
+
class ExecutionOrchestrator {
|
|
675
|
+
constructor(
|
|
676
|
+
private adapter: PlatformAdapter,
|
|
677
|
+
private validator: ValidationEngine
|
|
678
|
+
) {}
|
|
679
|
+
|
|
680
|
+
async runTestPlan(plan: TestPlan): Promise<TestResults> {
|
|
681
|
+
const results: TestResult[] = [];
|
|
682
|
+
|
|
683
|
+
// Set up test environment
|
|
684
|
+
await this.setupEnvironment();
|
|
685
|
+
|
|
686
|
+
try {
|
|
687
|
+
// Execute test cases in dependency order
|
|
688
|
+
for (const testCase of this.sortByDependencies(plan.testCases)) {
|
|
689
|
+
const result = await this.runTestCase(testCase);
|
|
690
|
+
results.push(result);
|
|
691
|
+
|
|
692
|
+
// Stop on critical failure
|
|
693
|
+
if (testCase.critical && !result.passed) {
|
|
694
|
+
break;
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
} finally {
|
|
698
|
+
// Clean up
|
|
699
|
+
await this.cleanupEnvironment();
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
return {
|
|
703
|
+
summary: this.calculateSummary(results),
|
|
704
|
+
results,
|
|
705
|
+
coverage: this.calculateCoverage(results, plan)
|
|
706
|
+
};
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
async runTestCase(testCase: TestCase): Promise<TestResult> {
|
|
710
|
+
const startTime = Date.now();
|
|
711
|
+
|
|
712
|
+
try {
|
|
713
|
+
// 1. Execute command/action
|
|
714
|
+
const executionResult = await this.adapter.execute(testCase);
|
|
715
|
+
|
|
716
|
+
// 2. Validate result
|
|
717
|
+
const validations = await this.validator.validate(
|
|
718
|
+
executionResult,
|
|
719
|
+
testCase.expectedBehavior
|
|
720
|
+
);
|
|
721
|
+
|
|
722
|
+
// 3. Collect evidence
|
|
723
|
+
const evidence = await this.collectEvidence(executionResult);
|
|
724
|
+
|
|
725
|
+
return {
|
|
726
|
+
testCaseId: testCase.id,
|
|
727
|
+
passed: validations.every(v => v.passed),
|
|
728
|
+
duration: Date.now() - startTime,
|
|
729
|
+
validations,
|
|
730
|
+
evidence
|
|
731
|
+
};
|
|
732
|
+
} catch (error) {
|
|
733
|
+
return {
|
|
734
|
+
testCaseId: testCase.id,
|
|
735
|
+
passed: false,
|
|
736
|
+
duration: Date.now() - startTime,
|
|
737
|
+
error: error.message,
|
|
738
|
+
evidence: { error: error.stack }
|
|
739
|
+
};
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
#### Module 4: AI Validation Engine
|
|
746
|
+
|
|
747
|
+
```typescript
|
|
748
|
+
class AIValidationEngine {
|
|
749
|
+
constructor(private llm: LLMProvider) {}
|
|
750
|
+
|
|
751
|
+
async validateSemantics(
|
|
752
|
+
actual: any,
|
|
753
|
+
expected: ExpectedBehavior
|
|
754
|
+
): Promise<ValidationResult> {
|
|
755
|
+
const prompt = `
|
|
756
|
+
You are validating software output against documented behavior.
|
|
757
|
+
|
|
758
|
+
Expected behavior from documentation:
|
|
759
|
+
${JSON.stringify(expected, null, 2)}
|
|
760
|
+
|
|
761
|
+
Actual output received:
|
|
762
|
+
${JSON.stringify(actual, null, 2)}
|
|
763
|
+
|
|
764
|
+
Analyze whether the actual output matches the expected behavior.
|
|
765
|
+
Consider:
|
|
766
|
+
1. Does the structure match?
|
|
767
|
+
2. Do the values make sense?
|
|
768
|
+
3. Are all required fields present?
|
|
769
|
+
4. Do error messages match expected patterns?
|
|
770
|
+
5. Are there any discrepancies?
|
|
771
|
+
|
|
772
|
+
Respond with:
|
|
773
|
+
{
|
|
774
|
+
"passed": true/false,
|
|
775
|
+
"confidence": 0.0-1.0,
|
|
776
|
+
"reasoning": "explanation",
|
|
777
|
+
"discrepancies": ["list of issues found"]
|
|
778
|
+
}
|
|
779
|
+
`;
|
|
780
|
+
|
|
781
|
+
const response = await this.llm.complete(prompt, {
|
|
782
|
+
responseFormat: 'json'
|
|
783
|
+
});
|
|
784
|
+
|
|
785
|
+
return JSON.parse(response);
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
async validateVisual(
|
|
789
|
+
screenshot: Buffer,
|
|
790
|
+
expectedDescription: string
|
|
791
|
+
): Promise<ValidationResult> {
|
|
792
|
+
const prompt = `
|
|
793
|
+
You are viewing a screenshot of a user interface.
|
|
794
|
+
|
|
795
|
+
The documentation states: "${expectedDescription}"
|
|
796
|
+
|
|
797
|
+
Based on the screenshot, does it match the documentation?
|
|
798
|
+
Consider layout, content, colors, and overall appearance.
|
|
799
|
+
|
|
800
|
+
Respond with validation result.
|
|
801
|
+
`;
|
|
802
|
+
|
|
803
|
+
// Use vision model (GPT-4V or Claude with vision)
|
|
804
|
+
const response = await this.llm.completeWithImage(prompt, screenshot);
|
|
805
|
+
|
|
806
|
+
return this.parseValidationResult(response);
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
```
|
|
810
|
+
|
|
811
|
+
### Configuration Example
|
|
812
|
+
|
|
813
|
+
```yaml
|
|
814
|
+
# ai-test-config.yaml
|
|
815
|
+
framework:
|
|
816
|
+
name: "Cakemail CLI AI Testing"
|
|
817
|
+
version: "1.0.0"
|
|
818
|
+
|
|
819
|
+
documentation:
|
|
820
|
+
sources:
|
|
821
|
+
- path: "docs/user-manual"
|
|
822
|
+
type: "markdown"
|
|
823
|
+
recursive: true
|
|
824
|
+
- path: "README.md"
|
|
825
|
+
type: "markdown"
|
|
826
|
+
- path: "dist/cli.js --help"
|
|
827
|
+
type: "command_help"
|
|
828
|
+
|
|
829
|
+
environment:
|
|
830
|
+
platform: "cli"
|
|
831
|
+
test_account:
|
|
832
|
+
email: "${CAKEMAIL_TEST_EMAIL}"
|
|
833
|
+
password: "${CAKEMAIL_TEST_PASSWORD}"
|
|
834
|
+
api_base: "https://api.cakemail.dev"
|
|
835
|
+
cleanup_after_tests: true
|
|
836
|
+
|
|
837
|
+
ai_config:
|
|
838
|
+
llm_provider: "anthropic"
|
|
839
|
+
model: "claude-sonnet-4"
|
|
840
|
+
temperature: 0.1 # Low temperature for consistent validation
|
|
841
|
+
max_tokens: 4000
|
|
842
|
+
|
|
843
|
+
test_generation:
|
|
844
|
+
auto_generate: true
|
|
845
|
+
include_edge_cases: true
|
|
846
|
+
include_error_scenarios: true
|
|
847
|
+
max_parameter_combinations: 5
|
|
848
|
+
|
|
849
|
+
execution:
|
|
850
|
+
parallel: false # Run sequentially for CLI
|
|
851
|
+
timeout_per_test: 60000 # 60 seconds
|
|
852
|
+
retry_on_failure: 1
|
|
853
|
+
capture_screenshots: true
|
|
854
|
+
capture_logs: true
|
|
855
|
+
|
|
856
|
+
validation:
|
|
857
|
+
strict_mode: false # Allow minor formatting differences
|
|
858
|
+
semantic_validation: true # Use AI for semantic matching
|
|
859
|
+
visual_validation: false # Not applicable for CLI
|
|
860
|
+
|
|
861
|
+
reporting:
|
|
862
|
+
output_format: "html"
|
|
863
|
+
output_path: "test-results/ai-simulation"
|
|
864
|
+
create_github_issues: false
|
|
865
|
+
slack_notifications: false
|
|
866
|
+
```
|
|
867
|
+
|
|
868
|
+
---
|
|
869
|
+
|
|
870
|
+
## Quality Metrics
|
|
871
|
+
|
|
872
|
+
### Coverage Metrics
|
|
873
|
+
|
|
874
|
+
```typescript
|
|
875
|
+
interface CoverageMetrics {
|
|
876
|
+
// Documentation coverage
|
|
877
|
+
totalCommands: number;
|
|
878
|
+
testedCommands: number;
|
|
879
|
+
untestedCommands: string[];
|
|
880
|
+
|
|
881
|
+
// Parameter coverage
|
|
882
|
+
totalParameters: number;
|
|
883
|
+
testedParameters: number;
|
|
884
|
+
parameterCombinationsCovered: number;
|
|
885
|
+
|
|
886
|
+
// Scenario coverage
|
|
887
|
+
happyPathsCovered: number;
|
|
888
|
+
errorScenariosCovered: number;
|
|
889
|
+
edgeCasesCovered: number;
|
|
890
|
+
|
|
891
|
+
// Example coverage
|
|
892
|
+
examplesInDocumentation: number;
|
|
893
|
+
examplesTested: number;
|
|
894
|
+
}
|
|
895
|
+
```
|
|
896
|
+
|
|
897
|
+
### Quality Metrics
|
|
898
|
+
|
|
899
|
+
```typescript
|
|
900
|
+
interface QualityMetrics {
|
|
901
|
+
// Test execution
|
|
902
|
+
totalTests: number;
|
|
903
|
+
passed: number;
|
|
904
|
+
failed: number;
|
|
905
|
+
skipped: number;
|
|
906
|
+
flaky: number;
|
|
907
|
+
|
|
908
|
+
// Failure analysis
|
|
909
|
+
bugCount: number; // Actual bugs in code
|
|
910
|
+
documentationIssues: number; // Docs don't match reality
|
|
911
|
+
testIssues: number; // Problems with test itself
|
|
912
|
+
|
|
913
|
+
// Confidence
|
|
914
|
+
averageConfidence: number; // AI confidence in validations
|
|
915
|
+
manualReviewRequired: number; // Low confidence cases
|
|
916
|
+
}
|
|
917
|
+
```
|
|
918
|
+
|
|
919
|
+
### Reporting Dashboard
|
|
920
|
+
|
|
921
|
+
```
|
|
922
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
923
|
+
│ AI User Simulation Test Results │
|
|
924
|
+
│ Cakemail CLI v1.7.0 │
|
|
925
|
+
└─────────────────────────────────────────────────────────────┘
|
|
926
|
+
|
|
927
|
+
📊 Coverage Summary
|
|
928
|
+
├── Commands: 136/136 (100%)
|
|
929
|
+
├── Parameters: 342/450 (76%)
|
|
930
|
+
├── Examples: 45/45 (100%)
|
|
931
|
+
└── Workflows: 12/15 (80%)
|
|
932
|
+
|
|
933
|
+
✅ Test Results
|
|
934
|
+
├── Total: 234 tests
|
|
935
|
+
├── Passed: 228 (97.4%)
|
|
936
|
+
├── Failed: 6 (2.6%)
|
|
937
|
+
└── Duration: 12m 34s
|
|
938
|
+
|
|
939
|
+
❌ Failed Tests (6)
|
|
940
|
+
┌────────────────────────────────────────────────────────────┐
|
|
941
|
+
│ CAMP-042: Create campaign with invalid list ID │
|
|
942
|
+
│ Status: FAILED │
|
|
943
|
+
│ Category: Bug │
|
|
944
|
+
│ │
|
|
945
|
+
│ Expected: "Error: List not found" message │
|
|
946
|
+
│ Actual: Application crashed with unhandled exception │
|
|
947
|
+
│ │
|
|
948
|
+
│ Evidence: test-results/CAMP-042/stderr.txt │
|
|
949
|
+
│ Recommendation: Add error handling in campaigns.ts:145 │
|
|
950
|
+
└────────────────────────────────────────────────────────────┘
|
|
951
|
+
|
|
952
|
+
📝 Documentation Issues (2)
|
|
953
|
+
┌────────────────────────────────────────────────────────────┐
|
|
954
|
+
│ DOC-001: campaigns list output format │
|
|
955
|
+
│ │
|
|
956
|
+
│ Documentation states: "Returns JSON by default" │
|
|
957
|
+
│ Reality: Returns profile-based format (table/json/compact)│
|
|
958
|
+
│ │
|
|
959
|
+
│ Location: docs/user-manual/09-command-reference/02-*.md │
|
|
960
|
+
│ Suggestion: Update to mention profile-based defaults │
|
|
961
|
+
└────────────────────────────────────────────────────────────┘
|
|
962
|
+
|
|
963
|
+
🎯 Recommendations
|
|
964
|
+
1. Fix error handling in campaigns create (HIGH priority)
|
|
965
|
+
2. Update documentation for output formats (MEDIUM priority)
|
|
966
|
+
3. Add integration test for campaign deletion (LOW priority)
|
|
967
|
+
```
|
|
968
|
+
|
|
969
|
+
---
|
|
970
|
+
|
|
971
|
+
## Example Scenarios
|
|
972
|
+
|
|
973
|
+
### Scenario 1: CLI Command Validation
|
|
974
|
+
|
|
975
|
+
**Documentation Extract:**
|
|
976
|
+
````markdown
|
|
977
|
+
# Create Campaign
|
|
978
|
+
|
|
979
|
+
Creates a new email campaign.
|
|
980
|
+
|
|
981
|
+
## Usage
|
|
982
|
+
```bash
|
|
983
|
+
cakemail campaigns create -n "Newsletter" -l 123 -s 456
|
|
984
|
+
```
|
|
985
|
+
|
|
986
|
+
## Parameters
|
|
987
|
+
- `-n, --name <name>` - Campaign name (required)
|
|
988
|
+
- `-l, --list-id <id>` - List ID (required)
|
|
989
|
+
- `-s, --sender-id <id>` - Sender ID (required)
|
|
990
|
+
|
|
991
|
+
## Output
|
|
992
|
+
```json
|
|
993
|
+
{
|
|
994
|
+
"id": 789,
|
|
995
|
+
"name": "Newsletter",
|
|
996
|
+
"status": "draft",
|
|
997
|
+
"list_id": 123,
|
|
998
|
+
"sender_id": 456
|
|
999
|
+
}
|
|
1000
|
+
```
|
|
1001
|
+
````
|
|
1002
|
+
|
|
1003
|
+
**Generated Test Case:**
|
|
1004
|
+
```typescript
|
|
1005
|
+
const testCase = {
|
|
1006
|
+
id: "CAMP-CREATE-001",
|
|
1007
|
+
command: "cakemail campaigns create -n 'AI Test Campaign' -l 123 -s 456",
|
|
1008
|
+
expectedBehavior: {
|
|
1009
|
+
exitCode: 0,
|
|
1010
|
+
outputFormat: "json",
|
|
1011
|
+
schema: {
|
|
1012
|
+
type: "object",
|
|
1013
|
+
required: ["id", "name", "status", "list_id", "sender_id"],
|
|
1014
|
+
properties: {
|
|
1015
|
+
id: { type: "number" },
|
|
1016
|
+
name: { type: "string", enum: ["AI Test Campaign"] },
|
|
1017
|
+
status: { type: "string", enum: ["draft"] },
|
|
1018
|
+
list_id: { type: "number", enum: [123] },
|
|
1019
|
+
sender_id: { type: "number", enum: [456] }
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
1022
|
+
}
|
|
1023
|
+
};
|
|
1024
|
+
```
|
|
1025
|
+
|
|
1026
|
+
**Execution & Validation:**
|
|
1027
|
+
```typescript
|
|
1028
|
+
// Execute
|
|
1029
|
+
const result = await executor.run(testCase.command);
|
|
1030
|
+
|
|
1031
|
+
// Validate structure
|
|
1032
|
+
const structureValid = validateJSONSchema(
|
|
1033
|
+
JSON.parse(result.stdout),
|
|
1034
|
+
testCase.expectedBehavior.schema
|
|
1035
|
+
);
|
|
1036
|
+
|
|
1037
|
+
// AI semantic validation
|
|
1038
|
+
const semanticValid = await aiValidator.validate({
|
|
1039
|
+
actual: result.stdout,
|
|
1040
|
+
expected: "Should create a draft campaign with the specified name, list, and sender",
|
|
1041
|
+
confidence: 0.95
|
|
1042
|
+
});
|
|
1043
|
+
|
|
1044
|
+
// Final result
|
|
1045
|
+
return {
|
|
1046
|
+
passed: structureValid && semanticValid.passed,
|
|
1047
|
+
confidence: semanticValid.confidence,
|
|
1048
|
+
evidence: {
|
|
1049
|
+
stdout: result.stdout,
|
|
1050
|
+
exitCode: result.exitCode,
|
|
1051
|
+
validations: [structureValid, semanticValid]
|
|
1052
|
+
}
|
|
1053
|
+
};
|
|
1054
|
+
```
|
|
1055
|
+
|
|
1056
|
+
### Scenario 2: Error Handling Validation
|
|
1057
|
+
|
|
1058
|
+
**Documentation:**
|
|
1059
|
+
````markdown
|
|
1060
|
+
## Errors
|
|
1061
|
+
|
|
1062
|
+
If the list ID doesn't exist, returns:
|
|
1063
|
+
```
|
|
1064
|
+
Error: List not found
|
|
1065
|
+
💡 Tip: To see all lists, use: cakemail lists list
|
|
1066
|
+
```
|
|
1067
|
+
Exit code: 1
|
|
1068
|
+
````
|
|
1069
|
+
|
|
1070
|
+
**Test Case:**
|
|
1071
|
+
```typescript
|
|
1072
|
+
const testCase = {
|
|
1073
|
+
id: "CAMP-CREATE-ERR-001",
|
|
1074
|
+
command: "cakemail campaigns create -n 'Test' -l 999999 -s 456",
|
|
1075
|
+
expectedBehavior: {
|
|
1076
|
+
exitCode: 1,
|
|
1077
|
+
stderrContains: ["Error: List not found",
|
|
1078
|
+
"cakemail lists list"],
|
|
1079
|
+
semanticExpectation: "Should show helpful error message with suggestion"
|
|
1080
|
+
}
|
|
1081
|
+
};
|
|
1082
|
+
```
|
|
1083
|
+
|
|
1084
|
+
### Scenario 3: Workflow Validation
|
|
1085
|
+
|
|
1086
|
+
**Documentation:**
|
|
1087
|
+
```markdown
|
|
1088
|
+
# Campaign Lifecycle
|
|
1089
|
+
|
|
1090
|
+
1. Create campaign: `cakemail campaigns create`
|
|
1091
|
+
2. Schedule campaign: `cakemail campaigns schedule <id> -d "2025-12-01"`
|
|
1092
|
+
3. Verify status: `cakemail campaigns get <id>` (should show status: "scheduled")
|
|
1093
|
+
```
|
|
1094
|
+
|
|
1095
|
+
**Test Case:**
|
|
1096
|
+
```typescript
|
|
1097
|
+
const workflowTest = {
|
|
1098
|
+
id: "WORKFLOW-CAMP-001",
|
|
1099
|
+
steps: [
|
|
1100
|
+
{
|
|
1101
|
+
action: "create",
|
|
1102
|
+
command: "cakemail campaigns create -n 'Workflow Test' -l 123 -s 456",
|
|
1103
|
+
capture: "campaign_id",
|
|
1104
|
+
expectedStatus: "draft"
|
|
1105
|
+
},
|
|
1106
|
+
{
|
|
1107
|
+
action: "schedule",
|
|
1108
|
+
command: "cakemail campaigns schedule {{campaign_id}} -d '2025-12-01T10:00:00Z'",
|
|
1109
|
+
expectedOutput: "Campaign scheduled successfully"
|
|
1110
|
+
},
|
|
1111
|
+
{
|
|
1112
|
+
action: "verify",
|
|
1113
|
+
command: "cakemail campaigns get {{campaign_id}}",
|
|
1114
|
+
expectedField: { status: "scheduled", scheduled_for: "2025-12-01T10:00:00Z" }
|
|
1115
|
+
}
|
|
1116
|
+
],
|
|
1117
|
+
cleanup: [
|
|
1118
|
+
"cakemail campaigns delete {{campaign_id}} --force"
|
|
1119
|
+
]
|
|
1120
|
+
};
|
|
1121
|
+
```
|
|
1122
|
+
|
|
1123
|
+
---
|
|
1124
|
+
|
|
1125
|
+
## Future Enhancements
|
|
1126
|
+
|
|
1127
|
+
### Phase 2: Self-Healing Tests
|
|
1128
|
+
|
|
1129
|
+
**Adaptive Test Cases:**
|
|
1130
|
+
- AI detects when UI elements change (selectors, IDs)
|
|
1131
|
+
- Automatically updates test cases to match new implementation
|
|
1132
|
+
- Learns from manual corrections to improve future adaptations
|
|
1133
|
+
|
|
1134
|
+
**Example:**
|
|
1135
|
+
```typescript
|
|
1136
|
+
// Button selector changed from #create-btn to #new-campaign-btn
|
|
1137
|
+
// AI detects failure, searches for similar elements, updates selector
|
|
1138
|
+
const healedTest = await aiHealer.attemptHeal(failedTest, {
|
|
1139
|
+
oldSelector: "#create-btn",
|
|
1140
|
+
context: "Create campaign button on campaigns list page"
|
|
1141
|
+
});
|
|
1142
|
+
// → Suggests: "#new-campaign-btn" based on text content and position
|
|
1143
|
+
```
|
|
1144
|
+
|
|
1145
|
+
### Phase 3: Exploratory Testing
|
|
1146
|
+
|
|
1147
|
+
**AI Explorer:**
|
|
1148
|
+
- AI actively explores the application beyond documented features
|
|
1149
|
+
- Discovers undocumented features or edge cases
|
|
1150
|
+
- Tests combinations of actions not explicitly documented
|
|
1151
|
+
- Reports unexpected behavior or potential bugs
|
|
1152
|
+
|
|
1153
|
+
**Example:**
|
|
1154
|
+
```typescript
|
|
1155
|
+
const explorerAgent = new AIExplorer({
|
|
1156
|
+
objective: "Explore campaign management features",
|
|
1157
|
+
constraints: ["Don't delete production data", "Stay in test account"],
|
|
1158
|
+
creativity: 0.7 // Balance between following patterns and trying new things
|
|
1159
|
+
});
|
|
1160
|
+
|
|
1161
|
+
const findings = await explorerAgent.explore();
|
|
1162
|
+
// → Discovers: "Can create campaign without sender if template has default sender"
|
|
1163
|
+
// (Not documented, but works in practice)
|
|
1164
|
+
```
|
|
1165
|
+
|
|
1166
|
+
### Phase 4: Multi-Platform Consistency Testing
|
|
1167
|
+
|
|
1168
|
+
**Cross-Platform Validator:**
|
|
1169
|
+
- Tests same feature across CLI, web, and mobile
|
|
1170
|
+
- Validates consistent behavior and data
|
|
1171
|
+
- Identifies platform-specific bugs or discrepancies
|
|
1172
|
+
|
|
1173
|
+
**Example:**
|
|
1174
|
+
```yaml
|
|
1175
|
+
consistency_test:
|
|
1176
|
+
feature: "Create Campaign"
|
|
1177
|
+
platforms:
|
|
1178
|
+
- cli: "cakemail campaigns create -n 'Test' -l 123 -s 456"
|
|
1179
|
+
- web: "Navigate to /campaigns → Click Create → Fill form → Submit"
|
|
1180
|
+
- mobile: "Tap Campaigns → Tap + → Enter details → Tap Save"
|
|
1181
|
+
|
|
1182
|
+
expected_consistency:
|
|
1183
|
+
- all_create_same_campaign: true
|
|
1184
|
+
- all_show_same_success_message: true
|
|
1185
|
+
- campaign_appears_in_all_platforms: true
|
|
1186
|
+
```
|
|
1187
|
+
|
|
1188
|
+
### Phase 5: Continuous Documentation Validation
|
|
1189
|
+
|
|
1190
|
+
**CI/CD Integration:**
|
|
1191
|
+
```yaml
|
|
1192
|
+
# .github/workflows/ai-doc-validation.yml
|
|
1193
|
+
name: AI Documentation Validation
|
|
1194
|
+
|
|
1195
|
+
on:
|
|
1196
|
+
push:
|
|
1197
|
+
branches: [main]
|
|
1198
|
+
paths:
|
|
1199
|
+
- 'docs/**'
|
|
1200
|
+
- 'src/**'
|
|
1201
|
+
pull_request:
|
|
1202
|
+
|
|
1203
|
+
jobs:
|
|
1204
|
+
validate-docs:
|
|
1205
|
+
runs-on: ubuntu-latest
|
|
1206
|
+
steps:
|
|
1207
|
+
- uses: actions/checkout@v4
|
|
1208
|
+
|
|
1209
|
+
- name: Run AI User Simulation
|
|
1210
|
+
run: npm run ai-test
|
|
1211
|
+
env:
|
|
1212
|
+
CAKEMAIL_TEST_EMAIL: ${{ secrets.TEST_EMAIL }}
|
|
1213
|
+
CAKEMAIL_TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }}
|
|
1214
|
+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
1215
|
+
|
|
1216
|
+
- name: Generate Report
|
|
1217
|
+
run: npm run ai-test:report
|
|
1218
|
+
|
|
1219
|
+
- name: Create GitHub Issues for Failures
|
|
1220
|
+
if: failure()
|
|
1221
|
+
run: npm run ai-test:create-issues
|
|
1222
|
+
|
|
1223
|
+
- name: Comment on PR
|
|
1224
|
+
if: github.event_name == 'pull_request'
|
|
1225
|
+
uses: actions/github-script@v6
|
|
1226
|
+
with:
|
|
1227
|
+
script: |
|
|
1228
|
+
const report = require('./test-results/summary.json');
|
|
1229
|
+
github.rest.issues.createComment({
|
|
1230
|
+
owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number,
|
|
1231
|
+
body: `## AI Documentation Validation Results\n\n${report.summary}`
|
|
1232
|
+
});
|
|
1233
|
+
```
|
|
1234
|
+
|
|
1235
|
+
---
|
|
1236
|
+
|
|
1237
|
+
## Cost Considerations
|
|
1238
|
+
|
|
1239
|
+
### LLM API Costs (Estimated)
|
|
1240
|
+
|
|
1241
|
+
**Assumptions:**
|
|
1242
|
+
- 136 commands × 3 test cases each = 408 test cases
|
|
1243
|
+
- Average 2,000 tokens per test generation
|
|
1244
|
+
- Average 1,000 tokens per validation
|
|
1245
|
+
|
|
1246
|
+
**Test Generation:**
|
|
1247
|
+
- 408 tests × 2,000 tokens = 816,000 tokens
|
|
1248
|
+
- Cost (Claude Sonnet): ~$2.45 per test run
|
|
1249
|
+
|
|
1250
|
+
**Test Validation:**
|
|
1251
|
+
- 408 tests × 1,000 tokens = 408,000 tokens
|
|
1252
|
+
- Cost (Claude Sonnet): ~$1.22 per test run
|
|
1253
|
+
|
|
1254
|
+
**Total per full test run:** ~$3.67
|
|
1255
|
+
|
|
1256
|
+
**Monthly cost (daily runs):** ~$110/month
|
|
1257
|
+
|
|
1258
|
+
### Optimization Strategies
|
|
1259
|
+
|
|
1260
|
+
1. **Cache generated test cases** (regenerate only when docs change)
|
|
1261
|
+
2. **Use cheaper models for simple validations** (GPT-3.5 for schema validation)
|
|
1262
|
+
3. **Batch API calls** to reduce overhead
|
|
1263
|
+
4. **Run full suite weekly**, quick validations daily
|
|
1264
|
+
5. **Use local models** (Llama 3) for non-critical validations
|
|
1265
|
+
|
|
1266
|
+
---
|
|
1267
|
+
|
|
1268
|
+
## Conclusion
|
|
1269
|
+
|
|
1270
|
+
This AI-based user simulation framework provides:
|
|
1271
|
+
|
|
1272
|
+
✅ **Automated documentation validation** - Ensures docs match reality
|
|
1273
|
+
✅ **Platform-agnostic design** - Works for CLI, web, mobile
|
|
1274
|
+
✅ **Continuous quality assurance** - Runs in CI/CD
|
|
1275
|
+
✅ **Self-healing capabilities** - Adapts to minor changes
|
|
1276
|
+
✅ **Comprehensive coverage** - Tests happy paths, errors, edge cases
|
|
1277
|
+
✅ **Evidence-based reporting** - Screenshots, logs, detailed analysis
|
|
1278
|
+
|
|
1279
|
+
The framework shifts testing from "what developers think works" to "what users experience based on documentation," ensuring a seamless user experience across all platforms.
|
|
1280
|
+
|
|
1281
|
+
---
|
|
1282
|
+
|
|
1283
|
+
## Appendix: Sample Test Output
|
|
1284
|
+
|
|
1285
|
+
```json
|
|
1286
|
+
{
|
|
1287
|
+
"testRunId": "run-2025-10-26-143022",
|
|
1288
|
+
"framework": "ai-user-simulation",
|
|
1289
|
+
"version": "1.0.0",
|
|
1290
|
+
"platform": "cli",
|
|
1291
|
+
"application": {
|
|
1292
|
+
"name": "Cakemail CLI",
|
|
1293
|
+
"version": "1.7.0"
|
|
1294
|
+
},
|
|
1295
|
+
"summary": {
|
|
1296
|
+
"totalTests": 408,
|
|
1297
|
+
"passed": 396,
|
|
1298
|
+
"failed": 10,
|
|
1299
|
+
"skipped": 2,
|
|
1300
|
+
"duration": 1847000,
|
|
1301
|
+
"passRate": 97.06
|
|
1302
|
+
},
|
|
1303
|
+
"coverage": {
|
|
1304
|
+
"commands": { "total": 136, "tested": 136, "percentage": 100 },
|
|
1305
|
+
"parameters": { "total": 450, "tested": 342, "percentage": 76 },
|
|
1306
|
+
"examples": { "total": 45, "tested": 45, "percentage": 100 }
|
|
1307
|
+
},
|
|
1308
|
+
"failures": [
|
|
1309
|
+
{
|
|
1310
|
+
"testId": "CAMP-042",
|
|
1311
|
+
"category": "bug",
|
|
1312
|
+
"severity": "high",
|
|
1313
|
+
"title": "Create campaign with invalid list ID crashes",
|
|
1314
|
+
"expected": "Error message with helpful suggestion",
|
|
1315
|
+
"actual": "Unhandled exception",
|
|
1316
|
+
"evidence": {
|
|
1317
|
+
"stderr": "TypeError: Cannot read property 'id' of undefined\n at createCampaign (src/commands/campaigns.ts:145)",
|
|
1318
|
+
"exitCode": 1
|
|
1319
|
+
},
|
|
1320
|
+
"recommendation": "Add null check before accessing list properties",
|
|
1321
|
+
"githubIssue": null
|
|
1322
|
+
}
|
|
1323
|
+
],
|
|
1324
|
+
"documentationIssues": [
|
|
1325
|
+
{
|
|
1326
|
+
"issueId": "DOC-001",
|
|
1327
|
+
"severity": "medium",
|
|
1328
|
+
"title": "Default output format documentation incorrect",
|
|
1329
|
+
"location": "README.md:104",
|
|
1330
|
+
"expected": "Returns JSON by default",
|
|
1331
|
+
"actual": "Returns profile-based format (developer=json, marketer=compact, balanced=table)",
|
|
1332
|
+
"suggestion": "Update documentation to mention profile-based defaults",
|
|
1333
|
+
"fixedIn": "PR #123"
|
|
1334
|
+
}
|
|
1335
|
+
],
|
|
1336
|
+
"aiMetrics": {
|
|
1337
|
+
"averageConfidence": 0.94,
|
|
1338
|
+
"lowConfidenceTests": 3,
|
|
1339
|
+
"manualReviewRequired": 2
|
|
1340
|
+
}
|
|
1341
|
+
}
|
|
1342
|
+
```
|