ai-inference-stepper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +169 -0
- package/.eslintrc.cjs +23 -0
- package/.github/workflows/ci.yml +51 -0
- package/.github/workflows/keep-alive.yml +22 -0
- package/.github/workflows/publish.yml +34 -0
- package/ARCHITECTURE.md +594 -0
- package/Dockerfile +16 -0
- package/LICENSE +28 -0
- package/README.md +261 -0
- package/dist/alerts/discord.d.ts +19 -0
- package/dist/alerts/discord.d.ts.map +1 -0
- package/dist/alerts/discord.js +70 -0
- package/dist/alerts/discord.js.map +1 -0
- package/dist/cache/redisCache.d.ts +45 -0
- package/dist/cache/redisCache.d.ts.map +1 -0
- package/dist/cache/redisCache.js +171 -0
- package/dist/cache/redisCache.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +8 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +6 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +251 -0
- package/dist/config.js.map +1 -0
- package/dist/fallback/templateFallback.d.ts +7 -0
- package/dist/fallback/templateFallback.d.ts.map +1 -0
- package/dist/fallback/templateFallback.js +29 -0
- package/dist/fallback/templateFallback.js.map +1 -0
- package/dist/index.d.ts +121 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +198 -0
- package/dist/index.js.map +1 -0
- package/dist/logging.d.ts +10 -0
- package/dist/logging.d.ts.map +1 -0
- package/dist/logging.js +44 -0
- package/dist/logging.js.map +1 -0
- package/dist/metrics/metrics.d.ts +22 -0
- package/dist/metrics/metrics.d.ts.map +1 -0
- package/dist/metrics/metrics.js +78 -0
- package/dist/metrics/metrics.js.map +1 -0
- package/dist/providers/factory.d.ts +11 -0
- package/dist/providers/factory.d.ts.map +1 -0
- package/dist/providers/factory.js +52 -0
- package/dist/providers/factory.js.map +1 -0
- package/dist/providers/hfSpace.adapter.d.ts +21 -0
- package/dist/providers/hfSpace.adapter.d.ts.map +1 -0
- package/dist/providers/hfSpace.adapter.js +110 -0
- package/dist/providers/hfSpace.adapter.js.map +1 -0
- package/dist/providers/httpTemplate.adapter.d.ts +42 -0
- package/dist/providers/httpTemplate.adapter.d.ts.map +1 -0
- package/dist/providers/httpTemplate.adapter.js +98 -0
- package/dist/providers/httpTemplate.adapter.js.map +1 -0
- package/dist/providers/promptBuilder.d.ts +34 -0
- package/dist/providers/promptBuilder.d.ts.map +1 -0
- package/dist/providers/promptBuilder.js +315 -0
- package/dist/providers/promptBuilder.js.map +1 -0
- package/dist/providers/provider.interface.d.ts +45 -0
- package/dist/providers/provider.interface.d.ts.map +1 -0
- package/dist/providers/provider.interface.js +47 -0
- package/dist/providers/provider.interface.js.map +1 -0
- package/dist/providers/specs.d.ts +18 -0
- package/dist/providers/specs.d.ts.map +1 -0
- package/dist/providers/specs.js +326 -0
- package/dist/providers/specs.js.map +1 -0
- package/dist/providers/unified.adapter.d.ts +37 -0
- package/dist/providers/unified.adapter.d.ts.map +1 -0
- package/dist/providers/unified.adapter.js +141 -0
- package/dist/providers/unified.adapter.js.map +1 -0
- package/dist/queue/producer.d.ts +30 -0
- package/dist/queue/producer.d.ts.map +1 -0
- package/dist/queue/producer.js +87 -0
- package/dist/queue/producer.js.map +1 -0
- package/dist/queue/worker.d.ts +9 -0
- package/dist/queue/worker.d.ts.map +1 -0
- package/dist/queue/worker.js +137 -0
- package/dist/queue/worker.js.map +1 -0
- package/dist/server/app.d.ts +4 -0
- package/dist/server/app.d.ts.map +1 -0
- package/dist/server/app.js +394 -0
- package/dist/server/app.js.map +1 -0
- package/dist/server/start.d.ts +16 -0
- package/dist/server/start.d.ts.map +1 -0
- package/dist/server/start.js +45 -0
- package/dist/server/start.js.map +1 -0
- package/dist/stepper/orchestrator.d.ts +22 -0
- package/dist/stepper/orchestrator.d.ts.map +1 -0
- package/dist/stepper/orchestrator.js +333 -0
- package/dist/stepper/orchestrator.js.map +1 -0
- package/dist/types.d.ts +216 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +14 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/redaction.d.ts +9 -0
- package/dist/utils/redaction.d.ts.map +1 -0
- package/dist/utils/redaction.js +41 -0
- package/dist/utils/redaction.js.map +1 -0
- package/dist/utils/safeRequest.d.ts +38 -0
- package/dist/utils/safeRequest.d.ts.map +1 -0
- package/dist/utils/safeRequest.js +104 -0
- package/dist/utils/safeRequest.js.map +1 -0
- package/dist/validation/report.schema.d.ts +48 -0
- package/dist/validation/report.schema.d.ts.map +1 -0
- package/dist/validation/report.schema.js +72 -0
- package/dist/validation/report.schema.js.map +1 -0
- package/dist/webhooks/delivery.d.ts +31 -0
- package/dist/webhooks/delivery.d.ts.map +1 -0
- package/dist/webhooks/delivery.js +102 -0
- package/dist/webhooks/delivery.js.map +1 -0
- package/docs/assets/architecture.png +0 -0
- package/package.json +75 -0
- package/render.yaml +25 -0
- package/src/alerts/README.md +25 -0
- package/src/alerts/discord.ts +86 -0
- package/src/cache/How redis caching works in package stepper.md +971 -0
- package/src/cache/README.md +51 -0
- package/src/cache/redisCache.ts +194 -0
- package/src/ci/deploy.sh +36 -0
- package/src/cli.ts +9 -0
- package/src/config.ts +265 -0
- package/src/fallback/templateFallback.ts +32 -0
- package/src/index.ts +246 -0
- package/src/logging.ts +46 -0
- package/src/metrics/README.md +24 -0
- package/src/metrics/metrics.ts +84 -0
- package/src/providers/How the providers interact.md +121 -0
- package/src/providers/README.md +121 -0
- package/src/providers/factory.ts +57 -0
- package/src/providers/hfSpace.adapter.ts +119 -0
- package/src/providers/httpTemplate.adapter.ts +138 -0
- package/src/providers/promptBuilder.ts +330 -0
- package/src/providers/provider.interface.ts +73 -0
- package/src/providers/specs.ts +366 -0
- package/src/providers/unified.adapter.ts +172 -0
- package/src/queue/How queue works in package stepper.md +149 -0
- package/src/queue/README.md +41 -0
- package/src/queue/producer.ts +108 -0
- package/src/queue/worker.ts +170 -0
- package/src/server/app.ts +451 -0
- package/src/server/start.ts +68 -0
- package/src/stepper/Dockerfile +48 -0
- package/src/stepper/How orchestrator works in package stepper.md +746 -0
- package/src/stepper/README.md +43 -0
- package/src/stepper/orchestrator.ts +437 -0
- package/src/types.ts +238 -0
- package/src/utils/redaction.ts +50 -0
- package/src/utils/safeRequest.ts +140 -0
- package/src/validation/README.md +25 -0
- package/src/validation/report.schema.ts +96 -0
- package/src/webhooks/delivery.ts +162 -0
- package/tests/integration/full-flow.test.ts +192 -0
- package/tests/unit/alerts/discord.test.ts +119 -0
- package/tests/unit/cache.test.ts +87 -0
- package/tests/unit/orchestrator-fallback.test.ts +92 -0
- package/tests/unit/orchestrator.test.ts +105 -0
- package/tests/unit/providers/factory.test.ts +161 -0
- package/tests/unit/providers/unified.adapter.test.ts +206 -0
- package/tests/unit/utils/redaction.test.ts +140 -0
- package/tests/unit/utils/safeRequest.test.ts +164 -0
- package/tsconfig.json +26 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
// packages/stepper/src/index.ts
|
|
2
|
+
|
|
3
|
+
import { PromptInput, ReportOutput, ProviderResult, StepperCallbacks, StepperConfig, ProviderConfig } from './types.js';
|
|
4
|
+
import { logger } from './logging.js';
|
|
5
|
+
import {
|
|
6
|
+
buildCacheKey,
|
|
7
|
+
getReportCache,
|
|
8
|
+
setDehydrated,
|
|
9
|
+
isHydratedFresh,
|
|
10
|
+
isStaleButUsable,
|
|
11
|
+
deleteCacheEntry,
|
|
12
|
+
} from './cache/redisCache.js';
|
|
13
|
+
import { enqueueReportJob, getJobStatus } from './queue/producer.js';
|
|
14
|
+
import { generateReportNow, registerCallbacks as registerOrchestratorCallbacks, initializeProviders, getProviderHealth } from './stepper/orchestrator.js';
|
|
15
|
+
import { recordCacheHit, recordCacheMiss } from './metrics/metrics.js';
|
|
16
|
+
import crypto from 'crypto';
|
|
17
|
+
import { applyConfigOverrides } from './config.js';
|
|
18
|
+
|
|
19
|
+
let isInitialized = false;
|
|
20
|
+
|
|
21
|
+
/**
 * Lazily bootstrap the stepper the first time a public API needs it.
 *
 * When the orchestrator already reports at least one provider, that state is
 * adopted as-is; otherwise initStepper() is invoked without overrides.
 */
function ensureInitialized(): void {
  if (isInitialized) {
    return;
  }

  const alreadyRegistered = getProviderHealth().length > 0;
  if (alreadyRegistered) {
    isInitialized = true;
    return;
  }

  initStepper();
}
|
|
31
|
+
|
|
32
|
+
/**
 * Initialize Stepper, optionally overriding configuration from code.
 *
 * Intended for npm consumers who prefer programmatic configuration over
 * environment variables.
 *
 * @param options - Optional overrides. `config` is shallow-copied and used as
 *   the override set; a truthy `providers` replaces `config.providers`.
 * @returns The configuration returned by applyConfigOverrides().
 */
export function initStepper(options?: { config?: Partial<StepperConfig>; providers?: ProviderConfig[] }): StepperConfig {
  const { config: configOverrides, providers: providerOverrides } = options ?? {};

  // Copy so the caller's object is never mutated by the assignment below.
  const overrides: Partial<StepperConfig> = { ...configOverrides };
  if (providerOverrides) {
    overrides.providers = providerOverrides;
  }

  const resolved = applyConfigOverrides(overrides);
  initializeProviders(resolved.providers);
  isInitialized = true;

  return resolved;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Derive the template component of a cache key.
 *
 * @param template - Template name; a missing or empty value is treated as 'default'.
 * @returns The first 16 hex characters of the SHA-256 digest of the template name.
 */
function computeTemplateHash(template?: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(template || 'default');

  const fullDigest = hasher.digest('hex');
  return fullDigest.slice(0, 16);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Register lifecycle callbacks with the orchestrator.
 *
 * The callbacks object is forwarded unchanged to the orchestrator's own
 * registration function, then an info line is logged.
 *
 * @param callbacks - Lifecycle hooks to register.
 *
 * @example
 * registerCallbacks({
 *   onSuccess: (jobId, provider, result) => {
 *     // handle success
 *   },
 *   onFallback: (jobId, result, meta) => {
 *     // handle fallback
 *   }
 * });
 */
export function registerCallbacks(callbacks: StepperCallbacks): void {
  registerOrchestratorCallbacks(callbacks);

  logger.info('Callbacks registered');
}
|
|
73
|
+
|
|
74
|
+
/**
 * Request a report without blocking on generation.
 *
 * Resolution order:
 *  1. Hydrated and fresh cache entry: returned with status 200; the entry is
 *     then deleted in the background because the caller is expected to persist it.
 *  2. Hydrated, stale-but-usable entry: returned with status 200 and
 *     `stale: true`; a refresh job is enqueued in the background (priority 10).
 *  3. Anything else: a job is enqueued, a dehydrated placeholder is written,
 *     and status 202 with the job ID is returned.
 *
 * Background failures in (1) and (2) are logged and never rejected to the caller.
 *
 * @param input - Commit information
 * @returns Promise with either the cached report or the enqueued job info
 *
 * @example
 * const result = await enqueueReport({
 *   userId: 'user_123',
 *   commitSha: 'abc123',
 *   repo: 'myorg/myrepo',
 *   message: 'Fix bug in auth',
 *   files: ['src/auth.ts'],
 *   components: ['auth'],
 *   diffSummary: '+ fixed token validation'
 * });
 *
 * if (result.status === 200) {
 *   // handle cached result
 * } else {
 *   // handle enqueued result
 * }
 */
export async function enqueueReport(
  input: PromptInput
): Promise<
  | { status: 200; data: ReportOutput; cached: true; stale?: boolean }
  | { status: 202; jobId: string; cached: false }
> {
  ensureInitialized();

  const cacheKey = buildCacheKey(input.userId, input.commitSha, computeTemplateHash(input.template));
  const logContext = { cacheKey, userId: input.userId };

  const cached = await getReportCache(cacheKey);

  if (cached?.status === 'hydrated' && cached.result) {
    if (isHydratedFresh(cached)) {
      recordCacheHit('fresh');
      logger.info(logContext, 'Cache hit (fresh), returning and clearing');

      // Fire-and-forget cleanup: the caller is expected to save this result.
      deleteCacheEntry(cacheKey).catch((err) => {
        logger.error({ err, cacheKey }, 'Failed to cleanup cache after fresh hit');
      });

      return { status: 200, data: cached.result, cached: true };
    }

    if (isStaleButUsable(cached)) {
      recordCacheHit('stale');
      logger.info(logContext, 'Cache hit (stale), scheduling refresh');

      // Serve the stale copy now; refresh it in the background.
      enqueueReportJob(input, cacheKey, { priority: 10 }).catch((err) => {
        logger.error({ err, cacheKey }, 'Failed to enqueue background refresh');
      });

      return { status: 200, data: cached.result, cached: true, stale: true };
    }
  }

  // No usable cached report (miss, dehydrated, or too stale): enqueue a job.
  recordCacheMiss();
  logger.info(logContext, 'Cache miss, enqueueing job');

  const jobId = await enqueueReportJob(input, cacheKey);

  // Placeholder entry that records the job ID under this cache key.
  await setDehydrated(cacheKey, jobId);

  return { status: 202, jobId, cached: false };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Generate a report synchronously (blocking, immediate).
 *
 * Useful for testing or when the result is needed right away. This bypasses
 * the queue and calls the orchestrator directly.
 *
 * @param input - Commit information
 * @returns Promise with generated report and metadata
 *
 * @example
 * const result = await generateReport({
 *   userId: 'user_123',
 *   commitSha: 'abc123',
 *   repo: 'myorg/myrepo',
 *   message: 'Refactor API',
 *   files: ['src/api.ts'],
 *   components: ['api'],
 *   diffSummary: '- old code\n+ new code'
 * });
 *
 * // handle provider and report result
 */
export async function generateReport(input: PromptInput): Promise<ProviderResult> {
  ensureInitialized();

  // Date.now() alone collides when two calls land in the same millisecond;
  // the random suffix keeps every synchronous job ID distinct.
  const uniqueSuffix = crypto.randomBytes(4).toString('hex');
  const jobId = `sync_${Date.now()}_${uniqueSuffix}`;

  return generateReportNow(input, jobId);
}
|
|
182
|
+
|
|
183
|
+
/**
 * Look up a queued job by its identifier.
 *
 * Thin wrapper that delegates to the queue producer's getJobStatus().
 *
 * @param jobId - Job identifier returned from enqueueReport
 * @returns Job status information, or null if not found
 */
export async function getJob(jobId: string): Promise<{
  id: string;
  state: string;
  progress?: number;
  result?: unknown;
  failedReason?: string;
  data?: unknown;
} | null> {
  return getJobStatus(jobId);
}
|
|
199
|
+
|
|
200
|
+
/**
 * Remove a cached report entry.
 *
 * Call this after the report has been saved to your own database so the
 * Stepper's Redis storage footprint stays minimal. The cache key is derived
 * from the same (userId, commitSha, template hash) triple used by enqueueReport.
 *
 * @param userId - User identifier
 * @param commitSha - Commit SHA
 * @param template - Template name (optional)
 */
export async function deleteReport(userId: string, commitSha: string, template?: string): Promise<void> {
  const cacheKey = buildCacheKey(userId, commitSha, computeTemplateHash(template));

  await deleteCacheEntry(cacheKey);
}
|
|
215
|
+
|
|
216
|
+
/**
 * Summarize provider health.
 *
 * Status rules:
 *  - 'unhealthy' when no provider is healthy (including when none are registered),
 *  - 'degraded' when some but not all providers are healthy,
 *  - 'healthy' when every provider is healthy.
 *
 * @returns Overall status, per-provider name/healthy flags, and an ISO-8601 timestamp.
 */
export async function healthcheck(): Promise<{
  status: 'healthy' | 'degraded' | 'unhealthy';
  providers: Array<{ name: string; healthy: boolean }>;
  timestamp: string;
}> {
  ensureInitialized();

  const snapshot = getProviderHealth();
  const healthyCount = snapshot.filter((entry) => entry.healthy).length;

  let status: 'healthy' | 'degraded' | 'unhealthy' = 'healthy';
  if (healthyCount === 0) {
    status = 'unhealthy';
  } else if (healthyCount < snapshot.length) {
    status = 'degraded';
  }

  // Expose only name and healthy from each provider entry.
  const providers = snapshot.map(({ name, healthy }) => ({ name, healthy }));

  return {
    status,
    providers,
    timestamp: new Date().toISOString(),
  };
}
|
|
243
|
+
|
|
244
|
+
// Re-export types for consumers
|
|
245
|
+
export * from './types.js';
|
|
246
|
+
export { config } from './config.js';
|
package/src/logging.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import pino from 'pino';
|
|
2
|
+
|
|
3
|
+
const isDev = process.env.NODE_ENV !== 'production';
|
|
4
|
+
|
|
5
|
+
/**
 * Base pino logger instance with structured logging.
 *
 * - Level: LOG_LEVEL if set, otherwise 'debug' outside production and 'info' in production.
 * - Transport: pino-pretty outside production; default JSON output in production.
 * - Every line carries `service` and `env` base fields.
 * - Sensitive paths are removed (not masked) from log output.
 */
export const logger = pino({
  level: process.env.LOG_LEVEL || (isDev ? 'debug' : 'info'),
  transport: isDev
    ? {
        target: 'pino-pretty',
        options: {
          colorize: true,
          translateTime: 'SYS:standard',
          ignore: 'pid,hostname',
        },
      }
    : undefined,
  base: {
    service: 'stepper',
    env: process.env.NODE_ENV || 'development',
  },
  redact: {
    paths: [
      'req.headers.authorization',
      'req.headers.cookie',
      'req.body.token',
      'req.body.password',
      'req.body.apiKey',
      'input.token',
      'config.providers[*].apiKeyEnvVar', // Don't log env var names if they contain secrets (unlikely but safe)
      'context.input.token',
      // Redact axios/fetch error headers. This package logs errors under the
      // `err` key, so both `err.*` and `error.*` are covered, in both header casings.
      'error.config.headers.Authorization',
      'error.config.headers.authorization',
      'err.config.headers.Authorization',
      'err.config.headers.authorization',
      'context.tokens',
    ],
    remove: true,
  },
});
|
|
40
|
+
|
|
41
|
+
/**
 * Build a child of the base logger that stamps every line with extra
 * bindings (e.g., jobId, requestId).
 *
 * @param context - Key/value bindings passed to logger.child().
 * @returns The child logger.
 */
export function createChildLogger(context: Record<string, unknown>) {
  const child = logger.child(context);
  return child;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Observability & Metrics
|
|
2
|
+
|
|
3
|
+
The **Inference Stepper** provides deep insight into its internal operations via **Prometheus** metrics.
|
|
4
|
+
|
|
5
|
+
## Purpose
|
|
6
|
+
|
|
7
|
+
- **Health Monitoring**: Track provider success rates and latencies.
|
|
8
|
+
- **Capacity Planning**: Monitor job queue sizes and cache hit ratios.
|
|
9
|
+
- **Alerting**: Provide the data source for the Discord alert system.
|
|
10
|
+
|
|
11
|
+
## Key Metrics Tracked
|
|
12
|
+
|
|
13
|
+
| Metric | Type | Description |
|
|
14
|
+
| ----------------------------- | --------- | ----------------------------------------------------------------- |
|
|
15
|
+
| `ai_requests_total` | Counter | Total requests per provider and status (success/fail). |
|
|
16
|
+
| `ai_request_duration_seconds` | Histogram | How long each provider takes to respond. |
|
|
17
|
+
| `cache_hits_total` | Counter | Number of fresh and stale cache hits. |
|
|
18
|
+
| `cache_misses_total` | Counter | Number of requests that weren't in the cache. |
|
|
19
|
+
| `provider_failures_total` | Counter | Detailed breakdown of why providers failed (timeout, auth, etc.). |
|
|
20
|
+
| `job_queue_size` | Gauge | How many jobs are waiting in the queue. |
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
Metrics are exposed at the `/metrics` endpoint if the HTTP server is running. These can be scraped by a Prometheus server and visualized in **Grafana**.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
// packages/stepper/src/metrics/metrics.ts
|
|
4
|
+
|
|
5
|
+
import { Registry, Counter, Histogram, Gauge } from 'prom-client';
|
|
6
|
+
|
|
7
|
+
// Dedicated registry: every metric below is attached to it explicitly.
export const register = new Registry();

// ---- Metric definitions (declaration order is preserved) ----

/** Provider requests, labelled by provider and status. */
export const aiRequestsTotal = new Counter({
  registers: [register],
  name: 'ai_requests_total',
  help: 'Total number of AI provider requests',
  labelNames: ['provider', 'status'],
});

/** Provider request latency in seconds, labelled by provider. */
export const aiRequestDuration = new Histogram({
  registers: [register],
  name: 'ai_request_duration_seconds',
  help: 'Duration of AI provider requests in seconds',
  labelNames: ['provider'],
  buckets: [0.1, 0.5, 1, 2, 5, 10, 30],
});

/** Cache hits, labelled by status. */
export const cacheHitsTotal = new Counter({
  registers: [register],
  name: 'cache_hits_total',
  help: 'Total number of cache hits',
  labelNames: ['status'],
});

/** Cache misses (unlabelled). */
export const cacheMissesTotal = new Counter({
  registers: [register],
  name: 'cache_misses_total',
  help: 'Total number of cache misses',
});

/** Current job queue size (unlabelled gauge). */
export const jobQueueSize = new Gauge({
  registers: [register],
  name: 'job_queue_size',
  help: 'Current size of job queue',
});

/** Provider failures, labelled by provider and reason. */
export const providerFailuresTotal = new Counter({
  registers: [register],
  name: 'provider_failures_total',
  help: 'Total number of provider failures',
  labelNames: ['provider', 'reason'],
});

/** Processed jobs, labelled by status. */
export const jobsProcessedTotal = new Counter({
  registers: [register],
  name: 'jobs_processed_total',
  help: 'Total number of jobs processed',
  labelNames: ['status'],
});
|
|
54
|
+
// Helper functions
|
|
55
|
+
/**
 * Count one attempted request against a provider.
 *
 * @param provider - Provider name used as the `provider` label.
 */
export function recordProviderAttempt(provider: string): void {
  const labels = { provider, status: 'attempted' };
  aiRequestsTotal.inc(labels);
}
|
|
58
|
+
/**
 * Count one successful provider request and observe its latency.
 *
 * @param provider - Provider name used as the `provider` label.
 * @param durationMs - Request duration in milliseconds; converted to seconds for the histogram.
 */
export function recordProviderSuccess(provider: string, durationMs: number): void {
  const durationSeconds = durationMs / 1000;

  aiRequestsTotal.inc({ provider, status: 'success' });
  aiRequestDuration.observe({ provider }, durationSeconds);
}
|
|
62
|
+
/**
 * Count one failed provider request, both in the request total and in the
 * per-reason failure breakdown.
 *
 * @param provider - Provider name used as the `provider` label.
 * @param reason - Failure reason used as the `reason` label.
 */
export function recordProviderFailure(provider: string, reason: string): void {
  const requestLabels = { provider, status: 'failed' };
  const failureLabels = { provider, reason };

  aiRequestsTotal.inc(requestLabels);
  providerFailuresTotal.inc(failureLabels);
}
|
|
66
|
+
/**
 * Count one cache hit.
 *
 * @param status - Whether the served entry was 'fresh' or 'stale'.
 */
export function recordCacheHit(status: 'fresh' | 'stale'): void {
  const labels = { status };
  cacheHitsTotal.inc(labels);
}
|
|
69
|
+
/**
 * Count one cache miss.
 */
export function recordCacheMiss(): void {
  cacheMissesTotal.inc();
}
|
|
72
|
+
/**
 * Count one job processed with status 'success'.
 */
export function recordJobProcessed(): void {
  const labels = { status: 'success' };
  jobsProcessedTotal.inc(labels);
}
|
|
75
|
+
/**
 * Count one job processed with status 'failed'.
 */
export function recordJobFailed(): void {
  const labels = { status: 'failed' };
  jobsProcessedTotal.inc(labels);
}
|
|
78
|
+
/**
 * Serialize every metric in the stepper registry.
 *
 * @returns The registry contents in Prometheus text format.
 */
export async function getMetrics(): Promise<string> {
  return register.metrics();
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Chat Conversation
|
|
2
|
+
|
|
3
|
+
Note: _This is purely the output of the chat conversation and does not contain any raw data, codebase snippets, etc. used to generate the output._
|
|
4
|
+
|
|
5
|
+
### User Input
|
|
6
|
+
|
|
7
|
+
Let's break down the explanation of @[packages/stepper/src/providers] one by one and separately
|
|
8
|
+
. I want to know what each function means, how it works, its possible results, why it works like that, and how it helps achieve what the inference stepper package is for.
|
|
9
|
+
Break down every detail and explanation for me, at the lowest level and clearly enough for even a non-programmer or non-technical person
|
|
10
|
+
|
|
11
|
+
*Listed directory [providers](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers) *
|
|
12
|
+
|
|
13
|
+
*Viewed [README.md](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers/README.md) *
|
|
14
|
+
|
|
15
|
+
*Viewed [provider.interface.ts](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers/provider.interface.ts) *
|
|
16
|
+
|
|
17
|
+
*Viewed [factory.ts](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers/factory.ts) *
|
|
18
|
+
|
|
19
|
+
*Viewed [promptBuilder.ts](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers/promptBuilder.ts) *
|
|
20
|
+
|
|
21
|
+
*Viewed [specs.ts](file:///home/blaze/mine/commitdiary/packages/stepper/src/providers/specs.ts) *
|
|
22
|
+
|
|
23
|
+
### Planner Response
|
|
24
|
+
|
|
25
|
+
The `packages/stepper/src/providers` directory is the **"Communication Hub"** of the Inference Stepper.
|
|
26
|
+
|
|
27
|
+
Its job is simple: **Take the code changes you made, package them into a question (prompt), send them to an AI (like Gemini or OpenAI), and get back a structured report.**
|
|
28
|
+
|
|
29
|
+
Think of it like a translation office that speaks "Software Code" to "AI Models" and brings back "Human Reports."
|
|
30
|
+
|
|
31
|
+
Here is a breakdown of each file and its functions, explained simply:
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
### 1. `provider.interface.ts` (The Rule Book)
|
|
36
|
+
This file doesn't "do" things itself; it sets the **Rules** for how any AI provider must behave. It's like a blueprint for a house.
|
|
37
|
+
|
|
38
|
+
* **`ProviderError`**: This is a special "Red Flag." If an AI goes wrong, this function helps the system label exactly *why* (e.g., "The AI is too busy" or "The password was wrong").
|
|
39
|
+
* **`ProviderAdapter` (Interface)**: This is a list of requirements. It says: "If you want to be an AI provider in this system, you **must** have a name and you **must** have a way to be 'called'."
|
|
40
|
+
|
|
41
|
+
**Why it works like that:** It ensures that no matter which AI we use (Google, OpenAI, etc.), the rest of the system can talk to them in the exact same way.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
### 2. `factory.ts` (The Concierge)
|
|
46
|
+
This file acts like a hotel concierge that chooses the right AI for you.
|
|
47
|
+
|
|
48
|
+
* **`createProviderAdapter`**:
|
|
49
|
+
* **What it means:** "Make me a connector."
|
|
50
|
+
* **How it works:** You give it a name (like "gemini"). It looks at your settings, checks if that AI is enabled, and then builds the "connector" object for it.
|
|
51
|
+
* **Result:** You get a working "phone line" to that specific AI.
|
|
52
|
+
* **`createProviderAdapters`**:
|
|
53
|
+
* **What it means:** "Make me a list of connectors."
|
|
54
|
+
* **How it works:** It just runs the function above for every AI you have configured.
|
|
55
|
+
|
|
56
|
+
**How it helps:** It prevents the system from breaking if you misspell an AI name or forget a password. It gracefully says "I can't build that one right now."
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
### 3. `promptBuilder.ts` (The Professional Interviewer)
|
|
61
|
+
This is the most important part for the "Inference Stepper." It writes the script that is sent to the AI.
|
|
62
|
+
|
|
63
|
+
* **`buildComprehensivePrompt`**:
|
|
64
|
+
* **What it means:** "Write a very detailed set of instructions."
|
|
65
|
+
* **How it works:** It takes your code changes and wraps them in a professional set of rules. It tells the AI: "You are a senior engineer. Look at these files. Tell me the title, summary, and next steps. **Only** give me JSON (a data format)."
|
|
66
|
+
* **Result:** A long, clear instruction manual for the AI to follow.
|
|
67
|
+
* **`redactSecrets`**:
|
|
68
|
+
* **What it means:** "Hide the secrets."
|
|
69
|
+
* **How it works:** Before the message is sent, it scans for things like passwords or keys and replaces them with `[REDACTED]`.
|
|
70
|
+
* **Why:** So you don't accidentally send your private passwords to the AI companies.
|
|
71
|
+
|
|
72
|
+
**How it helps:** It ensures the AI gives us a **structured report** instead of just a conversational "Hey, nice code!" It makes the AI useful.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### 4. `specs.ts` (The Dictionary)
|
|
77
|
+
Every AI provider (Gemini, OpenAI, Anthropic) has its own "dialect." This file is a dictionary that translates our request into their dialect.
|
|
78
|
+
|
|
79
|
+
* **`PROVIDER_SPECS`**:
|
|
80
|
+
* **What it means:** "How each AI likes to be talked to."
|
|
81
|
+
* **How it works:** It lists the "Address" (URL) for each AI, what "Language" (Headers) they need, and how they "Package" (Body) their data.
|
|
82
|
+
* **`getProviderSpec`**:
|
|
83
|
+
* **What it means:** "Look up the rules for one AI."
|
|
84
|
+
|
|
85
|
+
**Why it works like that:** Instead of writing complex code for every new AI, we just add a few lines of "rules" here. It makes the system very easy to expand.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
### 5. `unified.adapter.ts` (The Multi-Lingual Robot)
|
|
90
|
+
This is a single piece of machinery that can talk to *any* AI that follows standard rules.
|
|
91
|
+
|
|
92
|
+
* **`call`**:
|
|
93
|
+
* **What it means:** "Do the actual work."
|
|
94
|
+
* **How it works:**
|
|
95
|
+
1. It uses the `promptBuilder` to write the interview questions.
|
|
96
|
+
2. It uses the `specs.ts` to package the questions correctly.
|
|
97
|
+
3. It "calls" the AI over the internet.
|
|
98
|
+
4. It waits for the answer and "unpacks" it.
|
|
99
|
+
* **Result:** A clean, finished report about your code.
|
|
100
|
+
|
|
101
|
+
**How it helps:** It handles the boring technical stuff—like internet timeouts or "waiting in line"—so the rest of the app doesn't have to.
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
### 6. `hfSpace.adapter.ts` (The Specialist)
|
|
106
|
+
Some AI providers (like Hugging Face Spaces) are unique and don't fit the standard "Multi-Lingual Robot" above.
|
|
107
|
+
|
|
108
|
+
* **`healthCheck`**:
|
|
109
|
+
* **What it means:** "Are you awake?"
|
|
110
|
+
* **How it works:** It pings the Hugging Face AI to see if it's sleeping (they often "go to sleep" to save power). If it is, it waits for it to wake up before sending your code.
|
|
111
|
+
|
|
112
|
+
**How it helps:** It handles the quirks of specific platforms so you never get a "failed" error just because the AI was taking a nap.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
### Summary for a Non-Programmer:
|
|
117
|
+
The **Inference Stepper** is like an automated newspaper.
|
|
118
|
+
1. Your code changes are the **News**.
|
|
119
|
+
2. `promptBuilder` is the **Editor** who writes the story outline.
|
|
120
|
+
3. `factory` and `specs` are the **Courier** who knows exactly which office (AI) to take the story to.
|
|
121
|
+
4. `unified.adapter` is the **Delivery Truck** that drives the story there, waits for a response, and brings back the finished **Newspaper (The Report)**.
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# AI Providers & Adapters
|
|
2
|
+
|
|
3
|
+
The **Inference Stepper** supports multiple AI providers through a flexible adapter architecture. This allows the system to switch between providers if one is unavailable or rate-limited.
|
|
4
|
+
|
|
5
|
+
## 🎯 Purpose
|
|
6
|
+
|
|
7
|
+
The providers are the "brains" of the system. They:
|
|
8
|
+
|
|
9
|
+
1. **Format** code changes (diffs) into prompts the AI understands.
|
|
10
|
+
2. **Communicate** with external AI services (Hugging Face, Gemini, etc.).
|
|
11
|
+
3. **Parse** the AI's response into a standardized JSON report.
|
|
12
|
+
4. **Handle** errors specific to each service.
|
|
13
|
+
|
|
14
|
+
## 🏗️ Architecture
|
|
15
|
+
|
|
16
|
+
We use an **Adapter Pattern**. Every provider must implement the `ProviderAdapter` interface:
|
|
17
|
+
|
|
18
|
+
- `name`: Unique identifier for the provider.
|
|
19
|
+
- `call(input: PromptInput)`: The main method that sends data to the AI and returns a result.
|
|
20
|
+
|
|
21
|
+
### Implementation Types
|
|
22
|
+
|
|
23
|
+
1. **HttpTemplateAdapter**: A universal adapter that can be configured for any HTTP-based AI service.
|
|
24
|
+
2. **HuggingFaceSpaceAdapter**: Specialized for Hugging Face Spaces with built-in health checks and specific prompt formatting.
|
|
25
|
+
|
|
26
|
+
## 🛠️ Security
|
|
27
|
+
|
|
28
|
+
Before sending any code to an AI provider, the system can **redact secrets**. It scans the diffs for passwords, API keys, and other sensitive information to ensure they never leave your infrastructure.
|
|
29
|
+
|
|
30
|
+
## Common Errors Handled
|
|
31
|
+
|
|
32
|
+
| Error | Description |
|
|
33
|
+
| ---------------------- | -------------------------------------------------------------------- |
|
|
34
|
+
| `AuthError` | Invalid API key or expired credentials. |
|
|
35
|
+
| `RateLimitError` | The provider is busy; we need to wait (respected via `Retry-After`). |
|
|
36
|
+
| `TimeoutError` | The AI took too long to think (default limit is 1 minute). |
|
|
37
|
+
| `InvalidResponseError` | The AI returned something that wasn't a valid report. |
|
|
38
|
+
|
|
39
|
+
## Adding a New Provider
|
|
40
|
+
|
|
41
|
+
To add a new provider:
|
|
42
|
+
|
|
43
|
+
1. Create a new adapter class (or use `HttpTemplateAdapter`).
|
|
44
|
+
2. Register it in `config.ts`.
|
|
45
|
+
3. The `Orchestrator` will automatically include it in the fallback rotation.
|
|
46
|
+
|
|
47
|
+
## Provider-Specific Implementations
|
|
48
|
+
|
|
49
|
+
### Google Gemini (Gemini 3 Models)
|
|
50
|
+
|
|
51
|
+
**Why Gemini is Different:**
|
|
52
|
+
|
|
53
|
+
Gemini models (such as `gemini-2.5-flash` and `gemini-3-flash-preview`) have unique requirements that differ from other AI providers. Our implementation follows [Google's official prompting strategies](https://ai.google.dev/gemini-api/docs/prompting-strategies) to maximize performance and reliability.
|
|
54
|
+
|
|
55
|
+
#### Key Differences:
|
|
56
|
+
|
|
57
|
+
1. **XML-Structured Prompts**
|
|
58
|
+
- Gemini 3 responds best to prompts with clear XML-style tags
|
|
59
|
+
- Tags like `<role>`, `<instructions>`, `<constraints>`, `<context>`, `<task>`, and `<output_format>` help the model understand the request structure
|
|
60
|
+
- This is different from other providers that use markdown or plain text formatting
|
|
61
|
+
- Implemented in `buildGeminiPrompt()` function in `promptBuilder.ts`
|
|
62
|
+
|
|
63
|
+
2. **API Key Authentication**
|
|
64
|
+
- Gemini requires the API key as a **query parameter** (`?key=YOUR_KEY`), not in headers
|
|
65
|
+
- Most other providers use `Authorization: Bearer` headers
|
|
66
|
+
- Conditional logic in `unified.adapter.ts` appends the key to the URL for Gemini only
|
|
67
|
+
|
|
68
|
+
3. **Temperature Configuration**
|
|
69
|
+
- **CRITICAL**: Gemini 3 models MUST use `temperature: 1.0`
|
|
70
|
+
- Google's documentation explicitly warns: "Changing the temperature (setting it below 1.0) may lead to unexpected behavior, such as looping or degraded performance"
|
|
71
|
+
- Other providers typically use lower temperatures (0.2-0.7) for deterministic outputs
|
|
72
|
+
- Our `specs.ts` locks Gemini's temperature at 1.0
|
|
73
|
+
|
|
74
|
+
4. **Increased Token Limit**
|
|
75
|
+
- Gemini 3 supports up to 4096 output tokens
|
|
76
|
+
- We use this higher limit for more detailed commit analysis reports
|
|
77
|
+
- Other providers typically limit to 2048 tokens
|
|
78
|
+
|
|
79
|
+
5. **Model Naming**
|
|
80
|
+
- Gemini uses versioned model names: `gemini-2.5-flash`, `gemini-3-flash-preview`
|
|
81
|
+
- Different from OpenAI's `gpt-4` or Anthropic's `claude-3` naming schemes
|
|
82
|
+
|
|
83
|
+
#### Implementation Pattern:
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
// Branch on the provider name: Gemini needs its own prompt format and a URL-based API key
|
|
87
|
+
if (this.spec.name === 'gemini') {
|
|
88
|
+
// Use Gemini-specific XML prompt
|
|
89
|
+
prompt = buildGeminiPrompt(input);
|
|
90
|
+
// Append API key to URL
|
|
91
|
+
actualEndpoint = `${actualEndpoint}?key=${this.apiKey}`;
|
|
92
|
+
} else {
|
|
93
|
+
// Use standard prompt for other providers
|
|
94
|
+
prompt = buildComprehensivePrompt(input);
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
#### Configuration:
|
|
99
|
+
|
|
100
|
+
```env
|
|
101
|
+
GEMINI_ENABLED=true
|
|
102
|
+
GEMINI_API_KEY=your_key_here
|
|
103
|
+
GEMINI_MODEL=gemini-2.5-flash
|
|
104
|
+
GEMINI_BASE_URL=https://generativelanguage.googleapis.com
|
|
105
|
+
GEMINI_TIMEOUT=60000 # 60 seconds for complex analysis
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
#### References:
|
|
109
|
+
- [Google Gemini API Prompting Strategies](https://ai.google.dev/gemini-api/docs/prompting-strategies)
|
|
110
|
+
- [Gemini 3 Model Documentation](https://ai.google.dev/gemini-api/docs/models/gemini-v3)
|
|
111
|
+
- [Text Generation Guide](https://ai.google.dev/gemini-api/docs/text-generation)
|
|
112
|
+
|
|
113
|
+
#### When to Add Provider-Specific Logic:
|
|
114
|
+
|
|
115
|
+
Consider adding provider-specific implementations when:
|
|
116
|
+
1. The provider's API authentication differs from standard Bearer tokens
|
|
117
|
+
2. The model performs significantly better with specific prompt structures
|
|
118
|
+
3. The provider has unique configuration requirements (like temperature constraints)
|
|
119
|
+
4. Response formats need special parsing logic
|
|
120
|
+
|
|
121
|
+
This pattern ensures each provider can be optimized for maximum performance while maintaining a clean, maintainable codebase.
|