@ateam-ai/mcp 0.3.33 → 0.3.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/tools.js +92 -8
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ateam-ai/mcp",
3
- "version": "0.3.33",
3
+ "version": "0.3.35",
4
4
  "mcpName": "io.github.ariekogan/ateam-mcp",
5
5
  "description": "A-Team MCP Server — build, validate, and deploy multi-agent solutions from any AI environment",
6
6
  "type": "module",
package/src/tools.js CHANGED
@@ -16,6 +16,44 @@ import {
16
16
  } from "./api.js";
17
17
  import { renderAgentDocHeader, mergeAgentDoc, AGENT_DOC_SENTINEL } from "./agentDoc.js";
18
18
 
19
+ // ─── Async deploy helper ────────────────────────────────────────────
20
+ //
21
+ // All long-running deploy endpoints (build_and_run, redeploy, github_pull)
22
+ // support async mode: POST returns {job_id, poll_url} in <1s, the work runs
23
+ // in the background, and the client polls /deploy/jobs/:jobId until status
24
+ // is "done" or "failed". This bypasses the upstream Cloudflare 100s timeout
25
+ // that used to kill bulk redeploys with 524.
26
+ //
27
+ // pollDeployJob is the client side of that contract: it polls the job and
28
+ // returns the final job entry (which is the same shape as the original
29
+ // sync response would have been, plus job metadata). MCP tool wrappers use
30
+ // this so the agent gets a normal response from a long-running tool call —
31
+ // no async API leaks out to agent prompts.
32
+ async function pollDeployJob(jobId, sid, { label = 'deploy', maxMs = 15 * 60_000, intervalMs = 2000 } = {}) {
33
+ const start = Date.now();
34
+ let lastStatus = null;
35
+ while (Date.now() - start < maxMs) {
36
+ await new Promise(r => setTimeout(r, intervalMs));
37
+ try {
38
+ const job = await get(`/deploy/jobs/${jobId}`, sid);
39
+ lastStatus = job?.status;
40
+ if (job?.status === 'done' || job?.status === 'failed') {
41
+ return job; // job entry has the full result merged in
42
+ }
43
+ } catch (err) {
44
+ // Transient — keep polling. Logged via console.warn only when MCP_DEBUG_POLLS is set.
45
+ if (process.env.MCP_DEBUG_POLLS) console.warn(`[pollDeployJob:${label}] poll error (will retry): ${err.message}`);
46
+ }
47
+ }
48
+ return {
49
+ ok: false,
50
+ error: `${label} polling timed out after ${Math.round(maxMs / 60_000)}min`,
51
+ last_status: lastStatus,
52
+ job_id: jobId,
53
+ hint: 'The job may still be running on the server. Call get(`/deploy/jobs/<job_id>`) directly to check.',
54
+ };
55
+ }
56
+
19
57
  // ─── Tool definitions ───────────────────────────────────────────────
20
58
 
21
59
  export const tools = [
@@ -1643,7 +1681,13 @@ const handlers = {
1643
1681
  phases.push({ phase: "deploy", status: job.status });
1644
1682
  break;
1645
1683
  }
1646
- } catch { /* keep polling */ }
1684
+ } catch (err) {
1685
+ // #4 Silent-catch audit: poll errors are usually transient
1686
+ // (network blip, restart). They are logged only when the
1687
+ // MCP_DEBUG_POLLS env var is set, so they don't drown the console
1688
+ // but ARE visible if you enable it after a stuck deploy.
1689
+ if (process.env.MCP_DEBUG_POLLS) console.warn(`[ateam_build_and_run] poll ${jobId} error (will retry): ${err.message}`);
1690
+ }
1647
1691
  }
1648
1692
  if (!deploy) {
1649
1693
  return { ok: false, phase: "deployment", phases, error: "Async deploy timed out after 10 minutes", validation_warnings: validation.warnings || [],
@@ -2221,8 +2265,20 @@ const handlers = {
2221
2265
  ateam_github_push: async ({ solution_id, message }, sid) =>
2222
2266
  post(`/deploy/solutions/${solution_id}/github/push`, { push_to_github: true, message }, sid, { timeoutMs: 60_000 }),
2223
2267
 
2224
- ateam_github_pull: async ({ solution_id }, sid) =>
2225
- post(`/deploy/solutions/${solution_id}/github/pull`, {}, sid, { timeoutMs: 300_000, retries: 2 }),
2268
+ ateam_github_pull: async ({ solution_id }, sid) => {
2269
+ // Async-first: github_pull is the #1 Cloudflare-524 culprit on large
2270
+ // solutions. Kick the job off, then poll. Falls back to sync if the
2271
+ // kick-off request throws (e.g. an older backend without async support).
2272
+ let kicked;
2273
+ try {
2274
+ kicked = await post(`/deploy/solutions/${solution_id}/github/pull`, { async: true }, sid, { timeoutMs: 30_000 });
2275
+ } catch (err) {
2276
+ // Sync fallback (older backend without async support)
2277
+ return await post(`/deploy/solutions/${solution_id}/github/pull`, {}, sid, { timeoutMs: 300_000, retries: 2 });
2278
+ }
2279
+ if (!kicked?.async || !kicked.job_id) return kicked; // backend didn't honor async — return as-is
2280
+ return await pollDeployJob(kicked.job_id, sid, { label: 'github-pull', maxMs: 15 * 60_000, intervalMs: 2000 });
2281
+ },
2226
2282
 
2227
2283
  ateam_github_status: async ({ solution_id }, sid) =>
2228
2284
  get(`/deploy/solutions/${solution_id}/github/status`, sid),
@@ -2271,23 +2327,51 @@ const handlers = {
2271
2327
  const endpoint = skill_id
2272
2328
  ? `/deploy/solutions/${solution_id}/skills/${skill_id}/redeploy`
2273
2329
  : `/deploy/solutions/${solution_id}/redeploy`;
2330
+
2331
+ // Async-first: bulk redeploys used to 524 on >5-skill solutions because
2332
+ // the upstream Cloudflare timeout is ~100s. Kick the job and poll. If
2333
+ // the backend doesn't support async (older deployment), fall back to
2334
+ // the legacy sync path with a longer timeout and retries. If both fail, surface a
2335
+ // useful error/hint to the agent.
2274
2336
  let result;
2337
+ let lastErr = null;
2275
2338
  try {
2276
- result = await post(endpoint, {}, sid, { timeoutMs: 300_000, retries: 2 });
2339
+ const kicked = await post(endpoint, { async: true }, sid, { timeoutMs: 30_000 });
2340
+ if (kicked?.async && kicked.job_id) {
2341
+ result = await pollDeployJob(kicked.job_id, sid, {
2342
+ label: skill_id ? `redeploy-skill ${skill_id}` : 'redeploy-bulk',
2343
+ maxMs: 15 * 60_000,
2344
+ intervalMs: 2000,
2345
+ });
2346
+ } else {
2347
+ result = kicked; // backend didn't honor async — already-finished sync result
2348
+ }
2277
2349
  } catch (err) {
2278
- const notFound = /not found|404|ENOENT/i.test(err.message);
2279
- const isTimeout = /524|502|503|timeout|ETIMEDOUT/i.test(err.message);
2350
+ lastErr = err;
2351
+ // Sync fallback for backends without async support
2352
+ try {
2353
+ result = await post(endpoint, {}, sid, { timeoutMs: 300_000, retries: 2 });
2354
+ lastErr = null;
2355
+ } catch (syncErr) {
2356
+ lastErr = syncErr;
2357
+ }
2358
+ }
2359
+
2360
+ if (!result && lastErr) {
2361
+ const notFound = /not found|404|ENOENT/i.test(lastErr.message);
2362
+ const isTimeout = /524|502|503|timeout|ETIMEDOUT/i.test(lastErr.message);
2280
2363
  return {
2281
2364
  ok: false,
2282
- error: err.message,
2365
+ error: lastErr.message,
2283
2366
  ...(notFound && {
2284
2367
  hint: "Skill not found in Builder storage. Edit the skill on GitHub with ateam_github_patch(solution_id, path: 'skills/<skill-id>/skill.json', search: '...', replace: '...'), then use ateam_build_and_run(solution_id, github: true) or ask the platform operator to deploy the single skill.",
2285
2368
  }),
2286
2369
  ...(isTimeout && {
2287
- hint: "Redeploy timed out. For large solutions, redeploy one skill at a time: ateam_redeploy(solution_id, skill_id: '<specific-skill>').",
2370
+ hint: "Redeploy timed out even after async polling (15min). Use ateam_redeploy(solution_id, skill_id: '<specific-skill>') to redeploy one skill at a time.",
2288
2371
  }),
2289
2372
  };
2290
2373
  }
2374
+ if (!result) result = { ok: false, error: 'Redeploy returned no result' };
2291
2375
  // Pull through the underlying error/message instead of fabricating "0/0/0
2292
2376
  // success-shaped" output. Old wrapper hid backend errors (e.g. validator
2293
2377
  // failures from sentinel files in user repos) and reported `total: 0` with