@ateam-ai/mcp 0.3.34 → 0.3.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/tools.js +85 -7
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ateam-ai/mcp",
3
- "version": "0.3.34",
3
+ "version": "0.3.35",
4
4
  "mcpName": "io.github.ariekogan/ateam-mcp",
5
5
  "description": "A-Team MCP Server — build, validate, and deploy multi-agent solutions from any AI environment",
6
6
  "type": "module",
package/src/tools.js CHANGED
@@ -16,6 +16,44 @@ import {
16
16
  } from "./api.js";
17
17
  import { renderAgentDocHeader, mergeAgentDoc, AGENT_DOC_SENTINEL } from "./agentDoc.js";
18
18
 
19
+ // ─── Async deploy helper ────────────────────────────────────────────
20
+ //
21
+ // All long-running deploy endpoints (build_and_run, redeploy, github_pull)
22
+ // support async mode: POST returns {job_id, poll_url} in <1s, the work runs
23
+ // in the background, and the client polls /deploy/jobs/:jobId until status
24
+ // is "done" or "failed". This bypasses the upstream Cloudflare 100s timeout
25
+ // that used to kill bulk redeploys with 524.
26
+ //
27
+ // pollDeployJob is the client side of that contract: it polls the job and
28
+ // returns the final job entry (which is the same shape as the original
29
+ // sync response would have been, plus job metadata). MCP tool wrappers use
30
+ // this so the agent gets a normal response from a long-running tool call —
31
+ // no async API leaks out to agent prompts.
32
/**
 * Poll an async deploy job until it reaches a terminal status or the polling
 * budget runs out.
 *
 * A delay always precedes each request (including the first one). An error
 * thrown by a single poll is treated as transient: it is remembered, optionally
 * logged when the MCP_DEBUG_POLLS environment variable is set, and polling
 * continues until the deadline.
 *
 * @param {string} jobId - Job identifier; interpolated into `/deploy/jobs/<jobId>`.
 * @param {*} sid - Passed through unchanged as the second argument of `get`.
 * @param {object} [options]
 * @param {string} [options.label='deploy'] - Used in the debug log line and the timeout error text.
 * @param {number} [options.maxMs=900000] - Total polling budget in milliseconds.
 * @param {number} [options.intervalMs=2000] - Delay between polls in milliseconds.
 * @returns {Promise<object>} The job entry as soon as its `status` is "done" or
 *   "failed". If the budget is exhausted first, an object
 *   `{ ok: false, error, last_status, job_id, hint }`, plus `last_error` when the
 *   most recent poll itself threw.
 */
async function pollDeployJob(jobId, sid, { label = 'deploy', maxMs = 15 * 60_000, intervalMs = 2000 } = {}) {
  const start = Date.now();
  let lastStatus = null;
  let lastError = null;

  while (Date.now() - start < maxMs) {
    // Clamp the wait to what is left of the budget so a large intervalMs
    // cannot push the total runtime well past maxMs.
    const remainingMs = Math.max(0, maxMs - (Date.now() - start));
    await new Promise((resolve) => setTimeout(resolve, Math.min(intervalMs, remainingMs)));

    try {
      const job = await get(`/deploy/jobs/${jobId}`, sid);
      lastStatus = job?.status;
      lastError = null; // a successful poll supersedes any earlier failure
      if (job?.status === 'done' || job?.status === 'failed') {
        return job; // job entry has the full result merged in
      }
    } catch (err) {
      // Transient — keep polling, but remember the reason so a timeout result
      // can say why no status was ever obtained. Guard against non-Error throws.
      lastError = err?.message ?? String(err);
      if (process.env.MCP_DEBUG_POLLS) console.warn(`[pollDeployJob:${label}] poll error (will retry): ${lastError}`);
    }
  }

  return {
    ok: false,
    error: `${label} polling timed out after ${Math.round(maxMs / 60_000)}min`,
    last_status: lastStatus,
    ...(lastError !== null && { last_error: lastError }),
    job_id: jobId,
    hint: 'The job may still be running on the server. Call get(`/deploy/jobs/<job_id>`) directly to check.',
  };
}
56
+
19
57
  // ─── Tool definitions ───────────────────────────────────────────────
20
58
 
21
59
  export const tools = [
@@ -2227,8 +2265,20 @@ const handlers = {
2227
2265
  ateam_github_push: async ({ solution_id, message }, sid) =>
2228
2266
  post(`/deploy/solutions/${solution_id}/github/push`, { push_to_github: true, message }, sid, { timeoutMs: 60_000 }),
2229
2267
 
2230
- ateam_github_pull: async ({ solution_id }, sid) =>
2231
- post(`/deploy/solutions/${solution_id}/github/pull`, {}, sid, { timeoutMs: 300_000, retries: 2 }),
2268
+ ateam_github_pull: async ({ solution_id }, sid) => {
2269
+ // Async-first: github_pull is the #1 Cloudflare-524 culprit on large
2270
+ // solutions. Kick the job off, then poll. Falls back to sync if the
2271
+ // backend doesn't support async (older deployments).
2272
+ let kicked;
2273
+ try {
2274
+ kicked = await post(`/deploy/solutions/${solution_id}/github/pull`, { async: true }, sid, { timeoutMs: 30_000 });
2275
+ } catch (err) {
2276
+ // Sync fallback (older backend without async support)
2277
+ return await post(`/deploy/solutions/${solution_id}/github/pull`, {}, sid, { timeoutMs: 300_000, retries: 2 });
2278
+ }
2279
+ if (!kicked?.async || !kicked.job_id) return kicked; // backend didn't honor async — return as-is
2280
+ return await pollDeployJob(kicked.job_id, sid, { label: 'github-pull', maxMs: 15 * 60_000, intervalMs: 2000 });
2281
+ },
2232
2282
 
2233
2283
  ateam_github_status: async ({ solution_id }, sid) =>
2234
2284
  get(`/deploy/solutions/${solution_id}/github/status`, sid),
@@ -2277,23 +2327,51 @@ const handlers = {
2277
2327
  const endpoint = skill_id
2278
2328
  ? `/deploy/solutions/${solution_id}/skills/${skill_id}/redeploy`
2279
2329
  : `/deploy/solutions/${solution_id}/redeploy`;
2330
+
2331
+ // Async-first: bulk redeploys used to 524 on >5-skill solutions because
2332
+ // the upstream Cloudflare timeout is ~100s. Kick the job and poll. If
2333
+ // the backend doesn't support async (older deployment), fall back to
2334
+ // the legacy sync path with longer retry. If both fail, surface a
2335
+ // useful error/hint to the agent.
2280
2336
  let result;
2337
+ let lastErr = null;
2281
2338
  try {
2282
- result = await post(endpoint, {}, sid, { timeoutMs: 300_000, retries: 2 });
2339
+ const kicked = await post(endpoint, { async: true }, sid, { timeoutMs: 30_000 });
2340
+ if (kicked?.async && kicked.job_id) {
2341
+ result = await pollDeployJob(kicked.job_id, sid, {
2342
+ label: skill_id ? `redeploy-skill ${skill_id}` : 'redeploy-bulk',
2343
+ maxMs: 15 * 60_000,
2344
+ intervalMs: 2000,
2345
+ });
2346
+ } else {
2347
+ result = kicked; // backend didn't honor async — already-finished sync result
2348
+ }
2283
2349
  } catch (err) {
2284
- const notFound = /not found|404|ENOENT/i.test(err.message);
2285
- const isTimeout = /524|502|503|timeout|ETIMEDOUT/i.test(err.message);
2350
+ lastErr = err;
2351
+ // Sync fallback for backends without async support
2352
+ try {
2353
+ result = await post(endpoint, {}, sid, { timeoutMs: 300_000, retries: 2 });
2354
+ lastErr = null;
2355
+ } catch (syncErr) {
2356
+ lastErr = syncErr;
2357
+ }
2358
+ }
2359
+
2360
+ if (!result && lastErr) {
2361
+ const notFound = /not found|404|ENOENT/i.test(lastErr.message);
2362
+ const isTimeout = /524|502|503|timeout|ETIMEDOUT/i.test(lastErr.message);
2286
2363
  return {
2287
2364
  ok: false,
2288
- error: err.message,
2365
+ error: lastErr.message,
2289
2366
  ...(notFound && {
2290
2367
  hint: "Skill not found in Builder storage. Edit the skill on GitHub with ateam_github_patch(solution_id, path: 'skills/<skill-id>/skill.json', search: '...', replace: '...'), then use ateam_build_and_run(solution_id, github: true) or ask the platform operator to deploy the single skill.",
2291
2368
  }),
2292
2369
  ...(isTimeout && {
2293
- hint: "Redeploy timed out. For large solutions, redeploy one skill at a time: ateam_redeploy(solution_id, skill_id: '<specific-skill>').",
2370
+ hint: "Redeploy timed out even after async polling (15min). Use ateam_redeploy(solution_id, skill_id: '<specific-skill>') to redeploy one skill at a time.",
2294
2371
  }),
2295
2372
  };
2296
2373
  }
2374
+ if (!result) result = { ok: false, error: 'Redeploy returned no result' };
2297
2375
  // Pull through the underlying error/message instead of fabricating "0/0/0
2298
2376
  // success-shaped" output. Old wrapper hid backend errors (e.g. validator
2299
2377
  // failures from sentinel files in user repos) and reported `total: 0` with