ralph-hero-mcp-server 2.5.129 → 2.5.139

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,9 +6,33 @@
6
6
  * rateLimit fragment for continuous tracking.
7
7
  */
8
8
  import { graphql } from "@octokit/graphql";
9
+ import { trace, SpanStatusCode } from "@opentelemetry/api";
9
10
  import { RateLimiter } from "./lib/rate-limiter.js";
10
11
  import { SessionCache } from "./lib/cache.js";
11
12
  import { extractOperationName, sanitize } from "./lib/debug-logger.js";
13
+ /**
14
+ * Classify a GraphQL error into one of: "rate_limit" | "network" | "graphql".
15
+ *
16
+ * - `rate_limit` — HTTP 403 with a `retry-after` header (GitHub's secondary
17
+ * rate limit signal). Plain 403s without retry-after fall through to
18
+ * `graphql` since they're more commonly permission errors.
19
+ * - `network` — no `status` field on the error (fetch-level failure, DNS,
20
+ * socket reset, etc.)
21
+ * - `graphql` — everything else (GraphQL validation errors, 4xx, 5xx).
22
+ */
23
+ function classifyGraphQLError(error) {
24
+ if (!error || typeof error !== "object") {
25
+ return "graphql";
26
+ }
27
+ const e = error;
28
+ if (typeof e.status !== "number") {
29
+ return "network";
30
+ }
31
+ if (e.status === 403 && e.headers?.["retry-after"]) {
32
+ return "rate_limit";
33
+ }
34
+ return "graphql";
35
+ }
12
36
  /**
13
37
  * The rateLimit fragment to include in every query for proactive tracking.
14
38
  */
@@ -44,6 +68,11 @@ export function createGitHubClient(clientConfig, debugLogger) {
44
68
  const cache = new SessionCache();
45
69
  /**
46
70
  * Execute a raw GraphQL request and handle rate limit tracking.
71
+ *
72
+ * Wraps the request in a `ralph_hero.graphql` OpenTelemetry span when a
73
+ * tracer is available. When `RALPH_DEBUG` is unset and the SDK has not been
74
+ * initialized, `@opentelemetry/api` returns a no-op tracer/span — calls are
75
+ * essentially free.
47
76
  */
48
77
  async function executeGraphQL(queryString, variables, graphqlFn = graphqlWithAuth) {
49
78
  await rateLimiter.checkBeforeRequest();
@@ -62,53 +91,88 @@ export function createGitHubClient(clientConfig, debugLogger) {
62
91
  fullQuery.slice(insertPos);
63
92
  }
64
93
  }
65
- const t0 = Date.now();
66
- try {
67
- const response = await graphqlFn(fullQuery, variables || {});
68
- // Update rate limit tracker from response
69
- if (response && typeof response === "object" && "rateLimit" in response) {
70
- const rl = response.rateLimit;
71
- if (rl) {
72
- rateLimiter.update(rl);
94
+ const tracer = trace.getTracer("ralph-hero");
95
+ const operation = extractOperationName(fullQuery);
96
+ return tracer.startActiveSpan("ralph_hero.graphql", async (span) => {
97
+ if (operation) {
98
+ span.setAttribute("ralph_hero.operation", operation);
99
+ }
100
+ const t0 = Date.now();
101
+ try {
102
+ const response = await graphqlFn(fullQuery, variables || {});
103
+ // Update rate limit tracker from response
104
+ if (response && typeof response === "object" && "rateLimit" in response) {
105
+ const rl = response.rateLimit;
106
+ if (rl) {
107
+ rateLimiter.update(rl);
108
+ if (typeof rl.remaining === "number") {
109
+ span.setAttribute("ralph_hero.rate_limit.remaining", rl.remaining);
110
+ }
111
+ if (typeof rl.cost === "number") {
112
+ span.setAttribute("ralph_hero.rate_limit.cost", rl.cost);
113
+ }
114
+ }
73
115
  }
116
+ debugLogger?.logGraphQL({
117
+ operation,
118
+ variables: sanitize(variables),
119
+ durationMs: Date.now() - t0,
120
+ status: 200,
121
+ rateLimitRemaining: response
122
+ .rateLimit?.remaining,
123
+ rateLimitCost: response.rateLimit
124
+ ?.cost,
125
+ });
126
+ return response;
74
127
  }
75
- debugLogger?.logGraphQL({
76
- operation: extractOperationName(fullQuery),
77
- variables: sanitize(variables),
78
- durationMs: Date.now() - t0,
79
- status: 200,
80
- rateLimitRemaining: response.rateLimit?.remaining,
81
- rateLimitCost: response.rateLimit?.cost,
82
- });
83
- return response;
84
- }
85
- catch (error) {
86
- debugLogger?.logGraphQL({
87
- operation: extractOperationName(fullQuery),
88
- variables: sanitize(variables),
89
- durationMs: Date.now() - t0,
90
- status: error && typeof error === "object" && "status" in error
91
- ? error.status
92
- : 500,
93
- error: error instanceof Error ? error.message : String(error),
94
- });
95
- // Handle rate limit errors (403)
96
- if (error &&
97
- typeof error === "object" &&
98
- "status" in error &&
99
- error.status === 403) {
100
- const retryAfter = error && typeof error === "object" && "headers" in error
128
+ catch (error) {
129
+ // Detect rate-limit retry-able case FIRST. On the retry path we
130
+ // intentionally do NOT mark this span ERROR (or log a 500-shaped
131
+ // entry) — the retry may succeed and we don't want Langfuse to
132
+ // show a permanently-failed parent for a request that eventually
133
+ // returned 200. Only the non-retry path mutates span status.
134
+ const is403 = error &&
135
+ typeof error === "object" &&
136
+ "status" in error &&
137
+ error.status === 403;
138
+ const retryAfter = is403 && error && typeof error === "object" && "headers" in error
101
139
  ? error.headers?.["retry-after"]
102
140
  : undefined;
103
141
  if (retryAfter) {
104
142
  const waitMs = parseInt(retryAfter, 10) * 1000;
105
143
  console.error(`[github-client] Rate limited. Waiting ${retryAfter}s before retry.`);
106
144
  await new Promise((resolve) => setTimeout(resolve, waitMs));
107
- return executeGraphQL(queryString, variables, graphqlFn);
145
+ // `await` is critical: in an async fn, `finally { span.end() }`
146
+ // runs as soon as the return expression evaluates. Without
147
+ // `await`, the inner Promise would still be pending while
148
+ // `span.end()` fires, exporting a half-finished outer span.
149
+ return await executeGraphQL(queryString, variables, graphqlFn);
108
150
  }
151
+ // Non-retry error path: mark span ERROR, log, rethrow.
152
+ const errorType = classifyGraphQLError(error);
153
+ span.setAttribute("ralph_hero.error_type", errorType);
154
+ span.setStatus({
155
+ code: SpanStatusCode.ERROR,
156
+ message: error instanceof Error ? error.message : String(error),
157
+ });
158
+ if (error instanceof Error) {
159
+ span.recordException(error);
160
+ }
161
+ debugLogger?.logGraphQL({
162
+ operation,
163
+ variables: sanitize(variables),
164
+ durationMs: Date.now() - t0,
165
+ status: error && typeof error === "object" && "status" in error
166
+ ? error.status
167
+ : 500,
168
+ error: error instanceof Error ? error.message : String(error),
169
+ });
170
+ throw error;
109
171
  }
110
- throw error;
111
- }
172
+ finally {
173
+ span.end();
174
+ }
175
+ });
112
176
  }
113
177
  return {
114
178
  config: clientConfig,
package/dist/index.js CHANGED
@@ -14,6 +14,7 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
14
14
  import { createGitHubClient } from "./github-client.js";
15
15
  import { FieldOptionCache } from "./lib/cache.js";
16
16
  import { createDebugLogger, wrapServerToolWithLogging } from "./lib/debug-logger.js";
17
+ import { initTelemetry } from "./lib/telemetry.js";
17
18
  import { toolSuccess, resolveProjectOwner } from "./types.js";
18
19
  import { resolveRepoFromProject } from "./lib/helpers.js";
19
20
  import { detectOrphanRepoIssues } from "./lib/health.js";
@@ -360,6 +361,21 @@ function registerCoreTools(server, client) {
360
361
  */
361
362
  async function main() {
362
363
  console.error("[ralph-hero] Starting MCP server...");
364
+ // OTel SDK init MUST happen before initGitHubClient so the first GraphQL
365
+ // call from the client (repo inference) is captured as a span. initTelemetry
366
+ // returns null when RALPH_DEBUG !== "true" — no SDK objects allocated and
367
+ // no exporter threads in that path.
368
+ const sdk = (await initTelemetry());
369
+ if (sdk) {
370
+ console.error("[ralph-hero] OTel telemetry enabled");
371
+ // Best-effort flush on graceful shutdown. Errors swallowed because by the
372
+ // time SIGTERM fires we're already on the way out — partial trace loss is
373
+ // acceptable. SIGINT is not wired because Claude Code's stdio transport
374
+ // already cleans up on EOF.
375
+ process.on("SIGTERM", () => {
376
+ void sdk.shutdown().catch(() => undefined);
377
+ });
378
+ }
363
379
  const debugLogger = createDebugLogger();
364
380
  if (debugLogger) {
365
381
  console.error("[ralph-hero] Debug logging enabled (RALPH_DEBUG=true)");
@@ -0,0 +1,156 @@
1
+ /**
2
+ * OpenTelemetry initialization for the ralph-hero MCP server.
3
+ *
4
+ * Lazy-initialized when `RALPH_DEBUG=true`. When the env var is unset or any
5
+ * value other than the literal string `"true"`, `initTelemetry()` returns
6
+ * `null` and no OpenTelemetry SDK objects are constructed — zero overhead.
7
+ *
8
+ * The OTLP HTTP exporter reads its endpoint from `OTEL_EXPORTER_OTLP_ENDPOINT`
9
+ * (standard OTel convention). Auto-instrumentation is explicitly OFF — only
10
+ * the explicit `ralph_hero.graphql` spans emitted from `github-client.ts`
11
+ * appear in the resulting trace.
12
+ *
13
+ * A custom `SpanProcessor` redacts token-shaped attribute values at span
14
+ * end (`onEnd`) so secrets never reach the exporter. See `redactTokenAttributes()`.
15
+ */
16
+ import { readFileSync } from "node:fs";
17
+ import { fileURLToPath } from "node:url";
18
+ import { dirname, resolve } from "node:path";
19
+ /**
20
+ * Attribute value matching `^gh[ps]_` (GitHub PAT/server-to-server token shape)
21
+ * and key matching `_TOKEN$` (case-insensitive) or `^authorization$` are
22
+ * replaced with this sentinel before the span is exported.
23
+ */
24
+ const REDACTED = "[REDACTED]";
25
+ const TOKEN_VALUE_RE = /^gh[ps]_/;
26
+ const TOKEN_KEY_RE = /(_TOKEN$|^authorization$)/i;
27
+ /**
28
+ * Pure function — exported for unit tests. Returns a shallow copy of `attrs`
29
+ * with any token-shaped value or key replaced by `[REDACTED]`.
30
+ *
31
+ * Keys are matched case-insensitively against `_TOKEN$` and `^authorization$`.
32
+ * Values are matched (when they are strings) against `^gh[ps]_`.
33
+ *
34
+ * Non-matching attributes (including non-string values like numbers and
35
+ * booleans) pass through unchanged.
36
+ */
37
+ export function redactTokenAttributes(attrs) {
38
+ if (!attrs)
39
+ return {};
40
+ const out = {};
41
+ for (const [key, value] of Object.entries(attrs)) {
42
+ if (TOKEN_KEY_RE.test(key)) {
43
+ out[key] = REDACTED;
44
+ continue;
45
+ }
46
+ if (typeof value === "string" && TOKEN_VALUE_RE.test(value)) {
47
+ out[key] = REDACTED;
48
+ continue;
49
+ }
50
+ out[key] = value;
51
+ }
52
+ return out;
53
+ }
54
+ /**
55
+ * SpanProcessor that scrubs token-shaped attributes from each span.
56
+ *
57
+ * The scrub runs on `onEnd` rather than `onStart` because we need to see the
58
+ * full set of attributes that any caller has set on the span. Crucially, once
59
+ * a span has ended, `span.setAttribute()` is a documented no-op — the only
60
+ * way to mutate the final exported attribute set is to write directly to the
61
+ * `attributes` object. TypeScript types it as readonly but the runtime
62
+ * representation is a plain mutable object owned by the span instance.
63
+ *
64
+ * Order matters: this processor must be registered BEFORE the exporting
65
+ * processor (`BatchSpanProcessor` or `SimpleSpanProcessor`) so the mutation
66
+ * is visible by the time the export call reads `attributes`.
67
+ */
68
+ export class TokenScrubbingSpanProcessor {
69
+ onStart(_span, _parentContext) {
70
+ // No-op — attributes set on an active span go through `setAttribute`,
71
+ // not the readable snapshot. We catch them all in `onEnd`.
72
+ }
73
+ onEnd(span) {
74
+ const attrs = span.attributes;
75
+ if (!attrs)
76
+ return;
77
+ // attrs is `Attributes` (readonly per the type) but mutable at runtime.
78
+ // Mutate in-place so downstream processors see the redacted values.
79
+ const mut = attrs;
80
+ for (const [key, value] of Object.entries(mut)) {
81
+ if (TOKEN_KEY_RE.test(key)) {
82
+ mut[key] = REDACTED;
83
+ }
84
+ else if (typeof value === "string" && TOKEN_VALUE_RE.test(value)) {
85
+ mut[key] = REDACTED;
86
+ }
87
+ }
88
+ }
89
+ async shutdown() {
90
+ // No-op — this processor holds no resources.
91
+ }
92
+ async forceFlush() {
93
+ // No-op — this processor performs no async work.
94
+ }
95
+ }
96
+ /**
97
+ * Read the MCP server semver from package.json next to this module.
98
+ *
99
+ * Falls back to `"unknown"` if the file is missing or unreadable so the SDK
100
+ * still starts up — the version is informational, not load-bearing.
101
+ */
102
+ function resolveServiceVersion() {
103
+ try {
104
+ // In ESM, __dirname isn't defined; compute it from import.meta.url.
105
+ const here = dirname(fileURLToPath(import.meta.url));
106
+ // Walk up from src/lib (or dist/lib at runtime) to the package root.
107
+ const pkgPath = resolve(here, "..", "..", "package.json");
108
+ const raw = readFileSync(pkgPath, "utf8");
109
+ const pkg = JSON.parse(raw);
110
+ return pkg.version ?? "unknown";
111
+ }
112
+ catch {
113
+ return "unknown";
114
+ }
115
+ }
116
+ /**
117
+ * Initialize the OpenTelemetry NodeSDK when `RALPH_DEBUG=true`.
118
+ *
119
+ * - Returns `null` (zero overhead) when `process.env.RALPH_DEBUG !== "true"`.
120
+ * - When enabled: configures an OTLP/HTTP trace exporter, no auto-instrumentation,
121
+ * a `TokenScrubbingSpanProcessor` ahead of the default batch processor, and
122
+ * resource attrs `service.name = "ralph-hero"`, `service.version = <semver>`.
123
+ *
124
+ * Caller is responsible for calling `sdk.shutdown()` (e.g., on SIGTERM) to
125
+ * flush in-flight spans.
126
+ */
127
+ export async function initTelemetry() {
128
+ if (process.env.RALPH_DEBUG !== "true") {
129
+ return null;
130
+ }
131
+ // Dynamic imports keep zero-overhead in the disabled path — when RALPH_DEBUG
132
+ // is unset, none of these modules are loaded into memory.
133
+ const { NodeSDK } = await import("@opentelemetry/sdk-node");
134
+ const { OTLPTraceExporter } = await import("@opentelemetry/exporter-trace-otlp-http");
135
+ const { Resource } = await import("@opentelemetry/resources");
136
+ const { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_VERSION, } = await import("@opentelemetry/semantic-conventions");
137
+ const { BatchSpanProcessor } = await import("@opentelemetry/sdk-trace-base");
138
+ const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT ??
139
+ "http://localhost:3100/api/public/otel/v1/traces";
140
+ const exporter = new OTLPTraceExporter({ url: endpoint });
141
+ const sdk = new NodeSDK({
142
+ resource: new Resource({
143
+ [SEMRESATTRS_SERVICE_NAME]: "ralph-hero",
144
+ [SEMRESATTRS_SERVICE_VERSION]: resolveServiceVersion(),
145
+ }),
146
+ spanProcessors: [
147
+ new TokenScrubbingSpanProcessor(),
148
+ new BatchSpanProcessor(exporter),
149
+ ],
150
+ // No auto-instrumentation — only explicit ralph_hero.* spans are emitted.
151
+ instrumentations: [],
152
+ });
153
+ sdk.start();
154
+ return sdk;
155
+ }
156
+ //# sourceMappingURL=telemetry.js.map
@@ -129,6 +129,6 @@ export const WORKFLOW_STATE_TO_STATUS = {
129
129
  "In Review": "In Progress",
130
130
  "Done": "Done",
131
131
  "Canceled": "Done",
132
- "Human Needed": "Done",
132
+ "Human Needed": "Todo",
133
133
  };
134
134
  //# sourceMappingURL=workflow-states.js.map
@@ -25,7 +25,7 @@ export function registerIssueTools(server, client, fieldCache) {
25
25
  // -------------------------------------------------------------------------
26
26
  // ralph_hero__list_issues
27
27
  // -------------------------------------------------------------------------
28
- server.tool("ralph_hero__list_issues", "List issues from a GitHub repository with optional filters. Fetches all project items (full project scan, no silent 500-cap) and applies filters client-side, so items at any board position are visible regardless of default ordering. Returns: number, title, state, workflowState, estimate, priority, iteration, labels, assignees. Use workflowState filter to find issues in a specific phase. Use iteration filter with @current/@next or sprint title. Recovery: if no results, broaden filters or check that issues exist in the project.", {
28
+ server.tool("ralph_hero__list_issues", "List issues from a GitHub repository with optional filters. Fetches all project items (full project scan, no silent 500-cap) and applies filters client-side, so items at any board position are visible regardless of default ordering. By default returns issues in any state (both OPEN and CLOSED) so visibility matches the dashboard family (pipeline_dashboard, next_actions, project_hygiene); pass the `state` parameter (\"OPEN\" or \"CLOSED\") to narrow. Returns: number, title, state, workflowState, estimate, priority, iteration, labels, assignees. Use workflowState filter to find issues in a specific phase. Use iteration filter with @current/@next or sprint title. Recovery: if no results, broaden filters or check that issues exist in the project.", {
29
29
  owner: z
30
30
  .string()
31
31
  .optional()
@@ -68,8 +68,8 @@ export function registerIssueTools(server, client, fieldCache) {
68
68
  state: z
69
69
  .enum(["OPEN", "CLOSED"])
70
70
  .optional()
71
- .default("OPEN")
72
- .describe("Issue state filter (default: OPEN)"),
71
+ .describe("Issue state filter. When omitted, returns issues in any state " +
72
+ "(matches dashboard-family behavior). Pass 'OPEN' or 'CLOSED' to narrow."),
73
73
  reason: z
74
74
  .enum(["completed", "not_planned", "reopened"])
75
75
  .optional()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-hero-mcp-server",
3
- "version": "2.5.129",
3
+ "version": "2.5.139",
4
4
  "description": "MCP server for GitHub Projects V2 - Ralph workflow automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -20,6 +20,12 @@
20
20
  "@modelcontextprotocol/sdk": "^1.26.0",
21
21
  "@octokit/graphql": "^9.0.3",
22
22
  "@octokit/plugin-paginate-graphql": "^6.0.0",
23
+ "@opentelemetry/api": "^1.9.0",
24
+ "@opentelemetry/exporter-trace-otlp-http": "^0.57.0",
25
+ "@opentelemetry/resources": "^1.30.0",
26
+ "@opentelemetry/sdk-node": "^0.57.0",
27
+ "@opentelemetry/sdk-trace-base": "^1.30.0",
28
+ "@opentelemetry/semantic-conventions": "^1.28.0",
23
29
  "yaml": "^2.7.0",
24
30
  "zod": "^3.25.0"
25
31
  },