@thotischner/observability-mcp 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/conformance/mcp-2025-11-25.test.js +41 -0
  2. package/dist/connectors/loki.js +24 -15
  3. package/dist/connectors/loki.test.js +15 -0
  4. package/dist/connectors/prometheus.d.ts +1 -0
  5. package/dist/connectors/prometheus.js +75 -3
  6. package/dist/connectors/prometheus.test.js +81 -0
  7. package/dist/context.d.ts +11 -2
  8. package/dist/context.js +10 -2
  9. package/dist/context.test.js +6 -0
  10. package/dist/enrich/ip-dataset.d.ts +25 -0
  11. package/dist/enrich/ip-dataset.js +113 -0
  12. package/dist/enrich/ip-dataset.test.d.ts +1 -0
  13. package/dist/enrich/ip-dataset.test.js +85 -0
  14. package/dist/index.js +94 -9
  15. package/dist/tools/enrich-ips.d.ts +30 -0
  16. package/dist/tools/enrich-ips.js +60 -0
  17. package/dist/tools/enrich-ips.test.d.ts +1 -0
  18. package/dist/tools/enrich-ips.test.js +38 -0
  19. package/dist/tools/query-logs-schema.test.d.ts +1 -0
  20. package/dist/tools/query-logs-schema.test.js +38 -0
  21. package/dist/tools/query-logs.d.ts +5 -2
  22. package/dist/tools/query-logs.js +31 -13
  23. package/dist/tools/query-metrics.d.ts +7 -3
  24. package/dist/tools/query-metrics.js +33 -12
  25. package/dist/tools/query-raw-gate.test.d.ts +1 -0
  26. package/dist/tools/query-raw-gate.test.js +52 -0
  27. package/dist/tools/registry-names.d.ts +1 -1
  28. package/dist/tools/registry-names.js +2 -0
  29. package/dist/tools/topology.js +14 -0
  30. package/dist/tools/topology.test.js +15 -0
  31. package/dist/tools/validation.d.ts +17 -0
  32. package/dist/tools/validation.js +27 -0
  33. package/dist/tools/validation.test.js +24 -1
  34. package/dist/types.d.ts +10 -0
  35. package/package.json +1 -1
package/dist/conformance/mcp-2025-11-25.test.js CHANGED
@@ -113,6 +113,47 @@ test("MCP 2025-11-25: tools/list returns a Tool[] each with name + inputSchema",
113
113
  assert.ok(t.inputSchema && typeof t.inputSchema === "object", `tool ${t.name} missing inputSchema`);
114
114
  }
115
115
  });
116
+ test("MCP 2025-11-25: query_logs advertises labels + aggregate params (issue #415)", opts, async () => {
117
+ // Regression guard for the v3.1.0 ship gap: the labels/aggregate handler
118
+ // code existed but the inline MCP schema in createMcpServer never declared
119
+ // them, so a live tools/list omitted them and the SDK stripped them from
120
+ // calls — a silent no-op. Assert the live server advertises both.
121
+ const session = await newSession();
122
+ const { response } = await jsonRpc("tools/list", {}, { id: 2, session });
123
+ const r = response.result;
124
+ const queryLogs = r.tools?.find((t) => t.name === "query_logs");
125
+ assert.ok(queryLogs, "query_logs tool must be advertised");
126
+ const props = queryLogs.inputSchema?.properties ?? {};
127
+ assert.ok("labels" in props, "query_logs must advertise a `labels` param (issue #415 #1)");
128
+ assert.ok("aggregate" in props, "query_logs must advertise an `aggregate` param (issue #415 #2)");
129
+ });
130
+ test("MCP 2025-11-25: query_metrics advertises labels param (issue #415 #4)", opts, async () => {
131
+ const session = await newSession();
132
+ const { response } = await jsonRpc("tools/list", {}, { id: 2, session });
133
+ const r = response.result;
134
+ const queryMetrics = r.tools?.find((t) => t.name === "query_metrics");
135
+ assert.ok(queryMetrics, "query_metrics tool must be advertised");
136
+ const props = queryMetrics.inputSchema?.properties ?? {};
137
+ assert.ok("labels" in props, "query_metrics must advertise a `labels` param (issue #415 #4)");
138
+ });
139
+ test("MCP 2025-11-25: query_metrics + query_logs advertise raw_query (issue #415 #3)", opts, async () => {
140
+ const session = await newSession();
141
+ const { response } = await jsonRpc("tools/list", {}, { id: 2, session });
142
+ const r = response.result;
143
+ for (const name of ["query_metrics", "query_logs"]) {
144
+ const tool = r.tools?.find((t) => t.name === name);
145
+ assert.ok(tool, `${name} tool must be advertised`);
146
+ assert.ok("raw_query" in (tool.inputSchema?.properties ?? {}), `${name} must advertise a raw_query param`);
147
+ }
148
+ });
149
+ test("MCP 2025-11-25: enrich_ips tool is advertised (issue #415 Gap B)", opts, async () => {
150
+ const session = await newSession();
151
+ const { response } = await jsonRpc("tools/list", {}, { id: 2, session });
152
+ const r = response.result;
153
+ const tool = r.tools?.find((t) => t.name === "enrich_ips");
154
+ assert.ok(tool, "enrich_ips tool must be advertised");
155
+ assert.ok("ips" in (tool.inputSchema?.properties ?? {}), "enrich_ips must advertise an `ips` param");
156
+ });
116
157
  test("MCP 2025-11-25: tools/call dispatches and returns CallToolResult", opts, async () => {
117
158
  const session = await newSession();
118
159
  const { response } = await jsonRpc("tools/call", { name: "list_sources", arguments: {} }, { id: 3, session });
package/dist/connectors/loki.js CHANGED
@@ -166,22 +166,31 @@ export class LokiConnector {
166
166
  async queryLogs(params) {
167
167
  const { start, end } = this.parseTimeRange(params.duration);
168
168
  const limit = Math.min(Math.max(params.limit || 100, 1), 1000);
169
- // Resolve label + actual selector value. For the 'container' label the
170
- // value stored in Loki may be '/my-app-1' while the caller passes the
171
- // sanitized 'my-app-1' return the prefixed form so the LogQL selector
172
- // matches the real stream.
173
- const { label: matchedLabel, value: rawValue } = await this.resolveServiceSelector(params.service);
174
- const service = this.escapeLogQLValue(rawValue);
175
- let logql = `{${matchedLabel}="${service}"} | json`;
176
- if (params.level) {
177
- logql += ` | level="${this.escapeLogQLValue(params.level)}"`;
169
+ let logql;
170
+ if (params.rawQuery) {
171
+ // Raw LogQL passthrough (capability-gated at the tool layer): the caller
172
+ // supplied a verbatim log-selector query. Skip the curated stream
173
+ // selector / | json / label-filter construction and run it as-is.
174
+ logql = params.rawQuery;
178
175
  }
179
- // Structured equality filters (method/status/url/environment/…) — run
180
- // after `| json` so backend-extracted fields are selectable.
181
- logql += logqlLabelFilters(params.labels);
182
- if (params.query) {
183
- const query = this.escapeLogQLRegex(params.query);
184
- logql += ` |~ \`${query}\``;
176
+ else {
177
+ // Resolve label + actual selector value. For the 'container' label the
178
+ // value stored in Loki may be '/my-app-1' while the caller passes the
179
+ // sanitized 'my-app-1' — return the prefixed form so the LogQL selector
180
+ // matches the real stream.
181
+ const { label: matchedLabel, value: rawValue } = await this.resolveServiceSelector(params.service);
182
+ const service = this.escapeLogQLValue(rawValue);
183
+ logql = `{${matchedLabel}="${service}"} | json`;
184
+ if (params.level) {
185
+ logql += ` | level="${this.escapeLogQLValue(params.level)}"`;
186
+ }
187
+ // Structured equality filters (method/status/url/environment/…) — run
188
+ // after `| json` so backend-extracted fields are selectable.
189
+ logql += logqlLabelFilters(params.labels);
190
+ if (params.query) {
191
+ const query = this.escapeLogQLRegex(params.query);
192
+ logql += ` |~ \`${query}\``;
193
+ }
185
194
  }
186
195
  const url = `/loki/api/v1/query_range?query=${encodeURIComponent(logql)}` +
187
196
  `&start=${start}000000000&end=${end}000000000&limit=${limit}`;
package/dist/connectors/loki.test.js CHANGED
@@ -81,6 +81,21 @@ describe("Q-LOG1: queryLogs LogQL assembly", () => {
81
81
  const q = await captureQuery({});
82
82
  assert.equal(q, '{service_name="payment"} | json');
83
83
  });
84
+ it("R4: rawQuery is sent verbatim, bypassing the curated selector", async () => {
85
+ const q = await captureQuery({
86
+ rawQuery: '{app="x", env="prod"} | json | status>=`500`',
87
+ });
88
+ assert.equal(q, '{app="x", env="prod"} | json | status>=`500`');
89
+ });
90
+ it("R4: rawQuery ignores service/labels/level/query", async () => {
91
+ const q = await captureQuery({
92
+ rawQuery: '{job="raw"}',
93
+ labels: { method: "GET" },
94
+ level: "error",
95
+ query: "ignored",
96
+ });
97
+ assert.equal(q, '{job="raw"}');
98
+ });
84
99
  });
85
100
  describe("Q-LOG2: parseDurationSeconds / defaultBucketSeconds", () => {
86
101
  it("parses m/h/d", () => {
package/dist/connectors/prometheus.d.ts CHANGED
@@ -22,6 +22,7 @@ export declare class PrometheusConnector implements ObservabilityConnector {
22
22
  private listServicesFromJobLabel;
23
23
  listAvailableMetrics(_service: string): Promise<MetricInfo[]>;
24
24
  queryMetrics(params: MetricQuery): Promise<MetricResult>;
25
+ private queryRaw;
25
26
  private buildQuery;
26
27
  private groupKey;
27
28
  private getDistinctLabelValues;
package/dist/connectors/prometheus.js CHANGED
@@ -196,7 +196,13 @@ export class PrometheusConnector {
196
196
  return metrics;
197
197
  }
198
198
  async queryMetrics(params) {
199
- const { promql, label, candidate } = await this.buildQuery(params.service, params.metric, params.groupBy);
199
+ // Raw passthrough: the caller supplied verbatim PromQL (capability-gated
200
+ // at the tool layer). Skip the curated catalog/selector machinery and the
201
+ // breakdown-hint probe; run the query as-is over query_range.
202
+ if (params.rawQuery) {
203
+ return this.queryRaw(params.rawQuery, params.duration, params.step);
204
+ }
205
+ const { promql, label, candidate } = await this.buildQuery(params.service, params.metric, params.groupBy, params.labels);
200
206
  const { start, end, step } = this.parseTimeRange(params.duration, params.step);
201
207
  const data = await this.apiGet(`/api/v1/query_range?query=${encodeURIComponent(promql)}&start=${start}&end=${end}&step=${step}`);
202
208
  const seriesList = data?.data?.result || [];
@@ -253,8 +259,48 @@ export class PrometheusConnector {
253
259
  }
254
260
  return result;
255
261
  }
262
+ // Raw PromQL passthrough — used by queryMetrics when params.rawQuery is set.
263
+ // Returns the same MetricResult shape: one group per returned series, the
264
+ // top-level values/summary mirroring the first series. service/metric are
265
+ // reported as "(raw)" since the curated catalog doesn't apply.
266
+ async queryRaw(rawQuery, duration, step) {
267
+ const { start, end, step: resolvedStep } = this.parseTimeRange(duration, step);
268
+ const data = await this.apiGet(`/api/v1/query_range?query=${encodeURIComponent(rawQuery)}&start=${start}&end=${end}&step=${resolvedStep}`);
269
+ const seriesList = data?.data?.result || [];
270
+ const groups = [];
271
+ for (const series of seriesList) {
272
+ const seriesValues = [];
273
+ const rawValues = [];
274
+ for (const [ts, val] of series.values || []) {
275
+ const numVal = parseFloat(val);
276
+ if (!isNaN(numVal)) {
277
+ seriesValues.push({ timestamp: new Date(ts * 1000).toISOString(), value: numVal });
278
+ rawValues.push(numVal);
279
+ }
280
+ }
281
+ groups.push({
282
+ key: this.groupKey(series.metric || {}),
283
+ values: seriesValues,
284
+ summary: this.computeSummary(rawValues),
285
+ });
286
+ }
287
+ const top = groups[0] || { values: [], summary: this.computeSummary([]) };
288
+ const result = {
289
+ source: this.name,
290
+ service: "(raw)",
291
+ metric: "(raw)",
292
+ unit: "",
293
+ values: top.values,
294
+ summary: top.summary,
295
+ resolvedSeries: rawQuery,
296
+ resolvedLabel: "",
297
+ };
298
+ if (groups.length > 1)
299
+ result.groups = groups;
300
+ return result;
301
+ }
256
302
  // --- Private helpers ---
257
- async buildQuery(service, metric, groupBy) {
303
+ async buildQuery(service, metric, groupBy, labels) {
258
304
  // Resolve the service-filter label first. Candidate probing uses this
259
305
  // label to scope existence checks per-service rather than per-source.
260
306
  const escaped = service.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
@@ -272,13 +318,39 @@ export class PrometheusConnector {
272
318
  const def = this.metrics.find((m) => m.name === metric);
273
319
  template = def?.query || `${metric}{ {{selector}} }`;
274
320
  }
321
+ // Extra exact-match label filters (caller-supplied), AND'd into every
322
+ // {{selector}} occurrence. Label names are constrained to the safe
323
+ // Prometheus identifier set (the tool layer already validated them, but
324
+ // we re-constrain here so the connector is safe in isolation); values are
325
+ // escaped for the surrounding PromQL double-quoted string, same as the
326
+ // service value above.
327
+ let extraMatchers = "";
328
+ if (labels) {
329
+ const parts = [];
330
+ for (const [k, v] of Object.entries(labels)) {
331
+ if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(k))
332
+ continue;
333
+ // Escape backslash first, then quote, then control chars — matches
334
+ // the Loki escapeLogQLValue sibling so a value with a newline yields
335
+ // a valid PromQL string literal rather than a 400 parse error.
336
+ const ev = v
337
+ .replace(/\\/g, "\\\\")
338
+ .replace(/"/g, '\\"')
339
+ .replace(/\n/g, "\\n")
340
+ .replace(/\r/g, "\\r")
341
+ .replace(/\t/g, "\\t");
342
+ parts.push(`${k}="${ev}"`);
343
+ }
344
+ if (parts.length)
345
+ extraMatchers = ", " + parts.join(", ");
346
+ }
275
347
  let promql = template;
276
348
  if (template.includes("{{selector}}")) {
277
349
  // Resolve label here for non-candidate paths that haven't done it yet.
278
350
  if (label === "job" && !PROMETHEUS_METRIC_CANDIDATES[metric]) {
279
351
  label = await this.resolveServiceLabel(service);
280
352
  }
281
- const selector = `${label}="${escaped}"`;
353
+ const selector = `${label}="${escaped}"${extraMatchers}`;
282
354
  promql = promql.replace(/\{\{selector\}\}/g, selector);
283
355
  }
284
356
  if (groupBy && template.includes("{{groupBy}}")) {
package/dist/connectors/prometheus.test.js CHANGED
@@ -117,5 +117,86 @@ describe("PrometheusConnector", () => {
117
117
  const { promql } = await proto.buildQuery.call(connector, "api", "custom");
118
118
  assert.equal(promql, 'my_custom_metric{svc="api"}');
119
119
  });
120
+ it("AND's labels into the {{selector}} (issue #415 #4)", async () => {
121
+ const connector = new PrometheusConnector();
122
+ await connector.connect({
123
+ ...fakeSource,
124
+ metrics: [{ name: "reqs", query: "http_requests_total{ {{selector}} }", unit: "", description: "" }],
125
+ });
126
+ // Stub the network label-resolver so the test is hermetic.
127
+ connector.resolveServiceLabel =
128
+ async () => "job";
129
+ const { promql } = await proto.buildQuery.call(connector, "api", "reqs", undefined, {
130
+ status: "500",
131
+ route: "/checkout",
132
+ });
133
+ // Insertion order preserved: status then route, after the service matcher.
134
+ assert.equal(promql, 'http_requests_total{ job="api", status="500", route="/checkout" }');
135
+ });
136
+ it("escapes quotes/backslashes in label values (PromQL injection guard)", async () => {
137
+ const connector = new PrometheusConnector();
138
+ await connector.connect({
139
+ ...fakeSource,
140
+ metrics: [{ name: "reqs", query: "http_requests_total{ {{selector}} }", unit: "", description: "" }],
141
+ });
142
+ connector.resolveServiceLabel =
143
+ async () => "job";
144
+ const { promql } = await proto.buildQuery.call(connector, "api", "reqs", undefined, {
145
+ path: 'a"b\\c',
146
+ });
147
+ assert.equal(promql, 'http_requests_total{ job="api", path="a\\"b\\\\c" }');
148
+ });
149
+ it("escapes newlines/control chars in label values (Loki parity)", async () => {
150
+ const connector = new PrometheusConnector();
151
+ await connector.connect({
152
+ ...fakeSource,
153
+ metrics: [{ name: "reqs", query: "http_requests_total{ {{selector}} }", unit: "", description: "" }],
154
+ });
155
+ connector.resolveServiceLabel =
156
+ async () => "job";
157
+ const { promql } = await proto.buildQuery.call(connector, "api", "reqs", undefined, {
158
+ note: "a\nb\tc",
159
+ });
160
+ assert.equal(promql, 'http_requests_total{ job="api", note="a\\nb\\tc" }');
161
+ });
162
+ it("ignores labels when the template has no {{selector}}", async () => {
163
+ const connector = new PrometheusConnector();
164
+ await connector.connect({
165
+ ...fakeSource,
166
+ metrics: [{ name: "explicit", query: 'm{job="{{service}}"}', unit: "", description: "" }],
167
+ });
168
+ const { promql } = await proto.buildQuery.call(connector, "svc", "explicit", undefined, {
169
+ status: "500",
170
+ });
171
+ assert.equal(promql, 'm{job="svc"}');
172
+ });
173
+ });
174
+ describe("queryMetrics rawQuery passthrough (R4, issue #415 #3)", () => {
175
+ const fakeSource = { name: "test", type: "prometheus", url: "http://localhost:9090", enabled: true };
176
+ it("sends raw PromQL verbatim to query_range, bypassing the catalog", async () => {
177
+ const connector = new PrometheusConnector();
178
+ await connector.connect({ ...fakeSource });
179
+ let captured = "";
180
+ const orig = globalThis.fetch;
181
+ globalThis.fetch = (async (url) => {
182
+ captured = decodeURIComponent((String(url).match(/query=([^&]+)/) || [])[1] || "");
183
+ return {
184
+ ok: true,
185
+ status: 200,
186
+ json: async () => ({ data: { result: [{ metric: { foo: "bar" }, values: [[1700000000, "42"]] }] } }),
187
+ };
188
+ });
189
+ try {
190
+ const raw = "topk(5, sum by(route) (rate(http_requests_total[5m])))";
191
+ const result = await connector.queryMetrics({ service: "", metric: "", duration: "15m", rawQuery: raw });
192
+ assert.equal(captured, raw);
193
+ assert.equal(result.resolvedSeries, raw);
194
+ assert.equal(result.metric, "(raw)");
195
+ assert.equal(result.values[0].value, 42);
196
+ }
197
+ finally {
198
+ globalThis.fetch = orig;
199
+ }
200
+ });
120
201
  });
121
202
  });
package/dist/context.d.ts CHANGED
@@ -36,8 +36,17 @@ export interface RequestContext {
36
36
  /** Correlates all tool calls within one transport request/session. */
37
37
  correlationId: string;
38
38
  }
39
- /** Default all-access anonymous context — preserves current behaviour. */
40
- export declare function defaultContext(): RequestContext;
39
+ /** Default all-access anonymous context — preserves current behaviour.
40
+ * `opts.allowBypassRedaction` lets an operator opt the anonymous identity
41
+ * into per-call redaction bypass (OMCP_BYPASS_REDACTION_ANON) — in an
42
+ * anonymous deployment there is no named credential to add to
43
+ * OMCP_KEY_BYPASS_REDACTION, so this is the only way a single-user
44
+ * self-hosted agent can see raw IPs on its own logs without the blunt
45
+ * global OMCP_REDACTION=off. Defaults off; all existing call sites that
46
+ * omit opts are unchanged. */
47
+ export declare function defaultContext(opts?: {
48
+ allowBypassRedaction?: boolean;
49
+ }): RequestContext;
41
50
  /** Context for an authenticated API-key principal. */
42
51
  export declare function principalContext(principalId: string, allowedSources?: string[], opts?: {
43
52
  allowBypassRedaction?: boolean;
package/dist/context.js CHANGED
@@ -1,11 +1,19 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import { DEFAULT_TENANT, normaliseTenant } from "./tenancy/context.js";
3
- /** Default all-access anonymous context — preserves current behaviour. */
4
- export function defaultContext() {
3
+ /** Default all-access anonymous context — preserves current behaviour.
4
+ * `opts.allowBypassRedaction` lets an operator opt the anonymous identity
5
+ * into per-call redaction bypass (OMCP_BYPASS_REDACTION_ANON) — in an
6
+ * anonymous deployment there is no named credential to add to
7
+ * OMCP_KEY_BYPASS_REDACTION, so this is the only way a single-user
8
+ * self-hosted agent can see raw IPs on its own logs without the blunt
9
+ * global OMCP_REDACTION=off. Defaults off; all existing call sites that
10
+ * omit opts are unchanged. */
11
+ export function defaultContext(opts = {}) {
5
12
  return {
6
13
  principalId: "anonymous",
7
14
  auth: "anonymous",
8
15
  tenant: DEFAULT_TENANT,
16
+ allowBypassRedaction: opts.allowBypassRedaction || undefined,
9
17
  correlationId: randomUUID(),
10
18
  };
11
19
  }
package/dist/context.test.js CHANGED
@@ -34,6 +34,12 @@ test("defaultContext — no allowedTools (anonymous sees every tool, back-compat
34
34
  assert.equal(ctx.allowedTools, undefined);
35
35
  assert.equal(allowsTool(ctx.allowedTools, "any_tool"), true);
36
36
  });
37
+ test("defaultContext — allowBypassRedaction is off by default, opt-in via opts (R5, issue #415 Gap A)", () => {
38
+ assert.equal(defaultContext().allowBypassRedaction, undefined);
39
+ assert.equal(defaultContext({}).allowBypassRedaction, undefined);
40
+ assert.equal(defaultContext({ allowBypassRedaction: false }).allowBypassRedaction, undefined);
41
+ assert.equal(defaultContext({ allowBypassRedaction: true }).allowBypassRedaction, true);
42
+ });
37
43
  import { sessionContext } from "./context.js";
38
44
  test("sessionContext — undefined session → defaultContext shape (anonymous, default tenant)", () => {
39
45
  const ctx = sessionContext(undefined);
package/dist/enrich/ip-dataset.d.ts ADDED
@@ -0,0 +1,25 @@
1
+ export interface IpEnrichment {
2
+ country?: string;
3
+ city?: string;
4
+ asn?: string;
5
+ org?: string;
6
+ hosting?: boolean;
7
+ }
8
+ /** Parse an IPv4 string to an unsigned 32-bit integer, or null if invalid. */
9
+ export declare function ipv4ToInt(ip: string): number | null;
10
+ /** Parse an IPv4 CIDR (or bare IPv4 = /32) to an inclusive integer range. */
11
+ export declare function parseCidr(cidr: string): {
12
+ start: number;
13
+ end: number;
14
+ prefix: number;
15
+ } | null;
16
+ export declare class IpEnrichmentDataset {
17
+ private ranges;
18
+ /** Rows that couldn't be parsed (bad CIDR, IPv6, malformed) — surfaced for diagnostics. */
19
+ readonly skipped: number;
20
+ readonly size: number;
21
+ private constructor();
22
+ static fromCsv(text: string): IpEnrichmentDataset;
23
+ /** Look up an IPv4 string. Returns the most specific matching row, or null. */
24
+ lookup(ip: string): IpEnrichment | null;
25
+ }
package/dist/enrich/ip-dataset.js ADDED
@@ -0,0 +1,113 @@
1
+ // Offline IPv4 enrichment dataset (issue #415 Gap B).
2
+ //
3
+ // Air-gapped by construction: enrichment comes from a LOCAL dataset the
4
+ // operator supplies (OMCP_IP_ENRICH_FILE), never a per-line phone-home to an
5
+ // external geo/ASN API. The format is a dependency-free CSV so no parser
6
+ // library (and no npm install on the host) is needed:
7
+ //
8
+ // network,country,city,asn,org,hosting
9
+ // 1.2.3.0/24,US,Ashburn,AS14618,Example Cloud,true
10
+ // 203.0.113.5,DE,Berlin,AS3320,Example ISP,false
11
+ //
12
+ // - `network` is an IPv4 CIDR (or a bare IPv4, treated as /32). IPv6 rows are
13
+ // skipped (logged by the caller) — IPv4 covers the access-log case the
14
+ // report was about; IPv6 can follow.
15
+ // - Remaining columns are optional; an empty cell is omitted from the result.
16
+ // - `hosting` is the "is this a datacenter / hosting / proxy range" flag — the
17
+ // signal that separates real humans from bots/scanners/VPN-exit-nodes. Parsed
18
+ // truthily from true/1/yes (case-insensitive); anything else is false.
19
+ // - Lines that are blank or start with `#` are ignored. A header row whose
20
+ // first cell is literally `network` is skipped.
21
+ /** Parse an IPv4 string to an unsigned 32-bit integer, or null if invalid. */
22
+ export function ipv4ToInt(ip) {
23
+ const parts = ip.trim().split(".");
24
+ if (parts.length !== 4)
25
+ return null;
26
+ let n = 0;
27
+ for (const p of parts) {
28
+ if (!/^\d{1,3}$/.test(p))
29
+ return null;
30
+ const o = Number(p);
31
+ if (o > 255)
32
+ return null;
33
+ n = n * 256 + o;
34
+ }
35
+ return n >>> 0;
36
+ }
37
+ /** Parse an IPv4 CIDR (or bare IPv4 = /32) to an inclusive integer range. */
38
+ export function parseCidr(cidr) {
39
+ const [addr, prefixStr] = cidr.trim().split("/");
40
+ const base = ipv4ToInt(addr);
41
+ if (base === null)
42
+ return null;
43
+ const prefix = prefixStr === undefined ? 32 : Number(prefixStr);
44
+ if (!Number.isInteger(prefix) || prefix < 0 || prefix > 32)
45
+ return null;
46
+ // Mask: top `prefix` bits. prefix 0 → whole space; 32 → single host.
47
+ const hostBits = 32 - prefix;
48
+ const mask = prefix === 0 ? 0 : (0xffffffff << hostBits) >>> 0;
49
+ const start = (base & mask) >>> 0;
50
+ const end = (start + (hostBits === 32 ? 0xffffffff : (1 << hostBits) - 1)) >>> 0;
51
+ return { start, end, prefix };
52
+ }
53
+ export class IpEnrichmentDataset {
54
+ ranges = [];
55
+ /** Rows that couldn't be parsed (bad CIDR, IPv6, malformed) — surfaced for diagnostics. */
56
+ skipped;
57
+ size;
58
+ constructor(ranges, skipped) {
59
+ // Sort by start asc; lookup picks the most specific (largest prefix)
60
+ // containing range so nested/overlapping rows resolve deterministically.
61
+ this.ranges = ranges.sort((a, b) => a.start - b.start || a.end - b.end);
62
+ this.skipped = skipped;
63
+ this.size = ranges.length;
64
+ }
65
+ static fromCsv(text) {
66
+ const ranges = [];
67
+ let skipped = 0;
68
+ for (const rawLine of text.split(/\r?\n/)) {
69
+ const line = rawLine.trim();
70
+ if (!line || line.startsWith("#"))
71
+ continue;
72
+ const cells = line.split(",").map((c) => c.trim());
73
+ if (cells[0].toLowerCase() === "network")
74
+ continue; // header
75
+ const r = parseCidr(cells[0]);
76
+ if (!r) {
77
+ skipped++;
78
+ continue;
79
+ }
80
+ const data = {};
81
+ if (cells[1])
82
+ data.country = cells[1];
83
+ if (cells[2])
84
+ data.city = cells[2];
85
+ if (cells[3])
86
+ data.asn = cells[3];
87
+ if (cells[4])
88
+ data.org = cells[4];
89
+ if (cells[5] !== undefined && cells[5] !== "") {
90
+ data.hosting = ["true", "1", "yes"].includes(cells[5].toLowerCase());
91
+ }
92
+ ranges.push({ start: r.start, end: r.end, prefix: r.prefix, data });
93
+ }
94
+ return new IpEnrichmentDataset(ranges, skipped);
95
+ }
96
+ /** Look up an IPv4 string. Returns the most specific matching row, or null. */
97
+ lookup(ip) {
98
+ const n = ipv4ToInt(ip);
99
+ if (n === null)
100
+ return null;
101
+ let best = null;
102
+ // Linear scan is fine for the dataset sizes this is meant for (curated
103
+ // ranges of interest, not a full global table). Pick the most specific
104
+ // (largest prefix) range that contains the IP.
105
+ for (const r of this.ranges) {
106
+ if (r.start > n)
107
+ break; // sorted by start asc — all remaining ranges start after n
108
+ if (n <= r.end && (best === null || r.prefix > best.prefix))
109
+ best = r;
110
+ }
111
+ return best ? { ...best.data } : null;
112
+ }
113
+ }
package/dist/enrich/ip-dataset.test.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/enrich/ip-dataset.test.js ADDED
@@ -0,0 +1,85 @@
1
+ import { describe, it } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { ipv4ToInt, parseCidr, IpEnrichmentDataset } from "./ip-dataset.js";
4
+ describe("ipv4ToInt", () => {
5
+ it("parses valid IPv4", () => {
6
+ assert.equal(ipv4ToInt("0.0.0.0"), 0);
7
+ assert.equal(ipv4ToInt("255.255.255.255"), 4294967295);
8
+ assert.equal(ipv4ToInt("1.2.3.4"), 0x01020304);
9
+ });
10
+ it("rejects malformed / out-of-range / non-IPv4", () => {
11
+ assert.equal(ipv4ToInt("1.2.3"), null);
12
+ assert.equal(ipv4ToInt("1.2.3.256"), null);
13
+ assert.equal(ipv4ToInt("1.2.3.4.5"), null);
14
+ assert.equal(ipv4ToInt("a.b.c.d"), null);
15
+ assert.equal(ipv4ToInt("::1"), null);
16
+ assert.equal(ipv4ToInt(""), null);
17
+ });
18
+ });
19
+ describe("parseCidr", () => {
20
+ it("parses a /24 to its inclusive range", () => {
21
+ const r = parseCidr("1.2.3.0/24");
22
+ assert.deepEqual(r, { start: 0x01020300, end: 0x010203ff, prefix: 24 });
23
+ });
24
+ it("treats a bare IP as /32", () => {
25
+ const r = parseCidr("203.0.113.5");
26
+ assert.equal(r?.prefix, 32);
27
+ assert.equal(r?.start, r?.end);
28
+ });
29
+ it("normalises a non-aligned base to the network address", () => {
30
+ // 1.2.3.42/24 → network 1.2.3.0
31
+ const r = parseCidr("1.2.3.42/24");
32
+ assert.equal(r?.start, 0x01020300);
33
+ });
34
+ it("handles /0 (whole space)", () => {
35
+ const r = parseCidr("0.0.0.0/0");
36
+ assert.deepEqual(r, { start: 0, end: 4294967295, prefix: 0 });
37
+ });
38
+ it("rejects bad prefixes / addresses", () => {
39
+ assert.equal(parseCidr("1.2.3.0/33"), null);
40
+ assert.equal(parseCidr("1.2.3.0/-1"), null);
41
+ assert.equal(parseCidr("nope/24"), null);
42
+ });
43
+ });
44
+ describe("IpEnrichmentDataset.fromCsv + lookup", () => {
45
+ const csv = [
46
+ "network,country,city,asn,org,hosting", // header skipped
47
+ "# a comment line",
48
+ "",
49
+ "10.0.0.0/8,US,,AS100,Example Cloud,true",
50
+ "10.1.2.0/24,US,Ashburn,AS100,Example Cloud Edge,true",
51
+ "203.0.113.5,DE,Berlin,AS3320,Example ISP,false",
52
+ "2001:db8::/32,XX,,,,", // IPv6 → skipped
53
+ "garbage-row",
54
+ ].join("\n");
55
+ it("parses rows, skips header/comments/blank, counts skipped", () => {
56
+ const ds = IpEnrichmentDataset.fromCsv(csv);
57
+ assert.equal(ds.size, 3); // 3 valid IPv4 rows
58
+ assert.equal(ds.skipped, 2); // IPv6 + garbage
59
+ });
60
+ it("returns the most specific (longest-prefix) match", () => {
61
+ const ds = IpEnrichmentDataset.fromCsv(csv);
62
+ // 10.1.2.5 is inside both /8 and /24 → the /24 (more specific) wins.
63
+ const hit = ds.lookup("10.1.2.5");
64
+ assert.equal(hit?.city, "Ashburn");
65
+ assert.equal(hit?.org, "Example Cloud Edge");
66
+ assert.equal(hit?.hosting, true);
67
+ });
68
+ it("falls back to the broader range when no specific one matches", () => {
69
+ const ds = IpEnrichmentDataset.fromCsv(csv);
70
+ const hit = ds.lookup("10.5.5.5"); // only in /8
71
+ assert.equal(hit?.asn, "AS100");
72
+ assert.equal(hit?.city, undefined); // empty cell omitted
73
+ });
74
+ it("matches a /32 exactly and parses hosting=false", () => {
75
+ const ds = IpEnrichmentDataset.fromCsv(csv);
76
+ const hit = ds.lookup("203.0.113.5");
77
+ assert.equal(hit?.country, "DE");
78
+ assert.equal(hit?.hosting, false);
79
+ });
80
+ it("returns null for an unmatched or invalid IP", () => {
81
+ const ds = IpEnrichmentDataset.fromCsv(csv);
82
+ assert.equal(ds.lookup("8.8.8.8"), null);
83
+ assert.equal(ds.lookup("not-an-ip"), null);
84
+ });
85
+ });