npm - maqam - Versions diffs - 0.1.0 - Mend

maqam 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/LICENSE +21 -0
package/README.md +176 -0
package/app/app.js +113 -0
package/app/assets/maqam-brand-board.png +0 -0
package/app/assets/maqam-logo.svg +17 -0
package/app/assets/maqam-readme-hero.png +0 -0
package/app/assets/maqam-system-map.svg +114 -0
package/app/index.html +113 -0
package/app/styles.css +397 -0
package/bin/ajnas-crawl.js +119 -0
package/bin/maqam.js +22 -0
package/package.json +74 -0
package/src/framework/errors.js +35 -0
package/src/framework/evidence-ledger.js +72 -0
package/src/framework/policy.js +119 -0
package/src/framework/research-workflow.js +80 -0
package/src/framework/runtime.js +101 -0
package/src/framework/skill-registry.js +52 -0
package/src/framework/tool-gateway.js +65 -0
package/src/index.js +351 -0
package/src/maqam/server.js +189 -0

package/src/index.js ADDED Viewed

@@ -0,0 +1,351 @@
+import * as cheerio from "cheerio";
+import robotsParser from "robots-parser";
+import TurndownService from "turndown";
+// Default `user-agent` value used by crawl() when the caller does not supply one.
+const DEFAULT_USER_AGENT = "Maqam/0.1 (+https://github.com/AjnasNB/maqam)";
+// Default cap on a single response body, in bytes (3 MiB).
+const DEFAULT_MAX_BYTES = 3 * 1024 * 1024;
+// Promise-based delay: resolves after `ms` milliseconds.
+const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+/**
+ * Resolve `value` against `base` and serialize the resulting absolute URL.
+ * @param {string} value - Absolute or relative URL.
+ * @param {string} [base] - Base URL used to resolve a relative `value`.
+ * @returns {string|null} The absolute URL string, or null when parsing fails.
+ */
+function toUrl(value, base) {
+  let parsed;
+  try {
+    parsed = new URL(value, base);
+  } catch {
+    return null;
+  }
+  return parsed.toString();
+}
+/**
+ * Produce a canonical string form of a URL: the fragment is removed and an
+ * explicit default port (80 for http, 443 for https) is cleared.
+ * @param {string} value - Absolute URL.
+ * @returns {string} The normalized URL.
+ * @throws {TypeError} When `value` is not a parseable absolute URL.
+ */
+function normalizeUrl(value) {
+  const parsed = new URL(value);
+  const { protocol, port } = parsed;
+  const usesDefaultPort =
+    (protocol === "http:" && port === "80") || (protocol === "https:" && port === "443");
+  parsed.hash = "";
+  if (usesDefaultPort) {
+    parsed.port = "";
+  }
+  return parsed.toString();
+}
+/**
+ * Tell whether `value` parses as an absolute URL using the http or https scheme.
+ * @param {string} value - Candidate URL.
+ * @returns {boolean} True only for parseable http(s) URLs.
+ */
+function isHttpUrl(value) {
+  let protocol;
+  try {
+    ({ protocol } = new URL(value));
+  } catch {
+    return false;
+  }
+  return ["http:", "https:"].includes(protocol);
+}
+/**
+ * Compare the origins (scheme, host and port) of two absolute URLs.
+ * @param {string} a - First URL.
+ * @param {string} b - Second URL.
+ * @returns {boolean} True when both URLs have the same origin.
+ * @throws {TypeError} When either argument is not a parseable absolute URL.
+ */
+function sameOrigin(a, b) {
+  const [first, second] = [a, b].map((candidate) => new URL(candidate).origin);
+  return first === second;
+}
+/**
+ * Fetch `url` and return its body decoded as UTF-8, bounded by a timeout and a size cap.
+ *
+ * @param {string} url - URL to request.
+ * @param {object} options
+ * @param {number} options.timeoutMs - Milliseconds before the request is aborted.
+ * @param {number} options.maxBytes - Maximum number of body bytes accepted.
+ * @param {string} options.userAgent - Value sent in the `user-agent` header.
+ * @param {string} [options.accept] - Value for the `accept` header; an HTML-first default is used otherwise.
+ * @returns {Promise<{response: Response, text: string, contentType: string}>}
+ * @throws {Error} When the declared or actual body size exceeds `maxBytes`, or when
+ *   the request fails or is aborted by the timeout.
+ */
+async function fetchText(url, options) {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), options.timeoutMs);
+  let reader = null;
+  try {
+    const response = await fetch(url, {
+      headers: {
+        "user-agent": options.userAgent,
+        accept: options.accept || "text/html,application/xhtml+xml,application/xml;q=0.9,text/plain;q=0.8,*/*;q=0.5"
+      },
+      redirect: "follow",
+      signal: controller.signal
+    });
+    const contentType = response.headers.get("content-type") || "";
+    const length = Number(response.headers.get("content-length") || 0);
+    if (length > options.maxBytes) {
+      throw new Error(`Response too large: ${length} bytes`);
+    }
+    reader = response.body?.getReader() ?? null;
+    if (!reader) {
+      return { response, text: await response.text(), contentType };
+    }
+    const chunks = [];
+    let received = 0;
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      received += value.byteLength;
+      if (received > options.maxBytes) {
+        throw new Error(`Response exceeded maxBytes: ${options.maxBytes}`);
+      }
+      chunks.push(value);
+    }
+    return { response, text: Buffer.concat(chunks).toString("utf8"), contentType };
+  } catch (error) {
+    // Stop the transfer on every failure path (oversized body, read error) so the
+    // connection is released instead of continuing to download in the background.
+    if (reader) await reader.cancel().catch(() => {});
+    controller.abort();
+    throw error;
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+/**
+ * Extract the values of all `Sitemap:` directives from robots.txt content.
+ *
+ * Lines may end in LF, CRLF or a bare CR. Anything from `#` to the end of a
+ * line is treated as a comment and dropped, so a trailing comment is not
+ * returned as part of the URL. The directive name is case-insensitive and may
+ * be followed by whitespace before the colon.
+ *
+ * @param {string} robotsText - Raw robots.txt body.
+ * @returns {string[]} Sitemap values in file order; empty values are omitted.
+ */
+function parseRobotsSitemaps(robotsText) {
+  return robotsText
+    .split(/\r\n|\r|\n/)
+    .map((line) => line.replace(/#.*$/, "").trim())
+    .filter((line) => /^sitemap\s*:/i.test(line))
+    .map((line) => line.replace(/^sitemap\s*:\s*/i, ""))
+    .filter(Boolean);
+}
+/**
+ * Download and parse `/robots.txt` for an origin.
+ *
+ * The robots body is capped at 512 KiB (or `options.maxBytes` if smaller). A
+ * non-OK response or any failure yields a parser built from empty rules and no
+ * sitemaps, so this function does not reject for fetch errors.
+ *
+ * @param {string} origin - Origin (or any URL on it) used to resolve `/robots.txt`.
+ * @param {object} options - Fetch options forwarded to fetchText.
+ * @returns {Promise<{parser: object, sitemaps: string[]}>}
+ */
+async function loadRobots(origin, options) {
+  const robotsUrl = new URL("/robots.txt", origin).toString();
+  // Fallback used whenever robots.txt cannot be obtained: empty rule set, no sitemaps.
+  const emptyRules = () => ({
+    parser: robotsParser(robotsUrl, ""),
+    sitemaps: []
+  });
+  try {
+    const robotsOptions = {
+      ...options,
+      accept: "text/plain,*/*;q=0.5",
+      maxBytes: Math.min(options.maxBytes, 512 * 1024)
+    };
+    const { response, text } = await fetchText(robotsUrl, robotsOptions);
+    return response.ok
+      ? { parser: robotsParser(robotsUrl, text), sitemaps: parseRobotsSitemaps(text) }
+      : emptyRules();
+  } catch {
+    return emptyRules();
+  }
+}
+/**
+ * Collect page URLs listed by a sitemap, following sitemap index files.
+ *
+ * `<url><loc>` entries are returned as page URLs. `<sitemap><loc>` entries are
+ * child sitemaps: they are fetched and expanded rather than being returned as
+ * if they were pages. Expansion is bounded by a depth limit and a cap on the
+ * number of sitemap documents fetched, and each sitemap is fetched at most once.
+ * Fetch or parse failures are swallowed; whatever was collected so far is returned.
+ *
+ * @param {string} sitemapUrl - URL of the sitemap or sitemap index.
+ * @param {object} options - Fetch options forwarded to fetchText.
+ * @param {number} [options.maxSitemapDepth=3] - How many index levels below the root to follow.
+ * @param {number} [options.maxSitemaps=50] - Maximum number of sitemap documents to fetch.
+ * @returns {Promise<string[]>} Normalized, de-duplicated http(s) page URLs.
+ */
+async function discoverSitemapUrls(sitemapUrl, options) {
+  const maxDepth = options?.maxSitemapDepth ?? 3;
+  const maxSitemaps = options?.maxSitemaps ?? 50;
+  const visited = new Set();
+  const pages = new Set();
+  // Fetch one sitemap document, record its pages, then descend into child sitemaps.
+  async function visit(target, depth) {
+    if (visited.has(target) || visited.size >= maxSitemaps) return;
+    visited.add(target);
+    const nested = [];
+    try {
+      const { response, text, contentType } = await fetchText(target, {
+        ...options,
+        accept: "application/xml,text/xml,*/*;q=0.5"
+      });
+      if (!response.ok) return;
+      // Accept mislabelled sitemaps as long as the body looks like markup.
+      if (!contentType.includes("xml") && !text.trim().startsWith("<")) return;
+      const $ = cheerio.load(text, { xmlMode: true });
+      $("url > loc").each((_, el) => {
+        const value = $(el).text().trim();
+        if (isHttpUrl(value)) pages.add(normalizeUrl(value));
+      });
+      $("sitemap > loc").each((_, el) => {
+        const value = $(el).text().trim();
+        if (isHttpUrl(value)) nested.push(normalizeUrl(value));
+      });
+    } catch {
+      return;
+    }
+    if (depth >= maxDepth) return;
+    for (const child of nested) {
+      await visit(child, depth + 1);
+    }
+  }
+  await visit(sitemapUrl, 0);
+  return [...pages];
+}
+/**
+ * Gather the distinct http(s) link targets of every `<a href>` in a document.
+ * @param {Function} $ - Cheerio root for the loaded document.
+ * @param {string} baseUrl - URL used to resolve relative hrefs.
+ * @returns {string[]} Normalized absolute URLs in first-seen order, without duplicates.
+ */
+function extractLinks($, baseUrl) {
+  const unique = new Set();
+  $("a[href]").each((_, anchor) => {
+    const absolute = toUrl($(anchor).attr("href"), baseUrl);
+    if (!absolute || !isHttpUrl(absolute)) return;
+    unique.add(normalizeUrl(absolute));
+  });
+  return Array.from(unique);
+}
+/**
+ * Remove non-content elements from the document in place: scripts, styles and
+ * other non-text containers first, then elements marked hidden.
+ * @param {Function} $ - Cheerio root for the loaded document.
+ * @returns {void}
+ */
+function cleanForExtraction($) {
+  const removals = [
+    "script, style, noscript, template, svg, canvas, iframe",
+    "[hidden], [aria-hidden='true']"
+  ];
+  for (const selector of removals) {
+    $(selector).remove();
+  }
+}
+/**
+ * Parse an HTML document and extract metadata, links, plain text and Markdown.
+ *
+ * Non-content and hidden elements are stripped before anything is read. The
+ * content root is the first `<main>` element when present, otherwise `<body>`.
+ *
+ * @param {string} html - Raw HTML source.
+ * @param {string} url - URL of the page; used to resolve relative links and as
+ *   the canonical fallback.
+ * @returns {{url: string, canonical: (string|null), title: string, description: string,
+ *   h1: string, text: string, markdown: string, links: string[], fetchedAt: string}}
+ *   `fetchedAt` is the ISO timestamp taken when this function returns.
+ */
+export function extractPage(html, url) {
+  const collapseWhitespace = (value) => value.replace(/\s+/g, " ");
+  const $ = cheerio.load(html);
+  cleanForExtraction($);
+  const firstHeadingText = () => $("h1").first().text();
+  // Title falls back to the first heading when <title> is empty or missing.
+  const rawTitle = $("title").first().text() || firstHeadingText() || "";
+  const canonicalHref = $("link[rel='canonical']").attr("href") || url;
+  const links = extractLinks($, url);
+  const mainElement = $("main").first();
+  const contentRoot = mainElement.length ? mainElement : $("body");
+  const markdownConverter = new TurndownService({
+    headingStyle: "atx",
+    codeBlockStyle: "fenced",
+    bulletListMarker: "-"
+  });
+  // Runs of three or more newlines are squeezed to a single blank line.
+  const markdown = markdownConverter
+    .turndown(contentRoot.html() || "")
+    .replace(/\n{3,}/g, "\n\n")
+    .trim();
+  return {
+    url,
+    canonical: toUrl(canonicalHref, url),
+    title: collapseWhitespace(rawTitle.trim()),
+    description: ($("meta[name='description']").attr("content") || "").trim(),
+    h1: collapseWhitespace(firstHeadingText().trim()),
+    text: collapseWhitespace(contentRoot.text()).trim(),
+    markdown,
+    links,
+    fetchedAt: new Date().toISOString()
+  };
+}
+/**
+ * FIFO queue backed by an array and a read offset, so removing the head does
+ * not re-index the array on every call. The consumed prefix is dropped once
+ * more than 1000 items have been read and they make up over half of the array.
+ */
+class CrawlQueue {
+  constructor() {
+    this.items = [];
+    this.offset = 0;
+  }
+  /** Append `url` to the tail of the queue. */
+  push(url) {
+    this.items.push(url);
+  }
+  /**
+   * Remove and return the head of the queue.
+   * @returns {*} The oldest queued value, or null when the queue is empty.
+   */
+  shift() {
+    const { items, offset } = this;
+    if (offset >= items.length) return null;
+    const head = items[offset];
+    const consumed = offset + 1;
+    const shouldCompact = consumed > 1000 && consumed * 2 > items.length;
+    this.items = shouldCompact ? items.slice(consumed) : items;
+    this.offset = shouldCompact ? 0 : consumed;
+    return head;
+  }
+  /** Number of values still waiting in the queue. */
+  get length() {
+    return this.items.length - this.offset;
+  }
+}
+/**
+ * Crawl pages starting from one or more seed URLs and return the extracted pages.
+ *
+ * @param {object} [input]
+ * @param {string[]} [input.seeds] - Seed URLs (`input.urls` is accepted as an alias); non-http(s) entries are ignored.
+ * @param {number} [input.maxPages=50] - Maximum number of pages returned.
+ * @param {number} [input.concurrency=4] - Maximum number of pages processed in parallel.
+ * @param {boolean} [input.sameOrigin=true] - Only follow URLs on a seed's origin.
+ * @param {boolean} [input.includeSitemaps=false] - Seed the queue from sitemaps before crawling.
+ * @param {boolean} [input.obeyRobots=true] - Skip URLs disallowed by robots.txt.
+ * @param {string} [input.userAgent] - User-Agent header value.
+ * @param {number} [input.delayMs=250] - Minimum spacing between requests to one origin.
+ * @param {number} [input.timeoutMs=15000] - Per-request timeout.
+ * @param {number} [input.maxBytes] - Per-response size cap in bytes.
+ * @param {Function} [input.onPage] - Awaited with each extracted page.
+ * @param {Function} [input.onError] - Awaited with `{ url, error }` for each failed URL.
+ * @returns {Promise<object[]>} Extracted pages, at most `maxPages` of them.
+ * @throws {Error} When no valid http(s) seed URL is supplied.
+ */
+export async function crawl(input = {}) {
+  const seeds = (input.seeds || input.urls || [])
+    .map((seed) => (isHttpUrl(seed) ? normalizeUrl(seed) : null))
+    .filter(Boolean);
+  if (!seeds.length) {
+    throw new Error("At least one http(s) seed URL is required.");
+  }
+  const options = {
+    maxPages: input.maxPages ?? 50,
+    concurrency: input.concurrency ?? 4,
+    sameOrigin: input.sameOrigin ?? true,
+    includeSitemaps: input.includeSitemaps ?? false,
+    obeyRobots: input.obeyRobots ?? true,
+    userAgent: input.userAgent || DEFAULT_USER_AGENT,
+    delayMs: input.delayMs ?? 250,
+    timeoutMs: input.timeoutMs ?? 15_000,
+    maxBytes: input.maxBytes ?? DEFAULT_MAX_BYTES,
+    onPage: input.onPage || null,
+    onError: input.onError || null
+  };
+  // How long an idle worker waits before re-checking the queue.
+  const idlePollMs = 25;
+  const queue = new CrawlQueue();
+  const seen = new Set();
+  const enqueued = new Set();
+  const results = [];
+  const robotsByOrigin = new Map();
+  const lastFetchByOrigin = new Map();
+  const seedOrigins = new Set(seeds.map((seed) => new URL(seed).origin));
+  const enqueue = (url) => {
+    if (!url || enqueued.has(url) || seen.has(url)) return;
+    if (options.sameOrigin && !seedOrigins.has(new URL(url).origin)) return;
+    enqueued.add(url);
+    queue.push(url);
+  };
+  for (const seed of seeds) enqueue(seed);
+  // Cache the in-flight promise, not the resolved value, so concurrent workers
+  // hitting a new origin share a single robots.txt request.
+  function getRobots(url) {
+    const origin = new URL(url).origin;
+    let pending = robotsByOrigin.get(origin);
+    if (!pending) {
+      pending = loadRobots(origin, options);
+      robotsByOrigin.set(origin, pending);
+    }
+    return pending;
+  }
+  if (options.includeSitemaps) {
+    for (const seed of seeds) {
+      const robots = await getRobots(seed);
+      const sitemapUrls = robots.sitemaps.length
+        ? robots.sitemaps
+        : [new URL("/sitemap.xml", new URL(seed).origin).toString()];
+      for (const sitemapUrl of sitemapUrls) {
+        const urls = await discoverSitemapUrls(sitemapUrl, options);
+        for (const url of urls) enqueue(url);
+      }
+    }
+  }
+  // Reserve the next request slot for the origin *before* sleeping, so two
+  // workers cannot read the same timestamp and fire at the same moment.
+  async function waitForOrigin(url) {
+    const origin = new URL(url).origin;
+    const now = Date.now();
+    const previous = lastFetchByOrigin.get(origin);
+    const slot = previous === undefined ? now : Math.max(now, previous + options.delayMs);
+    lastFetchByOrigin.set(origin, slot);
+    if (slot > now) await sleep(slot - now);
+  }
+  // Number of URLs currently being processed by any worker.
+  let active = 0;
+  async function worker() {
+    while (results.length < options.maxPages) {
+      // In-flight pages already account for the remaining budget: wait for them
+      // instead of fetching (and reporting via onPage) more than maxPages.
+      if (results.length + active >= options.maxPages) {
+        await sleep(idlePollMs);
+        continue;
+      }
+      const url = queue.shift();
+      if (!url) {
+        // An empty queue only means "done" when nobody is mid-fetch; a busy
+        // worker may still enqueue the links it discovers.
+        if (active === 0) return;
+        await sleep(idlePollMs);
+        continue;
+      }
+      if (seen.has(url)) continue;
+      seen.add(url);
+      active += 1;
+      try {
+        if (options.obeyRobots) {
+          const robots = await getRobots(url);
+          if (robots.parser && !robots.parser.isAllowed(url, options.userAgent)) {
+            continue;
+          }
+        }
+        await waitForOrigin(url);
+        const { response, text, contentType } = await fetchText(url, options);
+        if (!response.ok) {
+          throw new Error(`HTTP ${response.status}`);
+        }
+        if (!/html|xml|text\//i.test(contentType)) {
+          continue;
+        }
+        const page = extractPage(text, response.url || url);
+        page.status = response.status;
+        page.contentType = contentType;
+        results.push(page);
+        if (options.onPage) await options.onPage(page);
+        for (const link of page.links) {
+          // Keep the frontier bounded relative to the page budget.
+          if (results.length + queue.length >= options.maxPages * 6) break;
+          enqueue(link);
+        }
+      } catch (error) {
+        const failure = { url, error: error.message || String(error) };
+        if (options.onError) await options.onError(failure);
+      } finally {
+        active -= 1;
+      }
+    }
+  }
+  const workerCount = Math.max(1, Math.min(options.concurrency, options.maxPages));
+  await Promise.all(Array.from({ length: workerCount }, () => worker()));
+  return results.slice(0, options.maxPages);
+}
+export { discoverSitemapUrls, normalizeUrl };
+export { AjnasFrameworkError, ApprovalRequiredError, PolicyDeniedError, toErrorRecord } from "./framework/errors.js";
+export { PolicyEngine } from "./framework/policy.js";
+export { EvidenceLedger } from "./framework/evidence-ledger.js";
+export { ToolGateway } from "./framework/tool-gateway.js";
+export { SkillRegistry } from "./framework/skill-registry.js";
+export { AgentRuntime } from "./framework/runtime.js";
+export { createResearchWorkflow } from "./framework/research-workflow.js";
+/**
+ * Build a tool function that runs crawl() with preset options.
+ * @param {object} [defaultOptions] - Options applied to every invocation.
+ * @returns {Function} Async function taking per-call crawl input; per-call
+ *   values override `defaultOptions` key by key.
+ */
+export function createCrawlerTool(defaultOptions = {}) {
+  async function crawlerTool(input = {}) {
+    const merged = Object.assign({}, defaultOptions, input);
+    return crawl(merged);
+  }
+  return crawlerTool;
+}

package/src/maqam/server.js ADDED Viewed

@@ -0,0 +1,189 @@
+import { createServer } from "node:http";
+import { readFile } from "node:fs/promises";
+import { extname, resolve, sep } from "node:path";
+import { fileURLToPath } from "node:url";
+import {
+  AgentRuntime,
+  EvidenceLedger,
+  PolicyEngine,
+  ToolGateway,
+  createCrawlerTool,
+  createResearchWorkflow
+} from "../index.js";
+// Product identity included in the health-check and research-run API responses.
+const PRODUCT = {
+  name: "Maqam",
+  tagline: "Compose governed agents",
+  description: "Enterprise agent framework console for policy-bound research, evidence capture, and auditable workflow runs."
+};
+// Static asset directory used when no `publicDir` option is given: `../../app/` relative to this module.
+const DEFAULT_PUBLIC_DIR = fileURLToPath(new URL("../../app/", import.meta.url));
+// `content-type` header values keyed by file extension (including the leading dot).
+const CONTENT_TYPES = {
+  ".html": "text/html; charset=utf-8",
+  ".css": "text/css; charset=utf-8",
+  ".js": "text/javascript; charset=utf-8",
+  ".svg": "image/svg+xml; charset=utf-8",
+  ".png": "image/png",
+  ".json": "application/json; charset=utf-8"
+};
+/**
+ * Send `payload` as a pretty-printed JSON response.
+ *
+ * The payload is serialized before any header is written: if serialization
+ * throws (for example on a circular structure), the response is still
+ * untouched and the caller can send an error response instead.
+ *
+ * @param {import("node:http").ServerResponse} response - Response to write to.
+ * @param {number} statusCode - HTTP status code.
+ * @param {*} payload - JSON-serializable value.
+ * @returns {void}
+ */
+function sendJson(response, statusCode, payload) {
+  const body = JSON.stringify(payload, null, 2);
+  response.writeHead(statusCode, { "content-type": CONTENT_TYPES[".json"] });
+  response.end(body);
+}
+/**
+ * Create an Error tagged with the HTTP status code it should be reported with.
+ * @param {number} statusCode - HTTP status to attach as `statusCode`.
+ * @param {string} message - Error message.
+ * @returns {Error} The error object (returned, not thrown).
+ */
+function httpError(statusCode, message) {
+  return Object.assign(new Error(message), { statusCode });
+}
+/**
+ * Read a request body (capped at 1 MiB) and parse it as a JSON object.
+ *
+ * @param {AsyncIterable<Uint8Array>} request - Incoming request stream.
+ * @returns {Promise<object>} The parsed object, or `{}` when the body is empty.
+ * @throws {Error} With `statusCode` 413 when the body exceeds 1 MiB, or 400 when
+ *   it is not valid JSON or is valid JSON that is not an object (`null`, a
+ *   number, a string, an array, ...). Rejecting those up front keeps callers that
+ *   read properties off the body from failing with an unrelated TypeError.
+ */
+async function readJsonBody(request) {
+  const chunks = [];
+  let size = 0;
+  for await (const chunk of request) {
+    size += chunk.byteLength;
+    if (size > 1024 * 1024) throw httpError(413, "Request body is too large.");
+    chunks.push(chunk);
+  }
+  if (!chunks.length) return {};
+  let parsed;
+  try {
+    parsed = JSON.parse(Buffer.concat(chunks).toString("utf8"));
+  } catch {
+    throw httpError(400, "Request body must be valid JSON.");
+  }
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw httpError(400, "Request body must be a JSON object.");
+  }
+  return parsed;
+}
+/**
+ * Validate and normalize the seed URLs supplied by an API client.
+ *
+ * Entries that do not parse as http(s) URLs are dropped, fragments are removed
+ * and duplicates are collapsed while keeping first-seen order.
+ *
+ * @param {*} seeds - Expected to be an array of URL strings.
+ * @returns {string[]} Distinct normalized seed URLs.
+ * @throws {Error} With `statusCode` 400 when `seeds` is not an array or holds no
+ *   valid http(s) URL.
+ */
+function normalizeSeeds(seeds) {
+  if (!Array.isArray(seeds)) throw httpError(400, "`seeds` must be an array of URLs.");
+  const unique = new Set();
+  for (const seed of seeds) {
+    let parsed;
+    try {
+      parsed = new URL(seed);
+    } catch {
+      continue;
+    }
+    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") continue;
+    parsed.hash = "";
+    unique.add(parsed.toString());
+  }
+  if (unique.size === 0) throw httpError(400, "At least one http(s) seed URL is required.");
+  return [...unique];
+}
+/**
+ * Coerce a client-supplied page limit to a whole number between 1 and 25.
+ * @param {*} value - Requested limit; falsy or non-finite values fall back to 5.
+ * @returns {number} The limit to use.
+ */
+function clampMaxPages(value) {
+  const fallback = 5;
+  const requested = Number(value || fallback);
+  if (!Number.isFinite(requested)) return fallback;
+  const whole = Math.floor(requested);
+  if (whole < 1) return 1;
+  return whole > 25 ? 25 : whole;
+}
+/**
+ * List the distinct origins of the given URLs in first-seen order.
+ * @param {string[]} seeds - Absolute URLs.
+ * @returns {string[]} Unique origins.
+ * @throws {TypeError} When an entry is not a parseable absolute URL.
+ */
+function deriveOrigins(seeds) {
+  const origins = new Set();
+  for (const seed of seeds) {
+    origins.add(new URL(seed).origin);
+  }
+  return Array.from(origins);
+}
+/**
+ * Execute one research workflow run for an API request.
+ *
+ * A fresh evidence ledger, policy engine, tool gateway and runtime are created
+ * for every call. Only the "crawler" tool is registered and allowed.
+ *
+ * @param {object} body - Parsed request body (`seeds`, `maxPages`, `allowedOrigins`,
+ *   `sameOrigin`, `objective`).
+ * @param {Function|null} crawlerTool - Crawler implementation to register; when
+ *   falsy a default crawler tool is created.
+ * @returns {Promise<{product: object, run: *, toolTrace: *, generatedAt: string}>}
+ * @throws {Error} With `statusCode` 400 when the seeds are invalid.
+ */
+async function runResearch(body, crawlerTool) {
+  const maxToolCalls = 40;
+  const seeds = normalizeSeeds(body.seeds || []);
+  const maxPages = clampMaxPages(body.maxPages);
+  // Client-supplied origins win; otherwise allow exactly the origins of the seeds.
+  const hasExplicitOrigins = Array.isArray(body.allowedOrigins) && body.allowedOrigins.length;
+  const allowedOrigins = hasExplicitOrigins ? body.allowedOrigins : deriveOrigins(seeds);
+  const evidenceLedger = new EvidenceLedger();
+  const policyEngine = new PolicyEngine({
+    allowedTools: ["crawler"],
+    allowedOrigins,
+    maxToolCalls
+  });
+  const toolGateway = new ToolGateway({ policyEngine, evidenceLedger });
+  const crawler = crawlerTool || createCrawlerTool({
+    concurrency: 2,
+    delayMs: 250,
+    timeoutMs: 12_000
+  });
+  toolGateway.registerTool("crawler", crawler);
+  const runtime = new AgentRuntime({ policyEngine, evidenceLedger, toolGateway });
+  const workflow = createResearchWorkflow({
+    seeds,
+    maxPages,
+    sameOrigin: body.sameOrigin ?? true
+  });
+  const context = {
+    objective: body.objective || "Run a governed public research workflow.",
+    allowedTools: ["crawler"],
+    allowedOrigins,
+    budget: { maxToolCalls, maxRuntimeMs: 600_000 }
+  };
+  const run = await runtime.runWorkflow(workflow, context);
+  return {
+    product: PRODUCT,
+    run,
+    toolTrace: toolGateway.trace,
+    generatedAt: new Date().toISOString()
+  };
+}
+/**
+ * Serve a file from `publicDir` for the request path (`/` maps to `/index.html`).
+ *
+ * Responds 400 when the path contains a malformed percent-escape, 403 when the
+ * resolved path falls outside `publicDir`, and 404 when the file cannot be read.
+ *
+ * @param {import("node:http").IncomingMessage} request - Incoming request.
+ * @param {import("node:http").ServerResponse} response - Response to write to.
+ * @param {string} publicDir - Directory that files are served from.
+ * @returns {Promise<void>}
+ */
+async function serveStatic(request, response, publicDir) {
+  const url = new URL(request.url, "http://localhost");
+  const pathname = url.pathname === "/" ? "/index.html" : url.pathname;
+  let decodedPath;
+  try {
+    decodedPath = decodeURIComponent(pathname);
+  } catch {
+    // A malformed escape such as "%E0%A4%A" is a client error, not a server failure.
+    sendJson(response, 400, { error: "Bad request" });
+    return;
+  }
+  const root = resolve(publicDir);
+  const filePath = resolve(root, `.${decodedPath}`);
+  // Compare against the root *with* a trailing separator: a bare prefix test
+  // would also accept sibling directories such as "<root>-backup".
+  const rootPrefix = root.endsWith(sep) ? root : `${root}${sep}`;
+  if (filePath !== root && !filePath.startsWith(rootPrefix)) {
+    sendJson(response, 403, { error: "Forbidden" });
+    return;
+  }
+  try {
+    const file = await readFile(filePath);
+    response.writeHead(200, {
+      "content-type": CONTENT_TYPES[extname(filePath)] || "application/octet-stream"
+    });
+    response.end(file);
+  } catch {
+    sendJson(response, 404, { error: "Not found" });
+  }
+}
+/**
+ * Create (without starting) the HTTP server for the Maqam console.
+ *
+ * Routes: `GET /api/health`, `POST /api/runs/research`, and static files for any
+ * other GET or HEAD request. Everything else gets a 405. Errors are reported as
+ * JSON using the error's `statusCode`, or 500 when it has none.
+ *
+ * @param {object} [options]
+ * @param {string} [options.publicDir] - Directory for static files.
+ * @param {Function} [options.crawlerTool] - Crawler implementation used for research runs.
+ * @returns {import("node:http").Server} The configured server.
+ */
+export function createMaqamServer(options = {}) {
+  const publicDir = options.publicDir || DEFAULT_PUBLIC_DIR;
+  const crawlerTool = options.crawlerTool || null;
+  // Dispatch one request to its route; failures propagate to the listener below.
+  const route = async (request, response) => {
+    const { method } = request;
+    const { pathname } = new URL(request.url, "http://localhost");
+    if (method === "GET" && pathname === "/api/health") {
+      sendJson(response, 200, { product: PRODUCT, status: "ok" });
+      return;
+    }
+    if (method === "POST" && pathname === "/api/runs/research") {
+      const body = await readJsonBody(request);
+      sendJson(response, 200, await runResearch(body, crawlerTool));
+      return;
+    }
+    if (method === "GET" || method === "HEAD") {
+      await serveStatic(request, response, publicDir);
+      return;
+    }
+    sendJson(response, 405, { error: "Method not allowed" });
+  };
+  return createServer(async (request, response) => {
+    try {
+      await route(request, response);
+    } catch (error) {
+      sendJson(response, error.statusCode || 500, {
+        error: error.message || "Unexpected server error"
+      });
+    }
+  });
+}
+/**
+ * Create the Maqam server and start listening.
+ *
+ * The port comes from `options.port`, then the `PORT` environment variable,
+ * then 8787. An explicit `options.port` of 0 is honoured (the operating system
+ * picks a free port). The startup message reports the port actually bound.
+ *
+ * @param {object} [options] - Also forwarded to createMaqamServer.
+ * @param {number|string} [options.port] - Port to listen on.
+ * @param {string} [options.host] - Host to bind; defaults to `HOST` or 127.0.0.1.
+ * @returns {import("node:http").Server} The listening server.
+ */
+export function startMaqamServer(options = {}) {
+  // Only a missing or empty port falls back; `||` would also discard a requested 0.
+  const requestedPort = options.port ?? "";
+  const port = Number(requestedPort !== "" ? requestedPort : process.env.PORT || 8787);
+  const host = options.host || process.env.HOST || "127.0.0.1";
+  const server = createMaqamServer(options);
+  server.listen(port, host, () => {
+    const bound = server.address();
+    const actualPort = bound !== null && typeof bound === "object" ? bound.port : port;
+    const address = `http://${host}:${actualPort}`;
+    process.stdout.write(`Maqam console running at ${address}\n`);
+  });
+  return server;
+}
+export { PRODUCT as MAQAM_PRODUCT };