alys-akusa 0.1.6 → 0.1.7

Files changed (2)
  1. package/dist/index.cjs +60 -46
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -6477,22 +6477,22 @@ async function generateWithOpenAI(options) {
   };
   }
   async function generateWithOpenAIBatched(options) {
- const batchSize = Math.max(1, Math.min(24, Number(process.env.ALYS_OPENAI_RECORDS_PER_CALL ?? 8)));
- const batches = Math.ceil(options.targetCount / batchSize);
- const records = [];
- let latencyMs = 0;
- let model = process.env.ALYS_GENERATOR_MODEL || DEFAULT_OPENAI_MODEL;
- for (let i = 0; i < batches; i++) {
- const remaining = options.targetCount - records.length;
- if (remaining <= 0) break;
- const result = await generateWithOpenAI({
+ const batchSize = Math.max(1, Math.min(48, Number(process.env.ALYS_OPENAI_RECORDS_PER_CALL ?? 8)));
+ const batches = Array.from(
+ { length: Math.ceil(options.targetCount / batchSize) },
+ (_, index) => Math.min(batchSize, Math.max(0, options.targetCount - index * batchSize))
+ ).filter((count) => count > 0);
+ const results = await mapLimit(
+ batches,
+ Number(process.env.ALYS_PROVIDER_BATCH_CONCURRENCY ?? 3),
+ (count) => generateWithOpenAI({
   ...options,
- targetCount: Math.min(batchSize, remaining)
- });
- latencyMs += result.latencyMs;
- model = result.model;
- records.push(...result.records);
- }
+ targetCount: count
+ })
+ );
+ const records = results.flatMap((result) => result.records);
+ const latencyMs = results.reduce((sum, result) => sum + result.latencyMs, 0);
+ const model = results[0]?.model ?? process.env.ALYS_GENERATOR_MODEL ?? DEFAULT_OPENAI_MODEL;
   return {
   provider: "openai",
   model,
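
Note: the rewritten batching precomputes each call's record count up front instead of tracking a running remainder, which is what lets the calls run concurrently through mapLimit. A minimal standalone sketch of that partitioning expression (the targetCount and batchSize values are illustrative):

    // Split a target count into per-call batch sizes, as in the hunk above.
    const targetCount = 25; // illustrative
    const batchSize = 8;    // illustrative
    const batches = Array.from(
      { length: Math.ceil(targetCount / batchSize) },
      (_, index) => Math.min(batchSize, Math.max(0, targetCount - index * batchSize))
    ).filter((count) => count > 0);
    console.log(batches); // [8, 8, 8, 1]: three full batches plus the remainder
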
@@ -6543,22 +6543,22 @@ async function generateWithGroq(options) {
   };
   }
   async function generateWithGroqBatched(options) {
- const batchSize = Math.max(1, Math.min(16, Number(process.env.ALYS_GROQ_RECORDS_PER_CALL ?? 8)));
- const batches = Math.ceil(options.targetCount / batchSize);
- const records = [];
- let latencyMs = 0;
- let model = process.env.GROQ_MODEL || process.env.ALYS_GENERATOR_MODEL || DEFAULT_GROQ_MODEL;
- for (let i = 0; i < batches; i++) {
- const remaining = options.targetCount - records.length;
- if (remaining <= 0) break;
- const result = await generateWithGroq({
+ const batchSize = Math.max(1, Math.min(32, Number(process.env.ALYS_GROQ_RECORDS_PER_CALL ?? 8)));
+ const batches = Array.from(
+ { length: Math.ceil(options.targetCount / batchSize) },
+ (_, index) => Math.min(batchSize, Math.max(0, options.targetCount - index * batchSize))
+ ).filter((count) => count > 0);
+ const results = await mapLimit(
+ batches,
+ Number(process.env.ALYS_PROVIDER_BATCH_CONCURRENCY ?? 3),
+ (count) => generateWithGroq({
   ...options,
- targetCount: Math.min(batchSize, remaining)
- });
- latencyMs += result.latencyMs;
- model = result.model;
- records.push(...result.records);
- }
+ targetCount: count
+ })
+ );
+ const records = results.flatMap((result) => result.records);
+ const latencyMs = results.reduce((sum, result) => sum + result.latencyMs, 0);
+ const model = results[0]?.model ?? process.env.GROQ_MODEL ?? process.env.ALYS_GENERATOR_MODEL ?? DEFAULT_GROQ_MODEL;
   return {
   provider: "groq",
   model,
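
Note: both providers clamp the per-call batch size with the same min/max pattern (caps raised to 48 for OpenAI, 32 for Groq). A quick sketch of how that clamp resolves; the clamp helper name and inputs are illustrative, not part of the package:

    // At least 1, at most the provider cap, defaulting to 8 when the env var is unset.
    const clamp = (cap, raw) => Math.max(1, Math.min(cap, Number(raw ?? 8)));
    console.log(clamp(32, undefined)); // 8  (env var unset)
    console.log(clamp(32, "64"));      // 32 (capped at the provider limit)
    console.log(clamp(32, "0"));       // 1  (floored)
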
@@ -6566,6 +6566,20 @@ async function generateWithGroqBatched(options) {
   records: records.slice(0, options.targetCount)
   };
   }
+ async function mapLimit(items, limit, worker) {
+ const results = new Array(items.length);
+ let nextIndex = 0;
+ const workerCount = Math.max(1, Math.min(items.length, Math.floor(limit || 1)));
+ async function runWorker() {
+ while (nextIndex < items.length) {
+ const index = nextIndex;
+ nextIndex += 1;
+ results[index] = await worker(items[index], index);
+ }
+ }
+ await Promise.all(Array.from({ length: workerCount }, runWorker));
+ return results;
+ }
   function providerRecordSchema() {
   return {
   type: "object",
@@ -6680,7 +6694,7 @@ var DiscoveryAgent = class {
   var ExtractionAgent = class {
   name = "ExtractionAgent";
   async run(sources) {
- const crawls = await mapLimit(
+ const crawls = await mapLimit2(
   sources,
   Math.max(1, Number(process.env.ALYS_CRAWL_CONCURRENCY ?? 6)),
   (source) => crawlSource(source)
@@ -6754,7 +6768,7 @@ var StructuringAgent = class {
   }
   const concurrency = Math.max(1, Math.min(documents.length, Number(process.env.ALYS_PROVIDER_CONCURRENCY ?? 3)));
   let totalGenerated = 0;
- const grouped = await mapLimit(documents, concurrency, async (document, index) => {
+ const grouped = await mapLimit2(documents, concurrency, async (document, index) => {
   const finding = findings[index] ?? findings[0];
   const trustScore = document.sourceScores?.trustScore ?? 0.62;
   const authorityScore = document.sourceScores?.authorityScore ?? 0.55;
@@ -6830,7 +6844,7 @@ function domainFromUrl3(url) {
   return "unknown";
   }
   }
- async function mapLimit(items, limit, worker) {
+ async function mapLimit2(items, limit, worker) {
   const results = new Array(items.length);
   let nextIndex = 0;
   async function runWorker() {
@@ -7313,7 +7327,7 @@ async function generateDatasets(options) {
   const verificationEnabled = options.enableVerificationSwarm ?? performanceMode !== "fast";
   const debateEnabled = verificationEnabled && perf.debateEnabled;
   const multiplier = depthMultiplier(options.generationDepth);
- const results = await mapLimit2(Array.from({ length: datasetCount }, (_, i) => i), Number(process.env.ALYS_DATASET_CONCURRENCY ?? perf.datasetConcurrency), async (i) => {
+ const results = await mapLimit3(Array.from({ length: datasetCount }, (_, i) => i), Number(process.env.ALYS_DATASET_CONCURRENCY ?? perf.datasetConcurrency), async (i) => {
   const datasetIndex = i + 1;
   const datasetSourceLimit = Math.max(1, Math.floor((options.sourceLimit ?? 24) * multiplier));
   const datasetTargetRows = Math.max(1, Math.floor((options.targetRows ?? 100) * multiplier));
@@ -7343,7 +7357,7 @@ async function generateDatasets(options) {
   const datasets = results;
   return { manifests, artifacts, previews, datasets };
   }
- async function mapLimit2(items, limit, worker) {
+ async function mapLimit3(items, limit, worker) {
   const results = new Array(items.length);
   let nextIndex = 0;
   const workerCount = Math.max(1, Math.min(items.length, Math.floor(limit || 1)));
@@ -7682,7 +7696,7 @@ function printUsage(profile) {
   );
   }
   function printRunPlan(args) {
- const multiplier = depthMultiplier2(args.depth);
+ const multiplier = args.performanceMode === "fast" ? 1 : depthMultiplier2(args.depth);
   const effectiveSources = Math.max(1, Math.floor(args.sourceLimit * multiplier));
   const effectiveRows = Math.max(1, Math.floor(args.targetRows * multiplier));
   const totalRows = effectiveRows * args.datasetCount;
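
Note: with this change the printed run plan no longer applies the depth multiplier in fast mode. A rough sketch of the effect on the row math; depthMultiplier2's actual return values are not shown in this diff, so the 0.5 used here is purely illustrative:

    // Illustrative only: the real depthMultiplier2 is not part of this diff.
    const depthMultiplier2 = (depth) => (depth === "shallow" ? 0.5 : 1);
    const args = { performanceMode: "fast", depth: "shallow", targetRows: 100, datasetCount: 2 };
    const multiplier = args.performanceMode === "fast" ? 1 : depthMultiplier2(args.depth);
    const effectiveRows = Math.max(1, Math.floor(args.targetRows * multiplier));
    console.log(effectiveRows * args.datasetCount); // 200 in fast mode; 100 if the 0.5 multiplier applied
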
@@ -8000,7 +8014,7 @@ async function handleGenerate(args, command) {
   validate: (v) => v.trim().length ? true : "Please enter a topic."
   })).topic);
   if (!topic) throw new Error("Missing topic.");
- const datasetType = parseDatasetType(values.type) ?? (await (0, import_prompts3.default)({
+ const datasetType = parseDatasetType(values.type) ?? (values.yes === true ? "instruction" : void 0) ?? (await (0, import_prompts3.default)({
   type: "select",
   name: "datasetType",
   message: "Dataset type?",
@@ -8010,7 +8024,7 @@ async function handleGenerate(args, command) {
   { title: "Question/Answer", value: "qa" }
   ]
   })).datasetType;
- const requestedDatasetCount = values.datasets ? Math.max(1, Math.floor(Number(values.datasets))) : (await (0, import_prompts3.default)({
+ const requestedDatasetCount = values.datasets ? Math.max(1, Math.floor(Number(values.datasets))) : values.yes === true ? 1 : (await (0, import_prompts3.default)({
   type: "number",
   name: "datasetCount",
   message: "How many datasets?",
@@ -8023,7 +8037,7 @@ async function handleGenerate(args, command) {
   return;
   }
   const datasetCount = requestedDatasetCount;
- const exportFormats = values.format ? parseFormats(values.format) : (await (0, import_prompts3.default)({
+ const exportFormats = values.format ? parseFormats(values.format) : values.yes === true ? ["jsonl", "csv", "markdown"] : (await (0, import_prompts3.default)({
   type: "multiselect",
   name: "exportFormats",
   message: "Output formats?",
@@ -8036,7 +8050,7 @@ async function handleGenerate(args, command) {
   ],
   hint: "Use space to select multiple."
   })).exportFormats;
- const depth = parseDepth(values.depth) ?? (await (0, import_prompts3.default)({
+ const depth = parseDepth(values.depth) ?? (values.yes === true ? "medium" : void 0) ?? (await (0, import_prompts3.default)({
   type: "select",
   name: "depth",
   message: "Research depth?",
@@ -8046,7 +8060,7 @@ async function handleGenerate(args, command) {
   { title: "Deep", value: "deep" }
   ]
   })).depth;
- const sourceLimit = values.sources ? Math.min(maxSources, Math.max(1, Number(values.sources))) : (await (0, import_prompts3.default)({
+ const sourceLimit = values.sources ? Math.min(maxSources, Math.max(1, Number(values.sources))) : values.yes === true ? benchmarkMode ? 48 : MAX_SOURCES_PER_RUN : (await (0, import_prompts3.default)({
   type: "number",
   name: "sourceLimit",
   message: "How many sources?",
@@ -8054,7 +8068,7 @@ async function handleGenerate(args, command) {
   min: 1,
   max: maxSources
   })).sourceLimit;
- const targetRows = values.rows ? Math.min(maxRows, Math.max(1, Number(values.rows))) : (await (0, import_prompts3.default)({
+ const targetRows = values.rows ? Math.min(maxRows, Math.max(1, Number(values.rows))) : values.yes === true ? benchmarkMode ? 5e3 : MAX_ROWS_PER_DATASET : (await (0, import_prompts3.default)({
   type: "number",
   name: "targetRows",
   message: "Rows per dataset?",
@@ -8062,14 +8076,14 @@ async function handleGenerate(args, command) {
   min: 1,
   max: maxRows
   })).targetRows;
- const workspaceRoot = (values.workspace ? String(values.workspace) : "").trim() || (await (0, import_prompts3.default)({
+ const workspaceRoot = (values.workspace ? String(values.workspace) : "").trim() || (values.yes === true ? "~/Alys" : "") || (await (0, import_prompts3.default)({
   type: "text",
   name: "workspaceRoot",
   message: "Export directory?",
   initial: "~/Alys",
   validate: (v) => v.trim().length ? true : "Enter an export directory."
   })).workspaceRoot;
- const verificationEnabled = values.verify === true ? true : values["no-verify"] === true ? false : (await (0, import_prompts3.default)({
+ const verificationEnabled = values.verify === true ? true : values["no-verify"] === true ? false : values.yes === true ? performanceMode !== "fast" : (await (0, import_prompts3.default)({
   type: "toggle",
   name: "verificationEnabled",
   message: "Enable verification checks?",
@@ -8119,9 +8133,9 @@ async function handleGenerate(args, command) {
   console.log(paint("Runtime", "white"));
   printStage("AUTH", "OK", "Usage linked", appUrl());
   printStage("PLAN", "OK", "Generations charged only after successful completion", `${datasetCount} requested`);
- printStage("SRC", "RUN", "Research pipeline starting", `${sourceLimit} source target`);
+ printStage("RUN", "RUN", "Dataset runtime starting", `${performanceMode} mode`);
   const response = await withSpinner(
- "Alys research runtime executing",
+ "Alys runtime executing",
   requestJson(
   "/api/cli/generate",
   {
package/package.json CHANGED
@@ -1,6 +1,6 @@
   {
   "name": "alys-akusa",
- "version": "0.1.6",
+ "version": "0.1.7",
   "private": false,
   "description": "Alys local CLI runtime for autonomous dataset generation.",
   "license": "UNLICENSED",