alys-akusa 0.1.6 → 0.1.7

Files changed (2)
  1. package/dist/index.cjs +60 -46
  2. package/package.json +1 -1
package/dist/index.cjs CHANGED
@@ -6477,22 +6477,22 @@ async function generateWithOpenAI(options) {
   };
   }
   async function generateWithOpenAIBatched(options) {
- const batchSize = Math.max(1, Math.min(24, Number(process.env.ALYS_OPENAI_RECORDS_PER_CALL ?? 8)));
- const batches = Math.ceil(options.targetCount / batchSize);
- const records = [];
- let latencyMs = 0;
- let model = process.env.ALYS_GENERATOR_MODEL || DEFAULT_OPENAI_MODEL;
- for (let i = 0; i < batches; i++) {
- const remaining = options.targetCount - records.length;
- if (remaining <= 0) break;
- const result = await generateWithOpenAI({
+ const batchSize = Math.max(1, Math.min(48, Number(process.env.ALYS_OPENAI_RECORDS_PER_CALL ?? 8)));
+ const batches = Array.from(
+ { length: Math.ceil(options.targetCount / batchSize) },
+ (_, index) => Math.min(batchSize, Math.max(0, options.targetCount - index * batchSize))
+ ).filter((count) => count > 0);
+ const results = await mapLimit(
+ batches,
+ Number(process.env.ALYS_PROVIDER_BATCH_CONCURRENCY ?? 3),
+ (count) => generateWithOpenAI({
   ...options,
- targetCount: Math.min(batchSize, remaining)
- });
- latencyMs += result.latencyMs;
- model = result.model;
- records.push(...result.records);
- }
+ targetCount: count
+ })
+ );
+ const records = results.flatMap((result) => result.records);
+ const latencyMs = results.reduce((sum, result) => sum + result.latencyMs, 0);
+ const model = results[0]?.model ?? process.env.ALYS_GENERATOR_MODEL ?? DEFAULT_OPENAI_MODEL;
   return {
   provider: "openai",
   model,
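
Note: the rewritten batching precomputes each call's record count up front instead of tracking a running remainder, which is what lets the calls run concurrently through mapLimit. A minimal standalone sketch of that partitioning expression (the targetCount and batchSize values are illustrative):

    // Split a target count into per-call batch sizes, as in the hunk above.
    const targetCount = 25; // illustrative
    const batchSize = 8;    // illustrative
    const batches = Array.from(
      { length: Math.ceil(targetCount / batchSize) },
      (_, index) => Math.min(batchSize, Math.max(0, targetCount - index * batchSize))
    ).filter((count) => count > 0);
    console.log(batches); // [8, 8, 8, 1]: three full batches plus the remainder
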
@@ -6543,22 +6543,22 @@ async function generateWithGroq(options) {
   };
   }
   async function generateWithGroqBatched(options) {
- const batchSize = Math.max(1, Math.min(16, Number(process.env.ALYS_GROQ_RECORDS_PER_CALL ?? 8)));
- const batches = Math.ceil(options.targetCount / batchSize);
- const records = [];
- let latencyMs = 0;
- let model = process.env.GROQ_MODEL || process.env.ALYS_GENERATOR_MODEL || DEFAULT_GROQ_MODEL;
- for (let i = 0; i < batches; i++) {
- const remaining = options.targetCount - records.length;
- if (remaining <= 0) break;
- const result = await generateWithGroq({
+ const batchSize = Math.max(1, Math.min(32, Number(process.env.ALYS_GROQ_RECORDS_PER_CALL ?? 8)));
+ const batches = Array.from(
+ { length: Math.ceil(options.targetCount / batchSize) },
+ (_, index) => Math.min(batchSize, Math.max(0, options.targetCount - index * batchSize))
+ ).filter((count) => count > 0);
+ const results = await mapLimit(
+ batches,
+ Number(process.env.ALYS_PROVIDER_BATCH_CONCURRENCY ?? 3),
+ (count) => generateWithGroq({
   ...options,
- targetCount: Math.min(batchSize, remaining)
- });
- latencyMs += result.latencyMs;
- model = result.model;
- records.push(...result.records);
- }
+ targetCount: count
+ })
+ );
+ const records = results.flatMap((result) => result.records);
+ const latencyMs = results.reduce((sum, result) => sum + result.latencyMs, 0);
+ const model = results[0]?.model ?? process.env.GROQ_MODEL ?? process.env.ALYS_GENERATOR_MODEL ?? DEFAULT_GROQ_MODEL;
   return {
   provider: "groq",
   model,
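
Note: both providers clamp the per-call batch size with the same min/max pattern (caps raised to 48 for OpenAI, 32 for Groq). A quick sketch of how that clamp resolves; the clamp helper name and inputs are illustrative, not part of the package:

    // At least 1, at most the provider cap, defaulting to 8 when the env var is unset.
    const clamp = (cap, raw) => Math.max(1, Math.min(cap, Number(raw ?? 8)));
    console.log(clamp(32, undefined)); // 8  (env var unset)
    console.log(clamp(32, "64"));      // 32 (capped at the provider limit)
    console.log(clamp(32, "0"));       // 1  (floored)
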
@@ -6566,6 +6566,20 @@ async function generateWithGroqBatched(options) {
   records: records.slice(0, options.targetCount)
   };
   }
+ async function mapLimit(items, limit, worker) {
+ const results = new Array(items.length);
+ let nextIndex = 0;
+ const workerCount = Math.max(1, Math.min(items.length, Math.floor(limit || 1)));
+ async function runWorker() {
+ while (nextIndex < items.length) {
+ const index = nextIndex;
+ nextIndex += 1;
+ results[index] = await worker(items[index], index);
+ }
+ }
+ await Promise.all(Array.from({ length: workerCount }, runWorker));
+ return results;
+ }
   function providerRecordSchema() {
   return {
   type: "object",
@@ -6680,7 +6694,7 @@ var DiscoveryAgent = class {
   var ExtractionAgent = class {
   name = "ExtractionAgent";
   async run(sources) {
- const crawls = await mapLimit(
+ const crawls = await mapLimit2(
   sources,
   Math.max(1, Number(process.env.ALYS_CRAWL_CONCURRENCY ?? 6)),
   (source) => crawlSource(source)
@@ -6754,7 +6768,7 @@ var StructuringAgent = class {
   }
   const concurrency = Math.max(1, Math.min(documents.length, Number(process.env.ALYS_PROVIDER_CONCURRENCY ?? 3)));
   let totalGenerated = 0;
- const grouped = await mapLimit(documents, concurrency, async (document, index) => {
+ const grouped = await mapLimit2(documents, concurrency, async (document, index) => {
   const finding = findings[index] ?? findings[0];
   const trustScore = document.sourceScores?.trustScore ?? 0.62;
   const authorityScore = document.sourceScores?.authorityScore ?? 0.55;
@@ -6830,7 +6844,7 @@ function domainFromUrl3(url) {
   return "unknown";
   }
   }
- async function mapLimit(items, limit, worker) {
+ async function mapLimit2(items, limit, worker) {
   const results = new Array(items.length);
   let nextIndex = 0;
   async function runWorker() {
@@ -7313,7 +7327,7 @@ async function generateDatasets(options) {
   const verificationEnabled = options.enableVerificationSwarm ?? performanceMode !== "fast";
   const debateEnabled = verificationEnabled && perf.debateEnabled;
   const multiplier = depthMultiplier(options.generationDepth);
- const results = await mapLimit2(Array.from({ length: datasetCount }, (_, i) => i), Number(process.env.ALYS_DATASET_CONCURRENCY ?? perf.datasetConcurrency), async (i) => {
+ const results = await mapLimit3(Array.from({ length: datasetCount }, (_, i) => i), Number(process.env.ALYS_DATASET_CONCURRENCY ?? perf.datasetConcurrency), async (i) => {
   const datasetIndex = i + 1;
   const datasetSourceLimit = Math.max(1, Math.floor((options.sourceLimit ?? 24) * multiplier));
   const datasetTargetRows = Math.max(1, Math.floor((options.targetRows ?? 100) * multiplier));
@@ -7343,7 +7357,7 @@ async function generateDatasets(options) {
   const datasets = results;
   return { manifests, artifacts, previews, datasets };
   }
- async function mapLimit2(items, limit, worker) {
+ async function mapLimit3(items, limit, worker) {
   const results = new Array(items.length);
   let nextIndex = 0;
   const workerCount = Math.max(1, Math.min(items.length, Math.floor(limit || 1)));
@@ -7682,7 +7696,7 @@ function printUsage(profile) {
   );
   }
   function printRunPlan(args) {
- const multiplier = depthMultiplier2(args.depth);
+ const multiplier = args.performanceMode === "fast" ? 1 : depthMultiplier2(args.depth);
   const effectiveSources = Math.max(1, Math.floor(args.sourceLimit * multiplier));
   const effectiveRows = Math.max(1, Math.floor(args.targetRows * multiplier));
   const totalRows = effectiveRows * args.datasetCount;
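
Note: with this change the printed run plan no longer applies the depth multiplier in fast mode. A rough sketch of the effect on the row math; depthMultiplier2's actual return values are not shown in this diff, so the 0.5 used here is purely illustrative:

    // Illustrative only: the real depthMultiplier2 is not part of this diff.
    const depthMultiplier2 = (depth) => (depth === "shallow" ? 0.5 : 1);
    const args = { performanceMode: "fast", depth: "shallow", targetRows: 100, datasetCount: 2 };
    const multiplier = args.performanceMode === "fast" ? 1 : depthMultiplier2(args.depth);
    const effectiveRows = Math.max(1, Math.floor(args.targetRows * multiplier));
    console.log(effectiveRows * args.datasetCount); // 200 in fast mode; 100 if the 0.5 multiplier applied
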
@@ -8000,7 +8014,7 @@ async function handleGenerate(args, command) {
   validate: (v) => v.trim().length ? true : "Please enter a topic."
   })).topic);
   if (!topic) throw new Error("Missing topic.");
- const datasetType = parseDatasetType(values.type) ?? (await (0, import_prompts3.default)({
+ const datasetType = parseDatasetType(values.type) ?? (values.yes === true ? "instruction" : void 0) ?? (await (0, import_prompts3.default)({
   type: "select",
   name: "datasetType",
   message: "Dataset type?",
@@ -8010,7 +8024,7 @@ async function handleGenerate(args, command) {
   { title: "Question/Answer", value: "qa" }
   ]
   })).datasetType;
- const requestedDatasetCount = values.datasets ? Math.max(1, Math.floor(Number(values.datasets))) : (await (0, import_prompts3.default)({
+ const requestedDatasetCount = values.datasets ? Math.max(1, Math.floor(Number(values.datasets))) : values.yes === true ? 1 : (await (0, import_prompts3.default)({
   type: "number",
   name: "datasetCount",
   message: "How many datasets?",
@@ -8023,7 +8037,7 @@ async function handleGenerate(args, command) {
   return;
   }
   const datasetCount = requestedDatasetCount;
- const exportFormats = values.format ? parseFormats(values.format) : (await (0, import_prompts3.default)({
+ const exportFormats = values.format ? parseFormats(values.format) : values.yes === true ? ["jsonl", "csv", "markdown"] : (await (0, import_prompts3.default)({
   type: "multiselect",
   name: "exportFormats",
   message: "Output formats?",
@@ -8036,7 +8050,7 @@ async function handleGenerate(args, command) {
   ],
   hint: "Use space to select multiple."
   })).exportFormats;
- const depth = parseDepth(values.depth) ?? (await (0, import_prompts3.default)({
+ const depth = parseDepth(values.depth) ?? (values.yes === true ? "medium" : void 0) ?? (await (0, import_prompts3.default)({
   type: "select",
   name: "depth",
   message: "Research depth?",
@@ -8046,7 +8060,7 @@ async function handleGenerate(args, command) {
   { title: "Deep", value: "deep" }
   ]
   })).depth;
- const sourceLimit = values.sources ? Math.min(maxSources, Math.max(1, Number(values.sources))) : (await (0, import_prompts3.default)({
+ const sourceLimit = values.sources ? Math.min(maxSources, Math.max(1, Number(values.sources))) : values.yes === true ? benchmarkMode ? 48 : MAX_SOURCES_PER_RUN : (await (0, import_prompts3.default)({
   type: "number",
   name: "sourceLimit",
   message: "How many sources?",
@@ -8054,7 +8068,7 @@ async function handleGenerate(args, command) {
   min: 1,
   max: maxSources
   })).sourceLimit;
- const targetRows = values.rows ? Math.min(maxRows, Math.max(1, Number(values.rows))) : (await (0, import_prompts3.default)({
+ const targetRows = values.rows ? Math.min(maxRows, Math.max(1, Number(values.rows))) : values.yes === true ? benchmarkMode ? 5e3 : MAX_ROWS_PER_DATASET : (await (0, import_prompts3.default)({
   type: "number",
   name: "targetRows",
   message: "Rows per dataset?",
@@ -8062,14 +8076,14 @@ async function handleGenerate(args, command) {
   min: 1,
   max: maxRows
   })).targetRows;
- const workspaceRoot = (values.workspace ? String(values.workspace) : "").trim() || (await (0, import_prompts3.default)({
+ const workspaceRoot = (values.workspace ? String(values.workspace) : "").trim() || (values.yes === true ? "~/Alys" : "") || (await (0, import_prompts3.default)({
   type: "text",
   name: "workspaceRoot",
   message: "Export directory?",
   initial: "~/Alys",
   validate: (v) => v.trim().length ? true : "Enter an export directory."
   })).workspaceRoot;
- const verificationEnabled = values.verify === true ? true : values["no-verify"] === true ? false : (await (0, import_prompts3.default)({
+ const verificationEnabled = values.verify === true ? true : values["no-verify"] === true ? false : values.yes === true ? performanceMode !== "fast" : (await (0, import_prompts3.default)({
   type: "toggle",
   name: "verificationEnabled",
   message: "Enable verification checks?",
@@ -8119,9 +8133,9 @@ async function handleGenerate(args, command) {
   console.log(paint("Runtime", "white"));
   printStage("AUTH", "OK", "Usage linked", appUrl());
   printStage("PLAN", "OK", "Generations charged only after successful completion", `${datasetCount} requested`);
- printStage("SRC", "RUN", "Research pipeline starting", `${sourceLimit} source target`);
+ printStage("RUN", "RUN", "Dataset runtime starting", `${performanceMode} mode`);
   const response = await withSpinner(
- "Alys research runtime executing",
+ "Alys runtime executing",
   requestJson(
   "/api/cli/generate",
   {
package/package.json CHANGED
@@ -1,6 +1,6 @@
   {
   "name": "alys-akusa",
- "version": "0.1.6",
+ "version": "0.1.7",
   "private": false,
   "description": "Alys local CLI runtime for autonomous dataset generation.",
   "license": "UNLICENSED",