@arbidocs/cli 0.3.30 → 0.3.32

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Changelog
2
2
 
3
+ ## v0.3.32
4
+
5
+ [compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.31...HEAD)
6
+
7
+ ### 🚀 Enhancements
8
+
9
+ - **cli:** Robust doc reprocess with retry, per-doc fallback, ext filter ([#576](https://github.com/arbicity/ARBI-frontend/pull/576))
10
+
11
+ ## v0.3.31
12
+
13
+ [compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.30...HEAD)
14
+
15
+ ### 🚀 Enhancements
16
+
17
+ - **cli:** --interval for reprocess batches, --output for docs export ([#574](https://github.com/arbicity/ARBI-frontend/pull/574))
18
+
19
+ ### 🔥 Performance
20
+
21
+ - Instant workspace click feedback + bulk reprocess cap ([#573](https://github.com/arbicity/ARBI-frontend/pull/573))
22
+
3
23
  ## v0.3.30
4
24
 
5
25
  [compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.29...HEAD)
package/dist/index.js CHANGED
@@ -3637,7 +3637,7 @@ function getLatestVersion(skipCache = false) {
3637
3637
  }
3638
3638
  }
3639
3639
  function getCurrentVersion() {
3640
- return "0.3.30";
3640
+ return "0.3.32";
3641
3641
  }
3642
3642
  function readChangelog(fromVersion, toVersion) {
3643
3643
  try {
@@ -3690,17 +3690,17 @@ function showChangelog(fromVersion, toVersion) {
3690
3690
  async function checkForUpdates(autoUpdate) {
3691
3691
  try {
3692
3692
  const latest = getLatestVersion();
3693
- if (!latest || latest === "0.3.30") return;
3693
+ if (!latest || latest === "0.3.32") return;
3694
3694
  if (autoUpdate) {
3695
3695
  warn(`
3696
- Your arbi version is out of date (${"0.3.30"} \u2192 ${latest}). Updating...`);
3696
+ Your arbi version is out of date (${"0.3.32"} \u2192 ${latest}). Updating...`);
3697
3697
  child_process.execSync("npm install -g @arbidocs/cli@latest", { stdio: "inherit" });
3698
- showChangelog("0.3.30", latest);
3698
+ showChangelog("0.3.32", latest);
3699
3699
  console.log(`Updated to ${latest}.`);
3700
3700
  } else {
3701
3701
  warn(
3702
3702
  `
3703
- Your arbi version is out of date (${"0.3.30"} \u2192 ${latest}).
3703
+ Your arbi version is out of date (${"0.3.32"} \u2192 ${latest}).
3704
3704
  Run "arbi update" to upgrade, or "arbi update auto" to always stay up to date.`
3705
3705
  );
3706
3706
  }
@@ -3710,9 +3710,9 @@ Run "arbi update" to upgrade, or "arbi update auto" to always stay up to date.`
3710
3710
  function hintUpdateOnError() {
3711
3711
  try {
3712
3712
  const cached = readCache();
3713
- if (cached && cached.latest !== "0.3.30") {
3713
+ if (cached && cached.latest !== "0.3.32") {
3714
3714
  warn(
3715
- `Your arbi version is out of date (${"0.3.30"} \u2192 ${cached.latest}). Run "arbi update".`
3715
+ `Your arbi version is out of date (${"0.3.32"} \u2192 ${cached.latest}). Run "arbi update".`
3716
3716
  );
3717
3717
  }
3718
3718
  } catch {
@@ -4669,7 +4669,10 @@ async function fetchDocChoices(arbi, _workspaceId) {
4669
4669
  }));
4670
4670
  }
4671
4671
  function registerDocsCommand(program2) {
4672
- program2.command("docs").description("List documents in the active workspace").option("-w, --workspace <id>", "Workspace ID (defaults to selected workspace)").option("--json", "Output as JSON").option("--csv", "Output as CSV").option("--ids", "Output only document IDs (one per line)").option("--count", "Output only the count (combine with --status for filtered count)").option("-s, --status <status>", "Filter by status (comma-separated: completed,failed,queued)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option("-e, --ext <extensions>", "Filter by file extension (comma-separated: pdf,docx,eml)").option("-q, --query <text>", "Search file name, title, or summary").option("--sort <field>", "Sort by field (name, status, date, size, created)", "status").option("-n, --limit <n>", "Limit number of results").action(
4672
+ program2.command("docs").description("List documents in the active workspace").option("-w, --workspace <id>", "Workspace ID (defaults to selected workspace)").option("--json", "Output as JSON").option("--csv", "Output as CSV").option("--ids", "Output only document IDs (one per line)").option("--count", "Output only the count (combine with --status for filtered count)").option("-s, --status <status>", "Filter by status (comma-separated: completed,failed,queued)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option("-e, --ext <extensions>", "Filter by file extension (comma-separated: pdf,docx,eml)").option("-q, --query <text>", "Search file name, title, or summary").option("--sort <field>", "Sort by field (name, status, date, size, created)", "status").option("-n, --limit <n>", "Limit number of results").option(
4673
+ "-o, --output <path>",
4674
+ "Write output to a file instead of stdout. Extension determines the format when combined with --ids/--json/--csv (default: csv with doc_id, file_name, folder, status)"
4675
+ ).action(
4673
4676
  (opts) => runAction(async () => {
4674
4677
  const { arbi } = await resolveWorkspace(opts.workspace);
4675
4678
  let data = await sdk.documents.listDocuments(arbi);
@@ -4723,20 +4726,30 @@ function registerDocsCommand(program2) {
4723
4726
  }
4724
4727
  return;
4725
4728
  }
4729
+ const writeOut = (payload, defaultExt) => {
4730
+ if (opts.output) {
4731
+ fs4.writeFileSync(opts.output, payload.endsWith("\n") ? payload : payload + "\n");
4732
+ success(`Wrote ${data.length} rows to ${opts.output} (${defaultExt})`);
4733
+ } else {
4734
+ console.log(payload.endsWith("\n") ? payload.slice(0, -1) : payload);
4735
+ }
4736
+ };
4737
+ const csvEscape = (s) => `"${(s ?? "").replace(/"/g, '""')}"`;
4726
4738
  if (opts.ids) {
4727
- data.forEach((d) => console.log(d.external_id));
4739
+ writeOut(data.map((d) => d.external_id).join("\n"), "ids");
4728
4740
  return;
4729
4741
  }
4730
4742
  if (opts.json) {
4731
- console.log(JSON.stringify(data));
4743
+ writeOut(JSON.stringify(data, null, opts.output ? 2 : 0), "json");
4732
4744
  return;
4733
4745
  }
4734
- if (opts.csv) {
4735
- console.log("external_id,status,file_name,file_size,folder,n_pages,tokens,doc_date,title");
4746
+ if (opts.csv || opts.output && !opts.ids && !opts.json) {
4747
+ const lines = [
4748
+ "external_id,status,file_name,file_size,folder,n_pages,tokens,doc_date,title"
4749
+ ];
4736
4750
  for (const d of data) {
4737
4751
  const meta = d.doc_metadata;
4738
- const csvEscape = (s) => `"${(s ?? "").replace(/"/g, '""')}"`;
4739
- console.log(
4752
+ lines.push(
4740
4753
  [
4741
4754
  d.external_id,
4742
4755
  d.status,
@@ -4750,6 +4763,7 @@ function registerDocsCommand(program2) {
4750
4763
  ].join(",")
4751
4764
  );
4752
4765
  }
4766
+ writeOut(lines.join("\n"), "csv");
4753
4767
  return;
4754
4768
  }
4755
4769
  console.log(chalk2__default.default.dim(`${data.length} documents
@@ -4916,11 +4930,29 @@ function registerDocsCommand(program2) {
4916
4930
  console.log(JSON.stringify(data, null, 2));
4917
4931
  })()
4918
4932
  );
4919
- doc.command("reprocess [ids...]").description("Reprocess failed/completed documents (sets status back to processing)").option("-s, --status <status>", "Reprocess all docs with this status (e.g. failed)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option("--dry-run", "Show what would be reprocessed without doing it").option("-b, --batch-size <n>", "Batch size for update requests", "50").action(
4933
+ doc.command("reprocess [ids...]").description("Reprocess failed/completed documents (sets status back to processing)").option("-s, --status <status>", "Reprocess all docs with this status (e.g. failed)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option(
4934
+ "-e, --ext <extension>",
4935
+ "Filter by file extension (e.g. pdf, docx). Requires --status."
4936
+ ).option("--dry-run", "Show what would be reprocessed without doing it").option("-b, --batch-size <n>", "Batch size for update requests", "50").option(
4937
+ "-i, --interval <seconds>",
4938
+ "Seconds to wait between batches (float allowed, e.g. 0.5). Default 0 \u2014 send as fast as the server acks.",
4939
+ "0"
4940
+ ).option(
4941
+ "-r, --max-retries <n>",
4942
+ "Retry attempts per failing batch before falling back to per-doc submission.",
4943
+ "3"
4944
+ ).option(
4945
+ "--status-interval <seconds>",
4946
+ "Seconds between progress lines (0 to disable periodic printing).",
4947
+ "5"
4948
+ ).option("-v, --verbose", "Print a log line after each batch in addition to periodic stats.").option("-q, --quiet", "Suppress periodic progress lines \u2014 only print the final summary.").action(
4920
4949
  (ids, opts) => runAction(async () => {
4921
- const { arbi } = await resolveWorkspace();
4950
+ const { arbi } = await resolveWorkspace(void 0, { skipNotifications: true });
4922
4951
  let docIds;
4923
4952
  if (ids && ids.length > 0) {
4953
+ if (opts.ext || opts.folder) {
4954
+ warn("--ext/--folder are ignored when explicit IDs are given.");
4955
+ }
4924
4956
  docIds = ids;
4925
4957
  } else if (opts.status) {
4926
4958
  const allDocs = await sdk.documents.listDocuments(arbi);
@@ -4931,6 +4963,12 @@ function registerDocsCommand(program2) {
4931
4963
  (d) => (d.folder ?? "").toLowerCase().includes(pattern)
4932
4964
  );
4933
4965
  }
4966
+ if (opts.ext) {
4967
+ const ext = opts.ext.toLowerCase().replace(/^\./, "");
4968
+ filtered = filtered.filter(
4969
+ (d) => (d.file_name ?? "").toLowerCase().endsWith("." + ext)
4970
+ );
4971
+ }
4934
4972
  docIds = filtered.map((d) => d.external_id);
4935
4973
  } else {
4936
4974
  error("Provide document IDs or use --status to select documents to reprocess.");
@@ -4948,18 +4986,115 @@ function registerDocsCommand(program2) {
4948
4986
  return;
4949
4987
  }
4950
4988
  const batchSize = parseInt(opts.batchSize ?? "50", 10);
4951
- let processed = 0;
4952
- for (let i = 0; i < docIds.length; i += batchSize) {
4953
- const batch = docIds.slice(i, i + batchSize);
4954
- const updates = batch.map((id) => ({ external_id: id, status: "processing" }));
4955
- await sdk.documents.updateDocuments(
4956
- arbi,
4957
- updates
4958
- );
4959
- processed += batch.length;
4960
- console.log(` [${processed}/${docIds.length}] Triggered reprocessing...`);
4989
+ const maxRetries = parseInt(opts.maxRetries ?? "3", 10);
4990
+ const intervalSec = parseFloat(opts.interval ?? "0");
4991
+ const intervalMs = Number.isFinite(intervalSec) && intervalSec > 0 ? intervalSec * 1e3 : 0;
4992
+ const statusIntervalSec = parseFloat(opts.statusInterval ?? "5");
4993
+ const statusIntervalMs = Number.isFinite(statusIntervalSec) && statusIntervalSec > 0 ? statusIntervalSec * 1e3 : 0;
4994
+ const total = docIds.length;
4995
+ const startMs = Date.now();
4996
+ let submitted = 0;
4997
+ let succeeded = 0;
4998
+ let failed = 0;
4999
+ const failedIds = [];
5000
+ const formatDuration = (ms) => {
5001
+ const s = Math.floor(ms / 1e3);
5002
+ if (s < 60) return `${s}s`;
5003
+ const m = Math.floor(s / 60);
5004
+ if (m < 60) return `${m}m${s % 60}s`;
5005
+ const h = Math.floor(m / 60);
5006
+ return `${h}h${m % 60}m`;
5007
+ };
5008
+ const printStatus = (prefix = "") => {
5009
+ const elapsed = Date.now() - startMs;
5010
+ const rate = submitted > 0 ? submitted / elapsed * 1e3 : 0;
5011
+ const remaining = total - submitted;
5012
+ const etaMs = rate > 0 ? remaining / rate * 1e3 : 0;
5013
+ const bar = `submitted=${submitted}/${total} ok=${succeeded} fail=${failed} rate=${rate.toFixed(1)}/s elapsed=${formatDuration(elapsed)}` + (rate > 0 && remaining > 0 ? ` eta=${formatDuration(etaMs)}` : "");
5014
+ console.log(`${prefix}${bar}`);
5015
+ };
5016
+ let statusTimer;
5017
+ if (!opts.quiet && statusIntervalMs > 0) {
5018
+ statusTimer = setInterval(() => printStatus(" "), statusIntervalMs);
5019
+ }
5020
+ const sleep = (ms) => new Promise((resolve3) => setTimeout(resolve3, ms));
5021
+ const backoff = async (attempt) => {
5022
+ const delayMs = Math.min(1e3 * Math.pow(2, attempt), 3e4);
5023
+ await sleep(delayMs);
5024
+ };
5025
+ const submitBatch = async (batchIds) => {
5026
+ const updates = batchIds.map((id) => ({
5027
+ external_id: id,
5028
+ status: "processing"
5029
+ }));
5030
+ try {
5031
+ await sdk.documents.updateDocuments(
5032
+ arbi,
5033
+ updates
5034
+ );
5035
+ return true;
5036
+ } catch {
5037
+ return false;
5038
+ }
5039
+ };
5040
+ try {
5041
+ for (let i = 0; i < docIds.length; i += batchSize) {
5042
+ const batch = docIds.slice(i, i + batchSize);
5043
+ let batchOk = false;
5044
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
5045
+ batchOk = await submitBatch(batch);
5046
+ if (batchOk) break;
5047
+ if (attempt < maxRetries) {
5048
+ await backoff(attempt);
5049
+ }
5050
+ }
5051
+ if (batchOk) {
5052
+ succeeded += batch.length;
5053
+ submitted += batch.length;
5054
+ if (opts.verbose) {
5055
+ console.log(` [${submitted}/${total}] Triggered reprocessing...`);
5056
+ }
5057
+ } else {
5058
+ console.log(
5059
+ chalk2__default.default.dim(
5060
+ ` batch of ${batch.length} failed after ${maxRetries} retries, falling back to per-doc`
5061
+ )
5062
+ );
5063
+ for (const id of batch) {
5064
+ let docOk = await submitBatch([id]);
5065
+ if (!docOk) {
5066
+ await sleep(500);
5067
+ docOk = await submitBatch([id]);
5068
+ }
5069
+ if (docOk) {
5070
+ succeeded += 1;
5071
+ } else {
5072
+ failed += 1;
5073
+ failedIds.push(id);
5074
+ }
5075
+ submitted += 1;
5076
+ }
5077
+ }
5078
+ if (intervalMs > 0 && i + batchSize < docIds.length) {
5079
+ await sleep(intervalMs);
5080
+ }
5081
+ }
5082
+ } finally {
5083
+ if (statusTimer) clearInterval(statusTimer);
5084
+ }
5085
+ console.log("");
5086
+ printStatus();
5087
+ if (failed > 0) {
5088
+ warn(`${failed} document(s) failed to reprocess.`);
5089
+ const preview = failedIds.slice(0, 20);
5090
+ preview.forEach((id) => console.log(` ${chalk2__default.default.red("\u2717")} ${id}`));
5091
+ if (failedIds.length > preview.length) {
5092
+ console.log(chalk2__default.default.dim(` ... and ${failedIds.length - preview.length} more`));
5093
+ }
5094
+ }
5095
+ if (succeeded > 0) {
5096
+ success(`Triggered reprocessing for ${succeeded} document(s).`);
4961
5097
  }
4962
- success(`Triggered reprocessing for ${docIds.length} document(s).`);
4963
5098
  })()
4964
5099
  );
4965
5100
  }
@@ -7745,7 +7880,7 @@ console.info = (...args) => {
7745
7880
  _origInfo(...args);
7746
7881
  };
7747
7882
  var program = new commander.Command();
7748
- program.name("arbi").description("ARBI CLI \u2014 interact with ARBI from the terminal").version("0.3.30");
7883
+ program.name("arbi").description("ARBI CLI \u2014 interact with ARBI from the terminal").version("0.3.32");
7749
7884
  registerConfigCommand(program);
7750
7885
  registerLoginCommand(program);
7751
7886
  registerRegisterCommand(program);