@arbidocs/cli 0.3.30 → 0.3.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/index.js +163 -28
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## v0.3.32
|
|
4
|
+
|
|
5
|
+
[compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.31...HEAD)
|
|
6
|
+
|
|
7
|
+
### 🚀 Enhancements
|
|
8
|
+
|
|
9
|
+
- **cli:** Robust doc reprocess with retry, per-doc fallback, ext filter ([#576](https://github.com/arbicity/ARBI-frontend/pull/576))
|
|
10
|
+
|
|
11
|
+
## v0.3.31
|
|
12
|
+
|
|
13
|
+
[compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.30...HEAD)
|
|
14
|
+
|
|
15
|
+
### 🚀 Enhancements
|
|
16
|
+
|
|
17
|
+
- **cli:** --interval for reprocess batches, --output for docs export ([#574](https://github.com/arbicity/ARBI-frontend/pull/574))
|
|
18
|
+
|
|
19
|
+
### 🔥 Performance
|
|
20
|
+
|
|
21
|
+
- Instant workspace click feedback + bulk reprocess cap ([#573](https://github.com/arbicity/ARBI-frontend/pull/573))
|
|
22
|
+
|
|
3
23
|
## v0.3.30
|
|
4
24
|
|
|
5
25
|
[compare changes](https://github.com/arbicity/ARBI-frontend/compare/v0.3.29...HEAD)
|
package/dist/index.js
CHANGED
|
@@ -3637,7 +3637,7 @@ function getLatestVersion(skipCache = false) {
|
|
|
3637
3637
|
}
|
|
3638
3638
|
}
|
|
3639
3639
|
function getCurrentVersion() {
|
|
3640
|
-
return "0.3.30";
|
|
3640
|
+
return "0.3.32";
|
|
3641
3641
|
}
|
|
3642
3642
|
function readChangelog(fromVersion, toVersion) {
|
|
3643
3643
|
try {
|
|
@@ -3690,17 +3690,17 @@ function showChangelog(fromVersion, toVersion) {
|
|
|
3690
3690
|
async function checkForUpdates(autoUpdate) {
|
|
3691
3691
|
try {
|
|
3692
3692
|
const latest = getLatestVersion();
|
|
3693
|
-
if (!latest || latest === "0.3.30") return;
|
|
3693
|
+
if (!latest || latest === "0.3.32") return;
|
|
3694
3694
|
if (autoUpdate) {
|
|
3695
3695
|
warn(`
|
|
3696
|
-
Your arbi version is out of date (${"0.3.30"} \u2192 ${latest}). Updating...`);
|
|
3696
|
+
Your arbi version is out of date (${"0.3.32"} \u2192 ${latest}). Updating...`);
|
|
3697
3697
|
child_process.execSync("npm install -g @arbidocs/cli@latest", { stdio: "inherit" });
|
|
3698
|
-
showChangelog("0.3.30", latest);
|
|
3698
|
+
showChangelog("0.3.32", latest);
|
|
3699
3699
|
console.log(`Updated to ${latest}.`);
|
|
3700
3700
|
} else {
|
|
3701
3701
|
warn(
|
|
3702
3702
|
`
|
|
3703
|
-
Your arbi version is out of date (${"0.3.30"} \u2192 ${latest}).
|
|
3703
|
+
Your arbi version is out of date (${"0.3.32"} \u2192 ${latest}).
|
|
3704
3704
|
Run "arbi update" to upgrade, or "arbi update auto" to always stay up to date.`
|
|
3705
3705
|
);
|
|
3706
3706
|
}
|
|
@@ -3710,9 +3710,9 @@ Run "arbi update" to upgrade, or "arbi update auto" to always stay up to date.`
|
|
|
3710
3710
|
function hintUpdateOnError() {
|
|
3711
3711
|
try {
|
|
3712
3712
|
const cached = readCache();
|
|
3713
|
-
if (cached && cached.latest !== "0.3.30") {
|
|
3713
|
+
if (cached && cached.latest !== "0.3.32") {
|
|
3714
3714
|
warn(
|
|
3715
|
-
`Your arbi version is out of date (${"0.3.30"} \u2192 ${cached.latest}). Run "arbi update".`
|
|
3715
|
+
`Your arbi version is out of date (${"0.3.32"} \u2192 ${cached.latest}). Run "arbi update".`
|
|
3716
3716
|
);
|
|
3717
3717
|
}
|
|
3718
3718
|
} catch {
|
|
@@ -4669,7 +4669,10 @@ async function fetchDocChoices(arbi, _workspaceId) {
|
|
|
4669
4669
|
}));
|
|
4670
4670
|
}
|
|
4671
4671
|
function registerDocsCommand(program2) {
|
|
4672
|
-
program2.command("docs").description("List documents in the active workspace").option("-w, --workspace <id>", "Workspace ID (defaults to selected workspace)").option("--json", "Output as JSON").option("--csv", "Output as CSV").option("--ids", "Output only document IDs (one per line)").option("--count", "Output only the count (combine with --status for filtered count)").option("-s, --status <status>", "Filter by status (comma-separated: completed,failed,queued)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option("-e, --ext <extensions>", "Filter by file extension (comma-separated: pdf,docx,eml)").option("-q, --query <text>", "Search file name, title, or summary").option("--sort <field>", "Sort by field (name, status, date, size, created)", "status").option("-n, --limit <n>", "Limit number of results").action(
|
|
4672
|
+
program2.command("docs").description("List documents in the active workspace").option("-w, --workspace <id>", "Workspace ID (defaults to selected workspace)").option("--json", "Output as JSON").option("--csv", "Output as CSV").option("--ids", "Output only document IDs (one per line)").option("--count", "Output only the count (combine with --status for filtered count)").option("-s, --status <status>", "Filter by status (comma-separated: completed,failed,queued)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option("-e, --ext <extensions>", "Filter by file extension (comma-separated: pdf,docx,eml)").option("-q, --query <text>", "Search file name, title, or summary").option("--sort <field>", "Sort by field (name, status, date, size, created)", "status").option("-n, --limit <n>", "Limit number of results").option(
|
|
4673
|
+
"-o, --output <path>",
|
|
4674
|
+
"Write output to a file instead of stdout. Extension determines the format when combined with --ids/--json/--csv (default: csv with doc_id, file_name, folder, status)"
|
|
4675
|
+
).action(
|
|
4673
4676
|
(opts) => runAction(async () => {
|
|
4674
4677
|
const { arbi } = await resolveWorkspace(opts.workspace);
|
|
4675
4678
|
let data = await sdk.documents.listDocuments(arbi);
|
|
@@ -4723,20 +4726,30 @@ function registerDocsCommand(program2) {
|
|
|
4723
4726
|
}
|
|
4724
4727
|
return;
|
|
4725
4728
|
}
|
|
4729
|
+
const writeOut = (payload, defaultExt) => {
|
|
4730
|
+
if (opts.output) {
|
|
4731
|
+
fs4.writeFileSync(opts.output, payload.endsWith("\n") ? payload : payload + "\n");
|
|
4732
|
+
success(`Wrote ${data.length} rows to ${opts.output} (${defaultExt})`);
|
|
4733
|
+
} else {
|
|
4734
|
+
console.log(payload.endsWith("\n") ? payload.slice(0, -1) : payload);
|
|
4735
|
+
}
|
|
4736
|
+
};
|
|
4737
|
+
const csvEscape = (s) => `"${(s ?? "").replace(/"/g, '""')}"`;
|
|
4726
4738
|
if (opts.ids) {
|
|
4727
|
-
data.
|
|
4739
|
+
writeOut(data.map((d) => d.external_id).join("\n"), "ids");
|
|
4728
4740
|
return;
|
|
4729
4741
|
}
|
|
4730
4742
|
if (opts.json) {
|
|
4731
|
-
|
|
4743
|
+
writeOut(JSON.stringify(data, null, opts.output ? 2 : 0), "json");
|
|
4732
4744
|
return;
|
|
4733
4745
|
}
|
|
4734
|
-
if (opts.csv) {
|
|
4735
|
-
|
|
4746
|
+
if (opts.csv || opts.output && !opts.ids && !opts.json) {
|
|
4747
|
+
const lines = [
|
|
4748
|
+
"external_id,status,file_name,file_size,folder,n_pages,tokens,doc_date,title"
|
|
4749
|
+
];
|
|
4736
4750
|
for (const d of data) {
|
|
4737
4751
|
const meta = d.doc_metadata;
|
|
4738
|
-
|
|
4739
|
-
console.log(
|
|
4752
|
+
lines.push(
|
|
4740
4753
|
[
|
|
4741
4754
|
d.external_id,
|
|
4742
4755
|
d.status,
|
|
@@ -4750,6 +4763,7 @@ function registerDocsCommand(program2) {
|
|
|
4750
4763
|
].join(",")
|
|
4751
4764
|
);
|
|
4752
4765
|
}
|
|
4766
|
+
writeOut(lines.join("\n"), "csv");
|
|
4753
4767
|
return;
|
|
4754
4768
|
}
|
|
4755
4769
|
console.log(chalk2__default.default.dim(`${data.length} documents
|
|
@@ -4916,11 +4930,29 @@ function registerDocsCommand(program2) {
|
|
|
4916
4930
|
console.log(JSON.stringify(data, null, 2));
|
|
4917
4931
|
})()
|
|
4918
4932
|
);
|
|
4919
|
-
doc.command("reprocess [ids...]").description("Reprocess failed/completed documents (sets status back to processing)").option("-s, --status <status>", "Reprocess all docs with this status (e.g. failed)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option(
|
|
4933
|
+
doc.command("reprocess [ids...]").description("Reprocess failed/completed documents (sets status back to processing)").option("-s, --status <status>", "Reprocess all docs with this status (e.g. failed)").option("-f, --folder <pattern>", "Filter by folder (substring match)").option(
|
|
4934
|
+
"-e, --ext <extension>",
|
|
4935
|
+
"Filter by file extension (e.g. pdf, docx). Requires --status."
|
|
4936
|
+
).option("--dry-run", "Show what would be reprocessed without doing it").option("-b, --batch-size <n>", "Batch size for update requests", "50").option(
|
|
4937
|
+
"-i, --interval <seconds>",
|
|
4938
|
+
"Seconds to wait between batches (float allowed, e.g. 0.5). Default 0 \u2014 send as fast as the server acks.",
|
|
4939
|
+
"0"
|
|
4940
|
+
).option(
|
|
4941
|
+
"-r, --max-retries <n>",
|
|
4942
|
+
"Retry attempts per failing batch before falling back to per-doc submission.",
|
|
4943
|
+
"3"
|
|
4944
|
+
).option(
|
|
4945
|
+
"--status-interval <seconds>",
|
|
4946
|
+
"Seconds between progress lines (0 to disable periodic printing).",
|
|
4947
|
+
"5"
|
|
4948
|
+
).option("-v, --verbose", "Print a log line after each batch in addition to periodic stats.").option("-q, --quiet", "Suppress periodic progress lines \u2014 only print the final summary.").action(
|
|
4920
4949
|
(ids, opts) => runAction(async () => {
|
|
4921
|
-
const { arbi } = await resolveWorkspace();
|
|
4950
|
+
const { arbi } = await resolveWorkspace(void 0, { skipNotifications: true });
|
|
4922
4951
|
let docIds;
|
|
4923
4952
|
if (ids && ids.length > 0) {
|
|
4953
|
+
if (opts.ext || opts.folder) {
|
|
4954
|
+
warn("--ext/--folder are ignored when explicit IDs are given.");
|
|
4955
|
+
}
|
|
4924
4956
|
docIds = ids;
|
|
4925
4957
|
} else if (opts.status) {
|
|
4926
4958
|
const allDocs = await sdk.documents.listDocuments(arbi);
|
|
@@ -4931,6 +4963,12 @@ function registerDocsCommand(program2) {
|
|
|
4931
4963
|
(d) => (d.folder ?? "").toLowerCase().includes(pattern)
|
|
4932
4964
|
);
|
|
4933
4965
|
}
|
|
4966
|
+
if (opts.ext) {
|
|
4967
|
+
const ext = opts.ext.toLowerCase().replace(/^\./, "");
|
|
4968
|
+
filtered = filtered.filter(
|
|
4969
|
+
(d) => (d.file_name ?? "").toLowerCase().endsWith("." + ext)
|
|
4970
|
+
);
|
|
4971
|
+
}
|
|
4934
4972
|
docIds = filtered.map((d) => d.external_id);
|
|
4935
4973
|
} else {
|
|
4936
4974
|
error("Provide document IDs or use --status to select documents to reprocess.");
|
|
@@ -4948,18 +4986,115 @@ function registerDocsCommand(program2) {
|
|
|
4948
4986
|
return;
|
|
4949
4987
|
}
|
|
4950
4988
|
const batchSize = parseInt(opts.batchSize ?? "50", 10);
|
|
4951
|
-
|
|
4952
|
-
|
|
4953
|
-
|
|
4954
|
-
|
|
4955
|
-
|
|
4956
|
-
|
|
4957
|
-
|
|
4958
|
-
|
|
4959
|
-
|
|
4960
|
-
|
|
4989
|
+
const maxRetries = parseInt(opts.maxRetries ?? "3", 10);
|
|
4990
|
+
const intervalSec = parseFloat(opts.interval ?? "0");
|
|
4991
|
+
const intervalMs = Number.isFinite(intervalSec) && intervalSec > 0 ? intervalSec * 1e3 : 0;
|
|
4992
|
+
const statusIntervalSec = parseFloat(opts.statusInterval ?? "5");
|
|
4993
|
+
const statusIntervalMs = Number.isFinite(statusIntervalSec) && statusIntervalSec > 0 ? statusIntervalSec * 1e3 : 0;
|
|
4994
|
+
const total = docIds.length;
|
|
4995
|
+
const startMs = Date.now();
|
|
4996
|
+
let submitted = 0;
|
|
4997
|
+
let succeeded = 0;
|
|
4998
|
+
let failed = 0;
|
|
4999
|
+
const failedIds = [];
|
|
5000
|
+
const formatDuration = (ms) => {
|
|
5001
|
+
const s = Math.floor(ms / 1e3);
|
|
5002
|
+
if (s < 60) return `${s}s`;
|
|
5003
|
+
const m = Math.floor(s / 60);
|
|
5004
|
+
if (m < 60) return `${m}m${s % 60}s`;
|
|
5005
|
+
const h = Math.floor(m / 60);
|
|
5006
|
+
return `${h}h${m % 60}m`;
|
|
5007
|
+
};
|
|
5008
|
+
const printStatus = (prefix = "") => {
|
|
5009
|
+
const elapsed = Date.now() - startMs;
|
|
5010
|
+
const rate = submitted > 0 ? submitted / elapsed * 1e3 : 0;
|
|
5011
|
+
const remaining = total - submitted;
|
|
5012
|
+
const etaMs = rate > 0 ? remaining / rate * 1e3 : 0;
|
|
5013
|
+
const bar = `submitted=${submitted}/${total} ok=${succeeded} fail=${failed} rate=${rate.toFixed(1)}/s elapsed=${formatDuration(elapsed)}` + (rate > 0 && remaining > 0 ? ` eta=${formatDuration(etaMs)}` : "");
|
|
5014
|
+
console.log(`${prefix}${bar}`);
|
|
5015
|
+
};
|
|
5016
|
+
let statusTimer;
|
|
5017
|
+
if (!opts.quiet && statusIntervalMs > 0) {
|
|
5018
|
+
statusTimer = setInterval(() => printStatus(" "), statusIntervalMs);
|
|
5019
|
+
}
|
|
5020
|
+
const sleep = (ms) => new Promise((resolve3) => setTimeout(resolve3, ms));
|
|
5021
|
+
const backoff = async (attempt) => {
|
|
5022
|
+
const delayMs = Math.min(1e3 * Math.pow(2, attempt), 3e4);
|
|
5023
|
+
await sleep(delayMs);
|
|
5024
|
+
};
|
|
5025
|
+
const submitBatch = async (batchIds) => {
|
|
5026
|
+
const updates = batchIds.map((id) => ({
|
|
5027
|
+
external_id: id,
|
|
5028
|
+
status: "processing"
|
|
5029
|
+
}));
|
|
5030
|
+
try {
|
|
5031
|
+
await sdk.documents.updateDocuments(
|
|
5032
|
+
arbi,
|
|
5033
|
+
updates
|
|
5034
|
+
);
|
|
5035
|
+
return true;
|
|
5036
|
+
} catch {
|
|
5037
|
+
return false;
|
|
5038
|
+
}
|
|
5039
|
+
};
|
|
5040
|
+
try {
|
|
5041
|
+
for (let i = 0; i < docIds.length; i += batchSize) {
|
|
5042
|
+
const batch = docIds.slice(i, i + batchSize);
|
|
5043
|
+
let batchOk = false;
|
|
5044
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
5045
|
+
batchOk = await submitBatch(batch);
|
|
5046
|
+
if (batchOk) break;
|
|
5047
|
+
if (attempt < maxRetries) {
|
|
5048
|
+
await backoff(attempt);
|
|
5049
|
+
}
|
|
5050
|
+
}
|
|
5051
|
+
if (batchOk) {
|
|
5052
|
+
succeeded += batch.length;
|
|
5053
|
+
submitted += batch.length;
|
|
5054
|
+
if (opts.verbose) {
|
|
5055
|
+
console.log(` [${submitted}/${total}] Triggered reprocessing...`);
|
|
5056
|
+
}
|
|
5057
|
+
} else {
|
|
5058
|
+
console.log(
|
|
5059
|
+
chalk2__default.default.dim(
|
|
5060
|
+
` batch of ${batch.length} failed after ${maxRetries} retries, falling back to per-doc`
|
|
5061
|
+
)
|
|
5062
|
+
);
|
|
5063
|
+
for (const id of batch) {
|
|
5064
|
+
let docOk = await submitBatch([id]);
|
|
5065
|
+
if (!docOk) {
|
|
5066
|
+
await sleep(500);
|
|
5067
|
+
docOk = await submitBatch([id]);
|
|
5068
|
+
}
|
|
5069
|
+
if (docOk) {
|
|
5070
|
+
succeeded += 1;
|
|
5071
|
+
} else {
|
|
5072
|
+
failed += 1;
|
|
5073
|
+
failedIds.push(id);
|
|
5074
|
+
}
|
|
5075
|
+
submitted += 1;
|
|
5076
|
+
}
|
|
5077
|
+
}
|
|
5078
|
+
if (intervalMs > 0 && i + batchSize < docIds.length) {
|
|
5079
|
+
await sleep(intervalMs);
|
|
5080
|
+
}
|
|
5081
|
+
}
|
|
5082
|
+
} finally {
|
|
5083
|
+
if (statusTimer) clearInterval(statusTimer);
|
|
5084
|
+
}
|
|
5085
|
+
console.log("");
|
|
5086
|
+
printStatus();
|
|
5087
|
+
if (failed > 0) {
|
|
5088
|
+
warn(`${failed} document(s) failed to reprocess.`);
|
|
5089
|
+
const preview = failedIds.slice(0, 20);
|
|
5090
|
+
preview.forEach((id) => console.log(` ${chalk2__default.default.red("\u2717")} ${id}`));
|
|
5091
|
+
if (failedIds.length > preview.length) {
|
|
5092
|
+
console.log(chalk2__default.default.dim(` ... and ${failedIds.length - preview.length} more`));
|
|
5093
|
+
}
|
|
5094
|
+
}
|
|
5095
|
+
if (succeeded > 0) {
|
|
5096
|
+
success(`Triggered reprocessing for ${succeeded} document(s).`);
|
|
4961
5097
|
}
|
|
4962
|
-
success(`Triggered reprocessing for ${docIds.length} document(s).`);
|
|
4963
5098
|
})()
|
|
4964
5099
|
);
|
|
4965
5100
|
}
|
|
@@ -7745,7 +7880,7 @@ console.info = (...args) => {
|
|
|
7745
7880
|
_origInfo(...args);
|
|
7746
7881
|
};
|
|
7747
7882
|
var program = new commander.Command();
|
|
7748
|
-
program.name("arbi").description("ARBI CLI \u2014 interact with ARBI from the terminal").version("0.3.30");
|
|
7883
|
+
program.name("arbi").description("ARBI CLI \u2014 interact with ARBI from the terminal").version("0.3.32");
|
|
7749
7884
|
registerConfigCommand(program);
|
|
7750
7885
|
registerLoginCommand(program);
|
|
7751
7886
|
registerRegisterCommand(program);
|