@oxygen-agent/cli 1.123.1 → 1.125.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +157 -20
- package/node_modules/@oxygen/shared/dist/file-import.d.ts +4 -0
- package/node_modules/@oxygen/shared/dist/file-import.js +110 -0
- package/node_modules/@oxygen/shared/dist/object-storage.d.ts +8 -0
- package/node_modules/@oxygen/shared/dist/object-storage.js +20 -1
- package/node_modules/@oxygen/shared/dist/version.d.ts +1 -1
- package/node_modules/@oxygen/shared/dist/version.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
package/dist/index.js
CHANGED
|
@@ -212,6 +212,12 @@ export function createProgram() {
|
|
|
212
212
|
.option("--json", "Print a JSON envelope.")
|
|
213
213
|
.action(async (options) => {
|
|
214
214
|
await handleAuthUseTokenAction(options);
|
|
215
|
+
}))
|
|
216
|
+
.addCommand(new Command("doctor")
|
|
217
|
+
.description("Diagnose the active Oxygen CLI credentials without printing token material.")
|
|
218
|
+
.option("--json", "Print a JSON envelope.")
|
|
219
|
+
.action(async (options) => {
|
|
220
|
+
await handleAuthDoctorAction(options);
|
|
215
221
|
}));
|
|
216
222
|
program
|
|
217
223
|
.command("profiles")
|
|
@@ -434,6 +440,27 @@ export function createProgram() {
|
|
|
434
440
|
},
|
|
435
441
|
});
|
|
436
442
|
});
|
|
443
|
+
}))
|
|
444
|
+
.addCommand(new Command("reconcile")
|
|
445
|
+
.description("Inspect tenant database registry drift. Staff only; defaults to dry-run.")
|
|
446
|
+
.option("--dry-run", "Report drift without applying repairs. This is the default.")
|
|
447
|
+
.option("--apply", "Apply safe registry repairs. Requires --confirm.")
|
|
448
|
+
.option("--confirm", "Confirm --apply for safe registry repairs.")
|
|
449
|
+
.option("--json", "Print a JSON envelope.")
|
|
450
|
+
.action(async (options) => {
|
|
451
|
+
await handleAsyncAction("db reconcile", options, () => {
|
|
452
|
+
if (options.apply && !options.confirm) {
|
|
453
|
+
throw new OxygenError("confirmation_required", "Refusing to apply tenant database reconciliation without --confirm.", { exitCode: 1 });
|
|
454
|
+
}
|
|
455
|
+
return requestOxygen("/api/cli/db/reconcile", {
|
|
456
|
+
method: "POST",
|
|
457
|
+
body: {
|
|
458
|
+
apply: Boolean(options.apply),
|
|
459
|
+
dry_run: !options.apply,
|
|
460
|
+
confirm: Boolean(options.confirm),
|
|
461
|
+
},
|
|
462
|
+
});
|
|
463
|
+
});
|
|
437
464
|
}))
|
|
438
465
|
.addCommand(new Command("cost-policy")
|
|
439
466
|
.description("Show tenant database cost controls and reconciliation status.")
|
|
@@ -2014,7 +2041,7 @@ export function createProgram() {
|
|
|
2014
2041
|
.description("Compile a company-search prompt into ordered provider routes without provider calls.")
|
|
2015
2042
|
.requiredOption("--prompt <text-or-file>", "Company-search prompt, or a path to a prompt file.")
|
|
2016
2043
|
.option("--target-count <n>", "Desired company count for routing and estimates.")
|
|
2017
|
-
.option("--source-intent <intent>", "Override detected intent: structured, technology, hiring, local, known_source, web, or fallback.")
|
|
2044
|
+
.option("--source-intent <intent>", "Override detected intent: sizing, structured, technology, hiring, local, known_source, concept, web, url, or fallback.")
|
|
2018
2045
|
.option("--materialize-preview", "Create a preview table with route rows.")
|
|
2019
2046
|
.option("--json", "Print a JSON envelope.")
|
|
2020
2047
|
.action(async (options) => {
|
|
@@ -2035,6 +2062,7 @@ export function createProgram() {
|
|
|
2035
2062
|
.option("--max-credits <n>", "Required credit ceiling for live runs.")
|
|
2036
2063
|
.option("--target-count <n>", "Desired company count when planning from --prompt.")
|
|
2037
2064
|
.option("--source-intent <intent>", "Override detected intent when planning from --prompt.")
|
|
2065
|
+
.option("--preflight-complete", "Confirm required descriptor, count, enum, or provider-specific preflight checks before live mode.")
|
|
2038
2066
|
.option("--approved", "Required for live runs after inspecting dry-run output.")
|
|
2039
2067
|
.option("--json", "Print a JSON envelope.")
|
|
2040
2068
|
.action(async (options) => {
|
|
@@ -2093,7 +2121,7 @@ export function createProgram() {
|
|
|
2093
2121
|
}))
|
|
2094
2122
|
.addCommand(new Command("failures")
|
|
2095
2123
|
.description("List failed background action and ingestion items.")
|
|
2096
|
-
.option("--queue <queue>", "all, actions, ingestions, or
|
|
2124
|
+
.option("--queue <queue>", "all, actions, ingestions, or postgres_jobs. Defaults to all. Legacy aliases: bullmq, redis, jobs.")
|
|
2097
2125
|
.option("--limit <n>", "Maximum failed items per queue. Defaults to 25; server cap is 100.")
|
|
2098
2126
|
.option("--json", "Print a JSON envelope.")
|
|
2099
2127
|
.action(async (options) => {
|
|
@@ -2118,13 +2146,13 @@ export function createProgram() {
|
|
|
2118
2146
|
}));
|
|
2119
2147
|
}))
|
|
2120
2148
|
.addCommand(new Command("run-once")
|
|
2121
|
-
.description("Compatibility alias for worker repair;
|
|
2122
|
-
.option("--claim-limit <n>", "Compatibility option; ignored by the
|
|
2123
|
-
.option("--concurrency <n>", "Compatibility option; ignored by the
|
|
2124
|
-
.option("--enrichment-concurrency <n>", "Compatibility option; ignored by the
|
|
2125
|
-
.option("--lease-seconds <n>", "Compatibility option; ignored by the
|
|
2126
|
-
.option("--provider-timeout-ms <n>", "Compatibility option; ignored by the
|
|
2127
|
-
.option("--recipe-timeout-ms <n>", "Compatibility option; ignored by the
|
|
2149
|
+
.description("Compatibility alias for worker repair; repairs Postgres-backed worker state for the current organization.")
|
|
2150
|
+
.option("--claim-limit <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2151
|
+
.option("--concurrency <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2152
|
+
.option("--enrichment-concurrency <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2153
|
+
.option("--lease-seconds <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2154
|
+
.option("--provider-timeout-ms <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2155
|
+
.option("--recipe-timeout-ms <n>", "Compatibility option; ignored by the Postgres worker.")
|
|
2128
2156
|
.option("--json", "Print a JSON envelope.")
|
|
2129
2157
|
.action(async (options) => {
|
|
2130
2158
|
const claimLimit = readPositiveInt(options.claimLimit);
|
|
@@ -2549,10 +2577,10 @@ export function createProgram() {
|
|
|
2549
2577
|
.description("Preflight an enrichment column without provider calls or credit usage.")
|
|
2550
2578
|
.argument("<table>", "Table id or slug.")
|
|
2551
2579
|
.option("--source-column <column>", "Source column key or id. For mobile_phone this is the LinkedIn URL column.")
|
|
2552
|
-
.option("--full-name-column <column>", "Column key or id containing the person's full name for work_email. Pair with company-domain/name for Prospeo, LeadMagic, Hunter, ContactOut, or
|
|
2580
|
+
.option("--full-name-column <column>", "Column key or id containing the person's full name for work_email. Pair with company-domain/name for Prospeo, RocketReach, LeadMagic, Hunter, Dropleads, ContactOut, BetterContact, or People Data Labs.")
|
|
2553
2581
|
.option("--first-name-column <column>", "Column key or id containing the person's first name for work_email.")
|
|
2554
2582
|
.option("--last-name-column <column>", "Column key or id containing the person's last name for work_email.")
|
|
2555
|
-
.option("--linkedin-url-column <column>", "Column key or id containing the person's LinkedIn URL. Required by
|
|
2583
|
+
.option("--linkedin-url-column <column>", "Column key or id containing the person's LinkedIn URL. Required by BlitzAPI and enough for RocketReach, Prospeo, Hunter, ContactOut, BetterContact, Crustdata, or People Data Labs.")
|
|
2556
2584
|
.option("--email-column <column>", "Column key or id containing a known work email for providers that accept email as an identity fallback.")
|
|
2557
2585
|
.option("--company-domain-column <column>", "Column key or id containing the company domain for work_email. Pair with full-name/first+last for name+company providers.")
|
|
2558
2586
|
.option("--company-name-column <column>", "Column key or id containing the company name for work_email when no domain is available.")
|
|
@@ -2560,8 +2588,8 @@ export function createProgram() {
|
|
|
2560
2588
|
.option("--capability <capability>", "Capability to enrich: mobile_phone or work_email. Defaults to mobile_phone.")
|
|
2561
2589
|
.option("--target-column <column>", "Target enrichment column key. Defaults to the capability payload column.")
|
|
2562
2590
|
.option("--on-existing-manual-column <mode>", "How to handle an existing manual target: error, write_if_empty, or create_enrichment_column.")
|
|
2563
|
-
.option("--provider-order <providers>", "Comma-separated provider order. Overrides the default waterfall profile.")
|
|
2564
|
-
.option("--email-waterfall-profile <profile>", "Work-email waterfall profile: auto, name_domain, linkedin_url, or first_last_domain.")
|
|
2591
|
+
.option("--provider-order <providers>", "Comma-separated provider order. Overrides the default cost-aware waterfall profile.")
|
|
2592
|
+
.option("--email-waterfall-profile <profile>", "Work-email waterfall profile: auto, name_domain, linkedin_url, or first_last_domain. Defaults prefer BlitzAPI/RocketReach/Prospeo where inputs fit.")
|
|
2565
2593
|
.option("--email-pattern-validation <mode>", "Work-email pattern pre-step: leadmagic_valid_only or disabled.")
|
|
2566
2594
|
.option("--verify-phone", "Validate found phone numbers with ClearoutPhone before returning them.")
|
|
2567
2595
|
.option("--phone-verification-credential-mode <mode>", "ClearoutPhone credential mode for phone verification: managed or user_api_key.")
|
|
@@ -2581,10 +2609,10 @@ export function createProgram() {
|
|
|
2581
2609
|
.description("Create or reuse an enrichment column and queue a background run.")
|
|
2582
2610
|
.argument("<table>", "Table id or slug.")
|
|
2583
2611
|
.option("--source-column <column>", "Source column key or id. For mobile_phone this is the LinkedIn URL column.")
|
|
2584
|
-
.option("--full-name-column <column>", "Column key or id containing the person's full name for work_email. Pair with company-domain/name for Prospeo, LeadMagic, Hunter, ContactOut, or
|
|
2612
|
+
.option("--full-name-column <column>", "Column key or id containing the person's full name for work_email. Pair with company-domain/name for Prospeo, RocketReach, LeadMagic, Hunter, Dropleads, ContactOut, BetterContact, or People Data Labs.")
|
|
2585
2613
|
.option("--first-name-column <column>", "Column key or id containing the person's first name for work_email.")
|
|
2586
2614
|
.option("--last-name-column <column>", "Column key or id containing the person's last name for work_email.")
|
|
2587
|
-
.option("--linkedin-url-column <column>", "Column key or id containing the person's LinkedIn URL. Required by
|
|
2615
|
+
.option("--linkedin-url-column <column>", "Column key or id containing the person's LinkedIn URL. Required by BlitzAPI and enough for RocketReach, Prospeo, Hunter, ContactOut, BetterContact, Crustdata, or People Data Labs.")
|
|
2588
2616
|
.option("--email-column <column>", "Column key or id containing a known work email for providers that accept email as an identity fallback.")
|
|
2589
2617
|
.option("--company-domain-column <column>", "Column key or id containing the company domain for work_email. Pair with full-name/first+last for name+company providers.")
|
|
2590
2618
|
.option("--company-name-column <column>", "Column key or id containing the company name for work_email when no domain is available.")
|
|
@@ -2593,8 +2621,8 @@ export function createProgram() {
|
|
|
2593
2621
|
.option("--capability <capability>", "Capability to enrich: mobile_phone or work_email. Defaults to mobile_phone.")
|
|
2594
2622
|
.option("--target-column <column>", "Target enrichment column key. Defaults to the capability payload column.")
|
|
2595
2623
|
.option("--on-existing-manual-column <mode>", "How to handle an existing manual target: error, write_if_empty, or create_enrichment_column.")
|
|
2596
|
-
.option("--provider-order <providers>", "Comma-separated provider order. Overrides the default waterfall profile.")
|
|
2597
|
-
.option("--email-waterfall-profile <profile>", "Work-email waterfall profile: auto, name_domain, linkedin_url, or first_last_domain.")
|
|
2624
|
+
.option("--provider-order <providers>", "Comma-separated provider order. Overrides the default cost-aware waterfall profile.")
|
|
2625
|
+
.option("--email-waterfall-profile <profile>", "Work-email waterfall profile: auto, name_domain, linkedin_url, or first_last_domain. Defaults prefer BlitzAPI/RocketReach/Prospeo where inputs fit.")
|
|
2598
2626
|
.option("--email-pattern-validation <mode>", "Work-email pattern pre-step: leadmagic_valid_only or disabled.")
|
|
2599
2627
|
.option("--verify-phone", "Validate found phone numbers with ClearoutPhone before returning them.")
|
|
2600
2628
|
.option("--phone-verification-credential-mode <mode>", "ClearoutPhone credential mode for phone verification: managed or user_api_key.")
|
|
@@ -2748,8 +2776,8 @@ export function createProgram() {
|
|
|
2748
2776
|
}))
|
|
2749
2777
|
.addCommand(new Command("connect")
|
|
2750
2778
|
.description("Connect a Composio integration. OAuth toolkits return a redirect URL; API-key toolkits accept --api-key.")
|
|
2751
|
-
.argument("<integration_id>", "Integration id, such as 'slack' or '
|
|
2752
|
-
.option("--api-key <value>", "API key for Composio API-key toolkits (e.g.
|
|
2779
|
+
.argument("<integration_id>", "Integration id, such as 'slack' or 'serpapi'.")
|
|
2780
|
+
.option("--api-key <value>", "API key for Composio API-key toolkits (e.g. SerpAPI, Resend).")
|
|
2753
2781
|
.option("--json", "Print a JSON envelope.")
|
|
2754
2782
|
.action(async (integrationId, options) => {
|
|
2755
2783
|
await handleAsyncAction("integrations connect", options, async () => {
|
|
@@ -3454,7 +3482,7 @@ runId, options) {
|
|
|
3454
3482
|
...(queuedWithoutWorker
|
|
3455
3483
|
? {
|
|
3456
3484
|
worker_status: "queued_no_worker",
|
|
3457
|
-
guidance: "No worker has claimed this workflow run. Check `oxygen worker queue-stats --json` for
|
|
3485
|
+
guidance: "No worker has claimed this workflow run. Check `oxygen worker queue-stats --json` for Postgres-backed worker queue health.",
|
|
3458
3486
|
}
|
|
3459
3487
|
: {}),
|
|
3460
3488
|
},
|
|
@@ -3652,6 +3680,7 @@ function readCompaniesSearchRunBody(options) {
|
|
|
3652
3680
|
...(targetCount !== undefined ? { target_count: targetCount } : {}),
|
|
3653
3681
|
...(options.sourceIntent ? { source_intent: options.sourceIntent } : {}),
|
|
3654
3682
|
...(options.approved ? { approved: true } : {}),
|
|
3683
|
+
...(options.preflightComplete ? { preflight_complete: true } : {}),
|
|
3655
3684
|
};
|
|
3656
3685
|
}
|
|
3657
3686
|
function readCompanySearchPlanJson(value) {
|
|
@@ -4870,6 +4899,80 @@ async function handleAuthUseTokenAction(options) {
|
|
|
4870
4899
|
process.exitCode = error instanceof OxygenError ? error.exitCode : 1;
|
|
4871
4900
|
}
|
|
4872
4901
|
}
|
|
4902
|
+
async function handleAuthDoctorAction(options) {
|
|
4903
|
+
try {
|
|
4904
|
+
const data = await runAuthDoctor();
|
|
4905
|
+
if (options.json) {
|
|
4906
|
+
writeJson(success("auth doctor", data));
|
|
4907
|
+
return;
|
|
4908
|
+
}
|
|
4909
|
+
process.stdout.write(formatAuthDoctor(data));
|
|
4910
|
+
}
|
|
4911
|
+
catch (error) {
|
|
4912
|
+
const failure = toFailure("auth doctor", error);
|
|
4913
|
+
writeJson(failure);
|
|
4914
|
+
process.exitCode = error instanceof OxygenError ? error.exitCode : 1;
|
|
4915
|
+
}
|
|
4916
|
+
}
|
|
4917
|
+
async function runAuthDoctor() {
|
|
4918
|
+
let context = null;
|
|
4919
|
+
let profileError = null;
|
|
4920
|
+
try {
|
|
4921
|
+
context = await resolveActiveProfileWithSource();
|
|
4922
|
+
}
|
|
4923
|
+
catch (error) {
|
|
4924
|
+
const failure = toFailure("profiles current", error);
|
|
4925
|
+
profileError = {
|
|
4926
|
+
code: failure.error.code,
|
|
4927
|
+
message: failure.error.message,
|
|
4928
|
+
...(failure.error.details !== undefined ? { details: failure.error.details } : {}),
|
|
4929
|
+
};
|
|
4930
|
+
}
|
|
4931
|
+
const credentials = context?.resolution.credentials ?? null;
|
|
4932
|
+
const apiUrl = credentials?.apiUrl ?? defaultApiUrl();
|
|
4933
|
+
const healthCredentials = credentials ?? { token: "", apiUrl };
|
|
4934
|
+
const health = await runAuthDoctorCheck(() => requestOxygen("/api/health", {
|
|
4935
|
+
credentials: healthCredentials,
|
|
4936
|
+
requireAuth: false,
|
|
4937
|
+
enforceMinimumCliVersion: false,
|
|
4938
|
+
}));
|
|
4939
|
+
const identity = credentials
|
|
4940
|
+
? await runAuthDoctorCheck(() => requestOxygen("/api/cli/whoami", {
|
|
4941
|
+
credentials,
|
|
4942
|
+
enforceMinimumCliVersion: false,
|
|
4943
|
+
}))
|
|
4944
|
+
: { ok: false, skipped: true, reason: "no_credentials" };
|
|
4945
|
+
const cachedIdentity = credentials?.identity ?? null;
|
|
4946
|
+
return {
|
|
4947
|
+
profile: context?.resolution.exists ? context.resolution.name : null,
|
|
4948
|
+
profile_source: context?.source ?? null,
|
|
4949
|
+
stored: Boolean(context?.resolution.exists),
|
|
4950
|
+
api_url: apiUrl,
|
|
4951
|
+
auth_kind: credentials?.authKind ?? null,
|
|
4952
|
+
token_fingerprint: credentials ? formatFingerprint(createCredentialFingerprint(credentials.token)) : null,
|
|
4953
|
+
cached_organization: credentials?.activeOrganization ?? cachedIdentity?.organization ?? null,
|
|
4954
|
+
cached_user: cachedIdentity?.user ?? null,
|
|
4955
|
+
profile_error: profileError,
|
|
4956
|
+
health,
|
|
4957
|
+
identity,
|
|
4958
|
+
};
|
|
4959
|
+
}
|
|
4960
|
+
async function runAuthDoctorCheck(fn) {
|
|
4961
|
+
try {
|
|
4962
|
+
return { ok: true, data: await fn() };
|
|
4963
|
+
}
|
|
4964
|
+
catch (error) {
|
|
4965
|
+
const failure = toFailure("auth doctor", error);
|
|
4966
|
+
return {
|
|
4967
|
+
ok: false,
|
|
4968
|
+
error: {
|
|
4969
|
+
code: failure.error.code,
|
|
4970
|
+
message: failure.error.message,
|
|
4971
|
+
...(failure.error.details !== undefined ? { details: failure.error.details } : {}),
|
|
4972
|
+
},
|
|
4973
|
+
};
|
|
4974
|
+
}
|
|
4975
|
+
}
|
|
4873
4976
|
async function handleOrgUseAction(organization, options, command) {
|
|
4874
4977
|
try {
|
|
4875
4978
|
const data = await requestOxygen("/api/cli/orgs/select", {
|
|
@@ -5560,6 +5663,40 @@ function formatProfilesList(data) {
|
|
|
5560
5663
|
}
|
|
5561
5664
|
return lines.join("\n");
|
|
5562
5665
|
}
|
|
5666
|
+
function formatAuthDoctor(data) {
|
|
5667
|
+
const styles = ansi(output.isTTY === true && !process.env.NO_COLOR);
|
|
5668
|
+
const lines = [
|
|
5669
|
+
"",
|
|
5670
|
+
styles.bold("Oxygen Auth Doctor"),
|
|
5671
|
+
"",
|
|
5672
|
+
` ${styles.dim("Profile")} ${data.profile ?? "(none)"}${data.profile_source ? ` (${data.profile_source})` : ""}`,
|
|
5673
|
+
` ${styles.dim("API")} ${data.api_url}`,
|
|
5674
|
+
` ${styles.dim("Stored")} ${data.stored ? "yes" : "no"}`,
|
|
5675
|
+
];
|
|
5676
|
+
if (data.auth_kind)
|
|
5677
|
+
lines.push(` ${styles.dim("Auth kind")} ${data.auth_kind}`);
|
|
5678
|
+
if (data.token_fingerprint)
|
|
5679
|
+
lines.push(` ${styles.dim("Fingerprint")} ${data.token_fingerprint}`);
|
|
5680
|
+
if (data.cached_organization) {
|
|
5681
|
+
const org = data.cached_organization.slug ?? data.cached_organization.id;
|
|
5682
|
+
lines.push(` ${styles.dim("Cached org")} ${data.cached_organization.name} (${org})`);
|
|
5683
|
+
}
|
|
5684
|
+
if (data.profile_error) {
|
|
5685
|
+
lines.push(` ${styles.dim("Profile error")} ${data.profile_error.code}: ${data.profile_error.message}`);
|
|
5686
|
+
}
|
|
5687
|
+
lines.push(` ${styles.dim("Health")} ${data.health.ok ? "ok" : `failed (${data.health.error.code})`}`);
|
|
5688
|
+
if ("skipped" in data.identity) {
|
|
5689
|
+
lines.push(` ${styles.dim("Identity")} skipped (${data.identity.reason})`);
|
|
5690
|
+
}
|
|
5691
|
+
else {
|
|
5692
|
+
lines.push(` ${styles.dim("Identity")} ${data.identity.ok ? "ok" : `failed (${data.identity.error.code})`}`);
|
|
5693
|
+
if (!data.identity.ok) {
|
|
5694
|
+
lines.push(` ${styles.dim("Identity error")} ${data.identity.error.message}`);
|
|
5695
|
+
}
|
|
5696
|
+
}
|
|
5697
|
+
lines.push("");
|
|
5698
|
+
return lines.join("\n");
|
|
5699
|
+
}
|
|
5563
5700
|
function formatProfileOrgCell(profile) {
|
|
5564
5701
|
if (!profile.organization)
|
|
5565
5702
|
return "(unknown org — run `oxygen whoami` to refresh)";
|
|
@@ -20,6 +20,10 @@ export declare function iterateRowsFileBufferBatches(buffer: Buffer, format: Row
|
|
|
20
20
|
sheet?: string;
|
|
21
21
|
batchSize?: number;
|
|
22
22
|
}): AsyncGenerator<Record<string, unknown>[]>;
|
|
23
|
+
export declare function iterateRowsFileStreamBatches(chunks: AsyncIterable<Uint8Array | string>, format: RowsFileFormat, options?: {
|
|
24
|
+
sheet?: string;
|
|
25
|
+
batchSize?: number;
|
|
26
|
+
}): AsyncGenerator<Record<string, unknown>[]>;
|
|
23
27
|
export declare function parseRowsText(text: string, format: Exclude<RowsFileFormat, "xlsx">): Record<string, unknown>[];
|
|
24
28
|
export declare function inferImportColumnLabels(rows: Record<string, unknown>[]): string[];
|
|
25
29
|
export declare function normalizeRowsForNewTable(rows: Record<string, unknown>[]): NewTableImportRows;
|
|
@@ -46,6 +46,20 @@ export async function* iterateRowsFileBufferBatches(buffer, format, options = {}
|
|
|
46
46
|
const rows = await parseRowsFileBuffer(buffer, format, options.sheet ? { sheet: options.sheet } : {});
|
|
47
47
|
yield* chunkRows(rows, batchSize);
|
|
48
48
|
}
|
|
49
|
+
export async function* iterateRowsFileStreamBatches(chunks, format, options = {}) {
|
|
50
|
+
const batchSize = normalizeBatchSize(options.batchSize);
|
|
51
|
+
if (format === "csv") {
|
|
52
|
+
yield* iterateCsvRowStreamBatches(chunks, batchSize);
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
if (format === "jsonl") {
|
|
56
|
+
yield* iterateJsonlRowStreamBatches(chunks, batchSize);
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
const buffer = await streamToLimitedBuffer(chunks, format);
|
|
60
|
+
const rows = await parseRowsFileBuffer(buffer, format, options.sheet ? { sheet: options.sheet } : {});
|
|
61
|
+
yield* chunkRows(rows, batchSize);
|
|
62
|
+
}
|
|
49
63
|
export function parseRowsText(text, format) {
|
|
50
64
|
if (format === "json")
|
|
51
65
|
return normalizeRowObjects(parseJsonArray(text));
|
|
@@ -208,6 +222,55 @@ function* iterateCsvRowBatches(text, batchSize) {
|
|
|
208
222
|
if (state.batch.length > 0)
|
|
209
223
|
yield state.batch;
|
|
210
224
|
}
|
|
225
|
+
async function* iterateCsvRowStreamBatches(chunks, batchSize) {
|
|
226
|
+
const decoder = new TextDecoder();
|
|
227
|
+
const state = {
|
|
228
|
+
header: null,
|
|
229
|
+
batch: [],
|
|
230
|
+
record: [],
|
|
231
|
+
field: "",
|
|
232
|
+
inQuotes: false,
|
|
233
|
+
};
|
|
234
|
+
let carry = "";
|
|
235
|
+
for await (const chunk of chunks) {
|
|
236
|
+
const combined = carry + decoder.decode(toUint8Array(chunk), { stream: true });
|
|
237
|
+
if (!combined)
|
|
238
|
+
continue;
|
|
239
|
+
const processUntil = Math.max(0, combined.length - 1);
|
|
240
|
+
for (let index = 0; index < processUntil; index += 1) {
|
|
241
|
+
const ready = applyCsvTextCharacter(state, combined, index, batchSize);
|
|
242
|
+
if (ready.skipNext)
|
|
243
|
+
index += 1;
|
|
244
|
+
if (ready.batch)
|
|
245
|
+
yield ready.batch;
|
|
246
|
+
}
|
|
247
|
+
carry = combined.charAt(combined.length - 1);
|
|
248
|
+
}
|
|
249
|
+
const tail = carry + decoder.decode();
|
|
250
|
+
for (let index = 0; index < tail.length; index += 1) {
|
|
251
|
+
const ready = applyCsvTextCharacter(state, tail, index, batchSize);
|
|
252
|
+
if (ready.skipNext)
|
|
253
|
+
index += 1;
|
|
254
|
+
if (ready.batch)
|
|
255
|
+
yield ready.batch;
|
|
256
|
+
}
|
|
257
|
+
if (state.field || state.record.length > 0) {
|
|
258
|
+
const ready = appendCsvRecordToBatch(state, finishCsvRecord(state), batchSize);
|
|
259
|
+
if (ready)
|
|
260
|
+
yield ready;
|
|
261
|
+
}
|
|
262
|
+
if (state.batch.length > 0)
|
|
263
|
+
yield state.batch;
|
|
264
|
+
}
|
|
265
|
+
function applyCsvTextCharacter(state, text, index, batchSize) {
|
|
266
|
+
const result = applyCsvCharacter(state, text.charAt(index), text.charAt(index + 1));
|
|
267
|
+
if (!result.recordComplete)
|
|
268
|
+
return { skipNext: result.skipNext, batch: null };
|
|
269
|
+
return {
|
|
270
|
+
skipNext: result.skipNext,
|
|
271
|
+
batch: appendCsvRecordToBatch(state, finishCsvRecord(state), batchSize),
|
|
272
|
+
};
|
|
273
|
+
}
|
|
211
274
|
function applyCsvCharacter(state, char, next) {
|
|
212
275
|
return state.inQuotes
|
|
213
276
|
? applyQuotedCsvCharacter(state, char, next)
|
|
@@ -294,6 +357,53 @@ function* iterateJsonlRowBatches(text, batchSize) {
|
|
|
294
357
|
if (batch.length > 0)
|
|
295
358
|
yield batch;
|
|
296
359
|
}
|
|
360
|
+
async function* iterateJsonlRowStreamBatches(chunks, batchSize) {
|
|
361
|
+
const decoder = new TextDecoder();
|
|
362
|
+
let pending = "";
|
|
363
|
+
let batch = [];
|
|
364
|
+
for await (const chunk of chunks) {
|
|
365
|
+
pending += decoder.decode(toUint8Array(chunk), { stream: true });
|
|
366
|
+
let newline = pending.indexOf("\n");
|
|
367
|
+
while (newline >= 0) {
|
|
368
|
+
const line = pending.slice(0, newline).replace(/\r$/, "").trim();
|
|
369
|
+
pending = pending.slice(newline + 1);
|
|
370
|
+
if (line)
|
|
371
|
+
batch.push(normalizeRowObject(JSON.parse(line)));
|
|
372
|
+
if (batch.length >= batchSize) {
|
|
373
|
+
yield batch;
|
|
374
|
+
batch = [];
|
|
375
|
+
}
|
|
376
|
+
newline = pending.indexOf("\n");
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
const tail = (pending + decoder.decode()).replace(/\r$/, "").trim();
|
|
380
|
+
if (tail)
|
|
381
|
+
batch.push(normalizeRowObject(JSON.parse(tail)));
|
|
382
|
+
if (batch.length > 0)
|
|
383
|
+
yield batch;
|
|
384
|
+
}
|
|
385
|
+
async function streamToLimitedBuffer(chunks, format) {
|
|
386
|
+
const buffers = [];
|
|
387
|
+
let total = 0;
|
|
388
|
+
for await (const chunk of chunks) {
|
|
389
|
+
const buffer = Buffer.from(toUint8Array(chunk));
|
|
390
|
+
total += buffer.byteLength;
|
|
391
|
+
if (total > MAX_BUFFERED_IMPORT_PARSE_BYTES) {
|
|
392
|
+
throw new OxygenError("import_file_too_large_for_format", `${format.toUpperCase()} imports must be uploaded as CSV or JSONL for large files.`, {
|
|
393
|
+
details: {
|
|
394
|
+
format,
|
|
395
|
+
max_buffered_parse_bytes: MAX_BUFFERED_IMPORT_PARSE_BYTES,
|
|
396
|
+
},
|
|
397
|
+
exitCode: 1,
|
|
398
|
+
});
|
|
399
|
+
}
|
|
400
|
+
buffers.push(buffer);
|
|
401
|
+
}
|
|
402
|
+
return Buffer.concat(buffers);
|
|
403
|
+
}
|
|
404
|
+
function toUint8Array(chunk) {
|
|
405
|
+
return typeof chunk === "string" ? Buffer.from(chunk) : chunk;
|
|
406
|
+
}
|
|
297
407
|
function* chunkRows(rows, batchSize) {
|
|
298
408
|
for (let index = 0; index < rows.length; index += batchSize) {
|
|
299
409
|
yield rows.slice(index, index + batchSize);
|
|
@@ -21,6 +21,14 @@ export declare function presignImportUpload(input: {
|
|
|
21
21
|
export declare function downloadImportObject(input: {
|
|
22
22
|
storageKey: string;
|
|
23
23
|
}): Promise<Buffer>;
|
|
24
|
+
export declare function getImportObjectMetadata(input: {
|
|
25
|
+
storageKey: string;
|
|
26
|
+
}): Promise<{
|
|
27
|
+
contentLength: number | null;
|
|
28
|
+
}>;
|
|
29
|
+
export declare function openImportObjectStream(input: {
|
|
30
|
+
storageKey: string;
|
|
31
|
+
}): Promise<NodeJS.ReadableStream>;
|
|
24
32
|
export declare function deleteImportObject(input: {
|
|
25
33
|
storageKey: string;
|
|
26
34
|
}): Promise<void>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { randomUUID } from "node:crypto";
|
|
2
|
-
import { DeleteObjectCommand, GetObjectCommand, PutObjectCommand, S3Client, } from "@aws-sdk/client-s3";
|
|
2
|
+
import { DeleteObjectCommand, GetObjectCommand, HeadObjectCommand, PutObjectCommand, S3Client, } from "@aws-sdk/client-s3";
|
|
3
3
|
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
|
|
4
4
|
import { OxygenError } from "./index.js";
|
|
5
5
|
// S3-compatible object storage for large CSV/file imports. The CLI uploads the
|
|
@@ -96,6 +96,25 @@ export async function downloadImportObject(input) {
|
|
|
96
96
|
}
|
|
97
97
|
return streamToBuffer(body);
|
|
98
98
|
}
|
|
99
|
+
export async function getImportObjectMetadata(input) {
|
|
100
|
+
const { client, config } = resolveClient();
|
|
101
|
+
const result = await client.send(new HeadObjectCommand({ Bucket: config.bucket, Key: input.storageKey }));
|
|
102
|
+
return {
|
|
103
|
+
contentLength: typeof result.ContentLength === "number" ? result.ContentLength : null,
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
export async function openImportObjectStream(input) {
|
|
107
|
+
const { client, config } = resolveClient();
|
|
108
|
+
const result = await client.send(new GetObjectCommand({ Bucket: config.bucket, Key: input.storageKey }));
|
|
109
|
+
const body = result.Body;
|
|
110
|
+
if (!body) {
|
|
111
|
+
throw new OxygenError("import_object_missing", "Import object had no body.", {
|
|
112
|
+
details: { storage_key: input.storageKey },
|
|
113
|
+
exitCode: 1,
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
return body;
|
|
117
|
+
}
|
|
99
118
|
export async function deleteImportObject(input) {
|
|
100
119
|
const { client, config } = resolveClient();
|
|
101
120
|
await client.send(new DeleteObjectCommand({ Bucket: config.bucket, Key: input.storageKey }));
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const OXYGEN_VERSION = "1.123.1";
|
|
1
|
+
export declare const OXYGEN_VERSION = "1.125.19";
|
|
2
2
|
export declare const OXYGEN_MINIMUM_CLI_VERSION = "1.0.0";
|