arrow-config-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # arrow-config-cli
2
+
3
+ CLI tool for Apache Arrow/Parquet project setup, schema generation, and format conversion
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install -g arrow-config-cli
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ### Initialize configuration
14
+
15
+ ```bash
16
+ arrow-config init
17
+ arrow-config init --template advanced
18
+ arrow-config init --output custom-config.json
19
+ ```
20
+
21
+ ### Validate configuration
22
+
23
+ ```bash
24
+ arrow-config validate
25
+ arrow-config validate path/to/config.json
26
+ ```
27
+
28
+ ### View configuration
29
+
30
+ ```bash
31
+ arrow-config show
32
+ arrow-config show --env production
33
+ arrow-config show --json
34
+ ```
35
+
36
+ ### Modify configuration
37
+
38
+ ```bash
39
+ arrow-config set settings.debug true
40
+ arrow-config set settings.logLevel "warn"
41
+ ```
42
+
43
+ ### Compare configurations
44
+
45
+ ```bash
46
+ arrow-config diff config-dev.json config-prod.json
47
+ ```
48
+
49
+ ### List templates
50
+
51
+ ```bash
52
+ arrow-config templates
53
+ ```
54
+
55
+ ## Templates
56
+
57
+ | Template | Description |
58
+ |----------|-------------|
59
+ | `minimal` | Bare minimum configuration |
60
+ | `standard` | Recommended defaults for most projects |
61
+ | `advanced` | Full-featured with security, caching, and multi-environment support |
62
+
63
+ ## Why arrow-config-cli?
64
+
65
+ - **Lightweight runtime** — only two dependencies, `commander` and `chalk`
66
+ - **Template-based** — start with minimal, standard, or advanced presets
67
+ - **Validation built-in** — catch config errors before deployment
68
+ - **Environment-aware** — manage dev/staging/production configs in one file
69
+ - **Diff support** — compare configs across environments
70
+
71
+ ## License
72
+
73
+ MIT
@@ -0,0 +1,2 @@
1
+
2
+ export { }
package/dist/index.js ADDED
@@ -0,0 +1,316 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/index.ts
4
+ import { Command } from "commander";
5
+ import chalk from "chalk";
6
+ var program = new Command();
7
+ program.name("arrow-config").description("CLI tool for Apache Arrow/Parquet project setup and utilities").version("1.0.0");
8
+ program.command("init").description("Create Apache Arrow/Parquet project setup with schema definition and read/write helpers for Node.js").option("-n, --name <name>", "project name", "my-arrow-project").action((opts) => {
9
+ console.log(chalk.bold.cyan("\n Apache Arrow / Parquet Project Setup\n"));
10
+ console.log(chalk.gray("Project:"), chalk.white(opts.name));
11
+ console.log();
12
+ console.log(chalk.bold.yellow("1. Install dependencies"));
13
+ console.log(chalk.gray("\u2500".repeat(50)));
14
+ console.log(chalk.green("npm install apache-arrow parquet-wasm"));
15
+ console.log(chalk.green("npm install -D @types/node typescript ts-node"));
16
+ console.log();
17
+ console.log(chalk.bold.yellow("2. package.json"));
18
+ console.log(chalk.gray("\u2500".repeat(50)));
19
+ console.log(chalk.white(`{
20
+ "name": "${opts.name}",
21
+ "type": "module",
22
+ "scripts": {
23
+ "start": "node --loader ts-node/esm src/index.ts"
24
+ }
25
+ }`));
26
+ console.log();
27
+ console.log(chalk.bold.yellow("3. Schema definition \u2192 src/schema.ts"));
28
+ console.log(chalk.gray("\u2500".repeat(50)));
29
+ console.log(chalk.white(`import { Schema, Field, Int32, Utf8, Float64, Bool } from "apache-arrow";
30
+
31
+ export const userSchema = new Schema([
32
+ new Field("id", new Int32(), false),
33
+ new Field("name", new Utf8(), false),
34
+ new Field("score", new Float64(), true),
35
+ new Field("active",new Bool(), true),
36
+ ]);`));
37
+ console.log();
38
+ console.log(chalk.bold.yellow("4. Write helper \u2192 src/write.ts"));
39
+ console.log(chalk.gray("\u2500".repeat(50)));
40
+ console.log(chalk.white(`import { tableFromArrays, RecordBatchWriter } from "apache-arrow";
41
+ import { writeFileSync } from "fs";
42
+ import { userSchema } from "./schema.js";
43
+
44
+ export function writeArrowFile(path: string) {
45
+ const table = tableFromArrays(
46
+ {
47
+ id: Int32Array.from([1, 2, 3]),
48
+ name: ["Alice", "Bob", "Charlie"],
49
+ score: Float64Array.from([9.5, 8.2, 7.7]),
50
+ active: [true, false, true],
51
+ },
52
+ { schema: userSchema }
53
+ );
54
+
55
+ const writer = RecordBatchWriter.throughNode({ autoDestroy: true });
56
+ const chunks: Buffer[] = [];
57
+ writer.on("data", (c: Buffer) => chunks.push(c));
58
+ writer.write(table);
59
+ writer.finish();
60
+ writeFileSync(path, Buffer.concat(chunks));
61
+ console.log("Wrote Arrow file:", path);
62
+ }`));
63
+ console.log();
64
+ console.log(chalk.bold.yellow("5. Read helper \u2192 src/read.ts"));
65
+ console.log(chalk.gray("\u2500".repeat(50)));
66
+ console.log(chalk.white(`import { tableFromIPC } from "apache-arrow";
67
+ import { readFileSync } from "fs";
68
+
69
+ export function readArrowFile(path: string) {
70
+ const buffer = readFileSync(path);
71
+ const table = tableFromIPC(buffer);
72
+ console.log("Schema:", table.schema.toString());
73
+ console.log("Rows: ", table.numRows);
74
+ return table;
75
+ }`));
76
+ console.log();
77
+ console.log(chalk.bold.yellow("6. Entry point \u2192 src/index.ts"));
78
+ console.log(chalk.gray("\u2500".repeat(50)));
79
+ console.log(chalk.white(`import { writeArrowFile } from "./write.js";
80
+ import { readArrowFile } from "./read.js";
81
+
82
+ writeArrowFile("data.arrow");
83
+ const table = readArrowFile("data.arrow");
84
+ console.log(table.toArray());`));
85
+ console.log();
86
+ console.log(chalk.bold.green("Done! Run:"), chalk.white("npm run start"));
87
+ console.log();
88
+ });
89
+ program.command("schema").description("Generate Arrow schema with all supported field types").option("--name <name>", "schema variable name", "mySchema").action((opts) => {
90
+ console.log(chalk.bold.cyan("\n Arrow Schema Generator\n"));
91
+ console.log(chalk.bold.yellow("Supported field types"));
92
+ console.log(chalk.gray("\u2500".repeat(50)));
93
+ const types = [
94
+ { type: "Int32", arrow: "new Int32()", note: "32-bit signed integer" },
95
+ { type: "Int64", arrow: "new Int64()", note: "64-bit signed integer (BigInt)" },
96
+ { type: "Float32", arrow: "new Float32()", note: "32-bit float" },
97
+ { type: "Float64", arrow: "new Float64()", note: "64-bit double (default numeric)" },
98
+ { type: "Utf8", arrow: "new Utf8()", note: "variable-length UTF-8 string" },
99
+ { type: "Bool", arrow: "new Bool()", note: "boolean" },
100
+ { type: "Date", arrow: "new DateDay()", note: "days since Unix epoch" },
101
+ { type: "Timestamp", arrow: "new TimestampMillisecond()", note: "ms since Unix epoch" },
102
+ { type: "List", arrow: "new List(new Field('item', new Int32()))", note: "typed array / list" },
103
+ { type: "Struct", arrow: "new Struct([new Field('x', new Float64()), new Field('y', new Float64())])", note: "nested record" }
104
+ ];
105
+ const maxType = Math.max(...types.map((t) => t.type.length));
106
+ for (const t of types) {
107
+ console.log(
108
+ chalk.green(t.type.padEnd(maxType + 2)),
109
+ chalk.gray("\u2192"),
110
+ chalk.white(t.arrow.padEnd(52)),
111
+ chalk.gray(`// ${t.note}`)
112
+ );
113
+ }
114
+ console.log();
115
+ console.log(chalk.bold.yellow(`Generated schema: ${opts.name}`));
116
+ console.log(chalk.gray("\u2500".repeat(50)));
117
+ console.log(chalk.white(`import {
118
+ Schema, Field,
119
+ Int32, Int64, Float32, Float64,
120
+ Utf8, Bool,
121
+ DateDay, TimestampMillisecond,
122
+ List, Struct,
123
+ } from "apache-arrow";
124
+
125
+ export const ${opts.name} = new Schema([
126
+ new Field("id", new Int32(), false), // not nullable
127
+ new Field("big_id", new Int64(), true),
128
+ new Field("price", new Float64(), true),
129
+ new Field("ratio", new Float32(), true),
130
+ new Field("label", new Utf8(), false),
131
+ new Field("active", new Bool(), true),
132
+ new Field("day", new DateDay(), true),
133
+ new Field("created_at", new TimestampMillisecond(), true),
134
+ new Field("tags", new List(new Field("item", new Utf8())), true),
135
+ new Field("coords", new Struct([
136
+ new Field("lat", new Float64()),
137
+ new Field("lon", new Float64()),
138
+ ]), true),
139
+ ]);`));
140
+ console.log();
141
+ console.log(chalk.bold.yellow("Inspect schema at runtime"));
142
+ console.log(chalk.gray("\u2500".repeat(50)));
143
+ console.log(chalk.white(`${opts.name}.fields.forEach(f => {
144
+ console.log(f.name, "\u2192", f.type.toString(), f.nullable ? "(nullable)" : "");
145
+ });`));
146
+ console.log();
147
+ });
148
+ program.command("convert").description("Show conversion code between Arrow/Parquet and CSV/JSON formats").option("--from <format>", "source format: arrow | parquet | csv | json", "csv").option("--to <format>", "target format: arrow | parquet | csv | json", "arrow").action((opts) => {
149
+ const from = opts.from.toLowerCase();
150
+ const to = opts.to.toLowerCase();
151
+ console.log(chalk.bold.cyan(`
152
+ Format Conversion: ${from.toUpperCase()} \u2192 ${to.toUpperCase()}
153
+ `));
154
+ const key = `${from}->${to}`;
155
+ const examples = {
156
+ // ── CSV → Arrow ──
157
+ "csv->arrow": `import { tableFromArrays } from "apache-arrow";
158
+ import { createReadStream } from "fs";
159
+ import { parse } from "csv-parse"; // npm i csv-parse
160
+
161
+ const rows: Record<string, unknown[]> = {};
162
+
163
+ const parser = createReadStream("input.csv").pipe(
164
+ parse({ columns: true, cast: true })
165
+ );
166
+
167
+ for await (const row of parser) {
168
+ for (const [k, v] of Object.entries(row)) {
169
+ (rows[k] ??= []).push(v);
170
+ }
171
+ }
172
+
173
+ const table = tableFromArrays(rows as any);
174
+ console.log(table.schema.toString());`,
175
+ // ── JSON → Arrow ──
176
+ "json->arrow": `import { tableFromJSON } from "apache-arrow";
177
+ import { readFileSync } from "fs";
178
+
179
+ const records = JSON.parse(readFileSync("input.json", "utf8")) as object[];
180
+ const table = tableFromJSON(records);
181
+ console.log("Rows:", table.numRows);`,
182
+ // ── Arrow → JSON ──
183
+ "arrow->json": `import { tableFromIPC } from "apache-arrow";
184
+ import { readFileSync, writeFileSync } from "fs";
185
+
186
+ const buf = readFileSync("input.arrow");
187
+ const table = tableFromIPC(buf);
188
+
189
+ const json = JSON.stringify(
190
+ table.toArray().map(row => row.toJSON()),
191
+ null,
192
+ 2
193
+ );
194
+ writeFileSync("output.json", json);
195
+ console.log("Written output.json");`,
196
+ // ── Arrow → CSV ──
197
+ "arrow->csv": `import { tableFromIPC } from "apache-arrow";
198
+ import { readFileSync, writeFileSync } from "fs";
199
+
200
+ const buf = readFileSync("input.arrow");
201
+ const table = tableFromIPC(buf);
202
+
203
+ const fields = table.schema.fields.map(f => f.name);
204
+ const header = fields.join(",");
205
+ const lines = table.toArray().map(row =>
206
+ fields.map(f => JSON.stringify(row[f] ?? "")).join(",")
207
+ );
208
+ writeFileSync("output.csv", [header, ...lines].join("\\n"));
209
+ console.log("Written output.csv");`,
210
+ // ── Parquet → Arrow ──
211
+ "parquet->arrow": `// Requires: npm i parquet-wasm
212
+ import initParquet, { readParquet } from "parquet-wasm";
213
+ import { tableFromIPC } from "apache-arrow";
214
+ import { readFileSync } from "fs";
215
+
216
+ await initParquet();
217
+ const parquetBuf = readFileSync("input.parquet");
218
+ const arrowBuf = readParquet(new Uint8Array(parquetBuf));
219
+ const table = tableFromIPC(arrowBuf);
220
+ console.log("Rows:", table.numRows);`,
221
+ // ── Arrow → Parquet ──
222
+ "arrow->parquet": `// Requires: npm i parquet-wasm
223
+ import initParquet, { writeParquet, Table as WasmTable } from "parquet-wasm";
224
+ import { tableToIPC } from "apache-arrow";
225
+ import { readFileSync, writeFileSync } from "fs";
226
+ import { tableFromIPC } from "apache-arrow";
227
+
228
+ await initParquet();
229
+ const arrowBuf = readFileSync("input.arrow");
230
+ const table = tableFromIPC(arrowBuf);
231
+ const ipcBytes = tableToIPC(table, "file");
232
+ const parquetData = writeParquet(WasmTable.fromIPCStream(ipcBytes));
233
+ writeFileSync("output.parquet", Buffer.from(parquetData));
234
+ console.log("Written output.parquet");`,
235
+ // ── CSV → Parquet ──
236
+ "csv->parquet": `// csv \u2192 arrow \u2192 parquet pipeline
237
+ import { tableFromJSON } from "apache-arrow";
238
+ import initParquet, { writeParquet, Table as W } from "parquet-wasm";
239
+ import { tableToIPC } from "apache-arrow";
240
+ import { createReadStream, writeFileSync } from "fs";
241
+ import { parse } from "csv-parse"; // npm i csv-parse
242
+
243
+ await initParquet();
244
+
245
+ const records: object[] = [];
246
+ const parser = createReadStream("input.csv").pipe(parse({ columns: true, cast: true }));
247
+ for await (const row of parser) records.push(row);
248
+
249
+ const table = tableFromJSON(records);
250
+ const ipc = tableToIPC(table, "file");
251
+ const parquet = writeParquet(W.fromIPCStream(ipc));
252
+ writeFileSync("output.parquet", Buffer.from(parquet));`,
253
+ // ── JSON → Parquet ──
254
+ "json->parquet": `import { tableFromJSON, tableToIPC } from "apache-arrow";
255
+ import initParquet, { writeParquet, Table as W } from "parquet-wasm";
256
+ import { readFileSync, writeFileSync } from "fs";
257
+
258
+ await initParquet();
259
+ const records = JSON.parse(readFileSync("input.json", "utf8")) as object[];
260
+ const table = tableFromJSON(records);
261
+ const ipc = tableToIPC(table, "file");
262
+ const parquet = writeParquet(W.fromIPCStream(ipc));
263
+ writeFileSync("output.parquet", Buffer.from(parquet));`,
264
+ // ── Parquet → JSON ──
265
+ "parquet->json": `import initParquet, { readParquet } from "parquet-wasm";
266
+ import { tableFromIPC } from "apache-arrow";
267
+ import { readFileSync, writeFileSync } from "fs";
268
+
269
+ await initParquet();
270
+ const pBuf = readFileSync("input.parquet");
271
+ const table = tableFromIPC(readParquet(new Uint8Array(pBuf)));
272
+ const json = JSON.stringify(table.toArray().map(r => r.toJSON()), null, 2);
273
+ writeFileSync("output.json", json);`,
274
+ // ── Parquet → CSV ──
275
+ "parquet->csv": `import initParquet, { readParquet } from "parquet-wasm";
276
+ import { tableFromIPC } from "apache-arrow";
277
+ import { readFileSync, writeFileSync } from "fs";
278
+
279
+ await initParquet();
280
+ const buf = readFileSync("input.parquet");
281
+ const table = tableFromIPC(readParquet(new Uint8Array(buf)));
282
+ const fields = table.schema.fields.map(f => f.name);
283
+ const lines = table.toArray().map(row =>
284
+ fields.map(f => JSON.stringify(row[f] ?? "")).join(",")
285
+ );
286
+ writeFileSync("output.csv", [fields.join(","), ...lines].join("\\n"));`
287
+ };
288
+ const code = examples[key];
289
+ if (code) {
290
+ console.log(chalk.bold.yellow("Dependencies"));
291
+ console.log(chalk.gray("\u2500".repeat(50)));
292
+ if (["csv->arrow", "csv->parquet"].includes(key)) {
293
+ console.log(chalk.green("npm install apache-arrow csv-parse"));
294
+ } else if (key.includes("parquet")) {
295
+ console.log(chalk.green("npm install apache-arrow parquet-wasm"));
296
+ } else {
297
+ console.log(chalk.green("npm install apache-arrow"));
298
+ }
299
+ console.log();
300
+ console.log(chalk.bold.yellow("Conversion code"));
301
+ console.log(chalk.gray("\u2500".repeat(50)));
302
+ console.log(chalk.white(code));
303
+ } else {
304
+ console.log(chalk.red(`No example available for "${from}" \u2192 "${to}".`));
305
+ console.log();
306
+ console.log(chalk.bold.yellow("Supported combinations"));
307
+ console.log(chalk.gray("\u2500".repeat(50)));
308
+ const supported = Object.keys(examples).map((k) => {
309
+ const [a, b] = k.split("->");
310
+ return ` ${chalk.green(a.padEnd(8))} \u2192 ${chalk.green(b)}`;
311
+ });
312
+ console.log(supported.join("\n"));
313
+ }
314
+ console.log();
315
+ });
316
+ program.parse(process.argv);
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "arrow-config-cli",
3
+ "version": "1.0.0",
4
+ "description": "CLI tool for Apache Arrow/Parquet project setup, schema generation, and format conversion",
5
+ "type": "module",
6
+ "bin": {
7
+ "arrow-config": "./dist/index.js"
8
+ },
9
+ "files": [
10
+ "dist"
11
+ ],
12
+ "scripts": {
13
+ "build": "tsup",
14
+ "dev": "tsup --watch",
15
+ "typecheck": "tsc --noEmit"
16
+ },
17
+ "dependencies": {
18
+ "chalk": "^5.3.0",
19
+ "commander": "^12.1.0"
20
+ },
21
+ "devDependencies": {
22
+ "@types/node": "^20.14.0",
23
+ "tsup": "^8.1.0",
24
+ "typescript": "^5.5.0"
25
+ },
26
+ "engines": {
27
+ "node": ">=18"
28
+ },
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/okirmio-create/cli-forge.git",
32
+ "directory": "arrow-config-cli"
33
+ },
34
+ "homepage": "https://github.com/okirmio-create/cli-forge/tree/main/arrow-config-cli",
35
+ "bugs": {
36
+ "url": "https://github.com/okirmio-create/cli-forge/issues"
37
+ }
38
+ }