gitsheets 1.0.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1,12 +1,14 @@
1
1
  // CLI entry. See specs/api/cli.md.
2
- // v1.0 substrate ships: upsert, query, read, normalize.
3
- // infer / migrate-config / edit are tracked as follow-ups against #130, #139.
4
- import { readFile } from 'node:fs/promises';
2
+ import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
3
+ import { tmpdir } from 'node:os';
4
+ import { dirname, isAbsolute, join } from 'node:path';
5
5
  import process from 'node:process';
6
6
  import yargs from 'yargs';
7
7
  import { hideBin } from 'yargs/helpers';
8
8
  import { ConfigError, GitsheetsError, IndexError, NotFoundError, RefError, TransactionError, ValidationError, } from '../errors.js';
9
9
  import { openRepo } from '../repository.js';
10
+ import { parseToml, stringifyRecord } from '../toml.js';
11
+ import { csvHeader, inferInputFormat, parseRecords, stringifyRecord_text, validateInputFormat, validateOutputFormat, } from './formats.js';
10
12
  // Exit codes per specs/api/cli.md
11
13
  function exitCodeForError(err) {
12
14
  if (err instanceof ValidationError)
@@ -46,43 +48,46 @@ function reportError(err) {
46
48
  }
47
49
  out.write(`gitsheets: ${String(err)}\n`);
48
50
  }
49
/**
 * Resolve a command's `<input>` argument to its text content.
 *
 * - `undefined` or `'-'`: drain stdin and decode the bytes with `encoding`.
 * - Text whose first non-whitespace character is `{` or `[`: treated as inline
 *   JSON and returned unchanged (no decoding is applied).
 * - Anything else: treated as a file path and read with `encoding`.
 *
 * @param {string | undefined} input - Inline JSON, a file path, or `'-'`.
 * @param {string} [encoding='utf8'] - Encoding used for stdin / file content.
 *   Defaults to `'utf8'` so one-argument callers still receive a string;
 *   without an encoding `readFile` would resolve to a Buffer instead.
 * @returns {Promise<string>} The input text.
 */
async function readInput(input, encoding = 'utf8') {
    if (input === undefined || input === '-') {
        const chunks = [];
        for await (const chunk of process.stdin) {
            // stdin may yield strings if an encoding was set on the stream.
            chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
        }
        return Buffer.concat(chunks).toString(encoding);
    }
    // Treat anything starting with `{` or `[` as inline JSON, otherwise a path.
    const trimmed = input.trimStart();
    if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        return input;
    }
    return readFile(input, encoding);
}
63
- function parseJsonRecords(text) {
64
- const trimmed = text.trim();
65
- if (!trimmed)
66
- return [];
67
- // JSON array → many records; JSON object → single record; one-record-per-line JSONL → many.
68
- if (trimmed.startsWith('[')) {
69
- const arr = JSON.parse(trimmed);
70
- if (!Array.isArray(arr))
71
- throw new Error('expected JSON array of records');
72
- return arr;
73
- }
74
- if (trimmed.startsWith('{')) {
75
- return [JSON.parse(trimmed)];
76
- }
77
- // JSONL fallback
78
- return trimmed
79
- .split('\n')
80
- .filter((line) => line.trim().length > 0)
81
- .map((line) => JSON.parse(line));
65
// Encoding names accepted by `--encoding` (compared after lower-casing).
const VALID_ENCODINGS = new Set([
    'utf8',
    'utf-8',
    'utf16le',
    'utf-16le',
    'ascii',
    'latin1',
    'binary',
    'base64',
    'hex',
]);
/**
 * Normalize the `--encoding` option.
 *
 * @param {string | null | undefined} raw - Value as given on the command line.
 * @returns {string} The lower-cased encoding name; `'utf8'` when `raw` is nullish.
 * @throws {Error} When the lower-cased name is not in `VALID_ENCODINGS`.
 */
function resolveEncoding(raw) {
    const requested = raw === undefined || raw === null ? 'utf8' : raw;
    const normalized = requested.toLowerCase();
    if (VALID_ENCODINGS.has(normalized)) {
        return normalized;
    }
    throw new Error(`--encoding "${raw}" is not a recognized encoding`);
}
83
83
  async function loadRepoAndSheet(argv) {
84
84
  const repo = await openRepo(argv.gitDir ? { gitDir: argv.gitDir } : {});
85
- const sheet = await repo.openSheet(argv.sheet, argv.root ? { root: argv.root } : {});
85
+ const sheetOpts = {};
86
+ if (argv.root)
87
+ sheetOpts.root = argv.root;
88
+ if (argv.prefix)
89
+ sheetOpts.prefix = argv.prefix;
90
+ const sheet = await repo.openSheet(argv.sheet, sheetOpts);
86
91
  return { repo, sheet };
87
92
  }
88
93
  function buildTxOpts(argv, defaultMessage) {
@@ -103,36 +108,194 @@ function buildTxOpts(argv, defaultMessage) {
103
108
  async function runUpsert(argv) {
104
109
  const { repo, sheet } = await loadRepoAndSheet(argv);
105
110
  void sheet; // loadRepoAndSheet validates the config exists
106
- const text = await readInput(argv.input);
107
- const records = parseJsonRecords(text);
111
+ const encoding = resolveEncoding(argv.encoding);
112
+ const explicitFormat = validateInputFormat(argv.format);
113
+ const format = explicitFormat ?? inferInputFormat(argv.input);
114
+ const text = await readInput(argv.input, encoding);
115
+ const records = parseRecords(text, format);
108
116
  if (records.length === 0)
109
117
  return;
110
- const txOpts = buildTxOpts(argv, `${argv.sheet} upsert (${records.length})`);
118
+ const attachmentMap = argv.attachment ?? {};
119
+ const attachmentNames = Object.keys(attachmentMap);
120
+ if (attachmentNames.length > 0 && records.length !== 1) {
121
+ throw new Error(`--attachment requires a single-record input (got ${records.length} records)`);
122
+ }
123
+ // Resolve attachment source paths up front so a missing file fails before
124
+ // the transaction opens. We hand them to hologit's writeBlobFromFile so
125
+ // binary content is hashed correctly (git hash-object -w). For `-` (stdin),
126
+ // we buffer it to a tmp file first since stdin may already be consumed by
127
+ // the record input.
128
+ const attachmentSources = {}; // name → absolute path
129
+ const tmpDirs = []; // cleanup at end
130
+ let stdinConsumed = argv.input === '-' || argv.input === undefined;
131
+ const inputDir = argv.input && argv.input !== '-' ? dirname(argv.input) : process.cwd();
132
+ for (const name of attachmentNames) {
133
+ const source = attachmentMap[name];
134
+ if (source === '-') {
135
+ if (stdinConsumed) {
136
+ throw new Error(`--attachment ${name}=-: stdin is already consumed; only one '-' source per command`);
137
+ }
138
+ stdinConsumed = true;
139
+ const chunks = [];
140
+ for await (const chunk of process.stdin) {
141
+ chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
142
+ }
143
+ const dir = await mkdtemp(join(tmpdir(), 'gitsheets-attach-'));
144
+ tmpDirs.push(dir);
145
+ const tmpPath = join(dir, 'data');
146
+ await writeFile(tmpPath, Buffer.concat(chunks));
147
+ attachmentSources[name] = tmpPath;
148
+ }
149
+ else {
150
+ const resolved = isAbsolute(source) ? source : join(inputDir, source);
151
+ attachmentSources[name] = resolved;
152
+ }
153
+ }
154
+ if (argv.patch && argv.deleteMissing) {
155
+ throw new Error('--patch and --delete-missing cannot be combined');
156
+ }
157
+ if (argv.patch && attachmentNames.length > 0) {
158
+ throw new Error('--patch and --attachment cannot be combined');
159
+ }
160
+ // For --patch: pre-load the sheet's path-template so we know which input
161
+ // fields form the query (record-identifier) and which are the patch payload.
162
+ let templateKeyFields;
163
+ if (argv.patch) {
164
+ const config = await sheet.readConfig();
165
+ const tpl = (await import('../path-template/index.js')).Template.fromString(config.path);
166
+ templateKeyFields = new Set(tpl.getFieldNames());
167
+ if (templateKeyFields.size === 0) {
168
+ throw new Error('--patch: cannot auto-derive a query — sheet path template has no extractable field names');
169
+ }
170
+ }
171
+ const messageDefault = argv.patch
172
+ ? `${argv.sheet} patch (${records.length})`
173
+ : argv.deleteMissing
174
+ ? `${argv.sheet} full-replace (${records.length})`
175
+ : `${argv.sheet} upsert (${records.length})`;
176
+ const txOpts = buildTxOpts(argv, messageDefault);
177
+ const txSheetOpts = argv.prefix ? { prefix: argv.prefix } : {};
178
+ // For --delete-missing, capture the existing record paths BEFORE the
179
+ // transaction opens, so we can compute the "missing" set after all upserts.
180
+ // Reading from the same Sheet handle the loadRepoAndSheet returned gives us
181
+ // a HEAD snapshot, not the in-flight tx state — exactly what we want.
182
+ const existingPaths = new Set();
183
+ if (argv.deleteMissing) {
184
+ for await (const r of sheet.query()) {
185
+ const p = r[Symbol.for('gitsheets-path')];
186
+ if (typeof p === 'string')
187
+ existingPaths.add(p);
188
+ }
189
+ }
111
190
  await repo.transact(txOpts, async (tx) => {
112
- const target = tx.sheet(argv.sheet);
191
+ const target = tx.sheet(argv.sheet, txSheetOpts);
192
+ const upsertedPaths = new Set();
193
+ let lastResult;
113
194
  for (const record of records) {
195
+ if (argv.patch && templateKeyFields) {
196
+ // Split the input into (query, partial) using the template's field
197
+ // names: keys present in the template AND in the record form the
198
+ // query; the rest is the JSON Merge Patch payload.
199
+ const query = {};
200
+ const partial = {};
201
+ for (const [k, v] of Object.entries(record)) {
202
+ if (templateKeyFields.has(k)) {
203
+ query[k] = v;
204
+ }
205
+ else {
206
+ partial[k] = v;
207
+ }
208
+ }
209
+ if (Object.keys(query).length === 0) {
210
+ throw new Error(`--patch: input record does not include any of the path-template fields (${[...templateKeyFields].join(', ')})`);
211
+ }
212
+ const result = await target.patch(query, partial);
213
+ upsertedPaths.add(result.path);
214
+ process.stdout.write(`${result.blob.hash} ${result.path}\n`);
215
+ lastResult = result;
216
+ continue;
217
+ }
114
218
  const result = await target.upsert(record);
219
+ upsertedPaths.add(result.path);
115
220
  process.stdout.write(`${result.blob.hash} ${result.path}\n`);
221
+ lastResult = result;
222
+ }
223
+ // Attach files alongside the (single) record. Already guarded above to
224
+ // run only when records.length === 1. Each source goes through
225
+ // hologit's writeBlobFromFile (git hash-object -w <path>) so binary
226
+ // content is hashed verbatim.
227
+ if (attachmentNames.length > 0 && lastResult) {
228
+ const blobMap = {};
229
+ for (const [name, sourcePath] of Object.entries(attachmentSources)) {
230
+ blobMap[name] = await repo.hologitRepo.writeBlobFromFile(sourcePath);
231
+ }
232
+ await target.setAttachments(lastResult.path, blobMap);
233
+ for (const name of attachmentNames) {
234
+ process.stdout.write(`+ ${lastResult.path}/${name}\n`);
235
+ }
236
+ }
237
+ if (argv.deleteMissing) {
238
+ // Anything in the existing set that's not in the upserted set must die.
239
+ for (const p of existingPaths) {
240
+ if (!upsertedPaths.has(p)) {
241
+ await target.delete(p);
242
+ process.stdout.write(`- ${p}\n`);
243
+ }
244
+ }
116
245
  }
117
246
  });
247
+ // Clean up tmp dirs used to materialize stdin-sourced attachments.
248
+ for (const dir of tmpDirs) {
249
+ await rm(dir, { recursive: true, force: true });
250
+ }
118
251
  }
119
252
  async function runQuery(argv) {
120
253
  const { sheet } = await loadRepoAndSheet(argv);
121
254
  const filter = argv.filter ?? {};
255
+ const format = validateOutputFormat(argv.format) ?? 'json';
256
+ const headers = argv.headers ?? true;
257
+ const fields = argv.fields;
258
+ // `--body` defaults to true; `--no-body` flips it off via yargs' implicit
259
+ // boolean negation. No effect on TOML sheets.
260
+ const withBody = argv.body ?? true;
122
261
  let yielded = 0;
123
- for await (const record of sheet.query(filter)) {
124
- const out = argv.fields
125
- ? Object.fromEntries(argv.fields.map((f) => [f, record[f]]))
126
- : record;
127
- // Strip well-known symbols before serializing
128
- process.stdout.write(`${JSON.stringify(out)}\n`);
262
+ let headerWritten = false;
263
+ const allRecords = []; // only used for TOML output
264
+ for await (const record of sheet.query(filter, { withBody })) {
265
+ if (format === 'csv' || format === 'tsv') {
266
+ if (!headerWritten) {
267
+ // Header columns come from --fields, otherwise from the first record.
268
+ const cols = fields ?? Object.keys(record).filter((k) => !k.startsWith('__'));
269
+ if (headers)
270
+ process.stdout.write(csvHeader(cols, format));
271
+ headerWritten = true;
272
+ }
273
+ process.stdout.write(stringifyRecord_text(record, format, fields));
274
+ }
275
+ else if (format === 'toml') {
276
+ // TOML needs all records to assemble [[records]] — buffer.
277
+ allRecords.push(record);
278
+ }
279
+ else {
280
+ // json (default) — stream NDJSON
281
+ process.stdout.write(stringifyRecord_text(record, 'json', fields));
282
+ }
129
283
  yielded++;
130
284
  if (argv.limit !== undefined && yielded >= argv.limit)
131
285
  break;
132
286
  }
287
+ if (format === 'toml') {
288
+ // Emit a single TOML document with a [[records]] array. The wrapper keeps
289
+ // the output round-trippable through `parseRecords(text, 'toml')`.
290
+ for (const r of allRecords) {
291
+ process.stdout.write('[[records]]\n');
292
+ process.stdout.write(stringifyRecord_text(r, 'toml', fields));
293
+ }
294
+ }
133
295
  }
134
296
  async function runRead(argv) {
135
297
  const { sheet } = await loadRepoAndSheet(argv);
298
+ const format = validateOutputFormat(argv.format) ?? 'json';
136
299
  // The path is treated as the record's full slug-rendered key plus optional
137
300
  // .toml extension. For the simple `${{ slug }}` case this is just the slug.
138
301
  const target = argv.path.endsWith('.toml') ? argv.path.slice(0, -5) : argv.path;
@@ -149,7 +312,374 @@ async function runRead(argv) {
149
312
  if (!found) {
150
313
  throw new NotFoundError('record_not_found', `${argv.sheet}: no record at ${target}`);
151
314
  }
152
- process.stdout.write(`${JSON.stringify(found, null, 2)}\n`);
315
+ if (format === 'json') {
316
+ // Pretty-print for human reads — JSON.stringify(_, null, 2) matches v1.0 behavior
317
+ const cleaned = { ...found };
318
+ delete cleaned[Symbol.for('gitsheets-path')];
319
+ delete cleaned[Symbol.for('gitsheets-sheet')];
320
+ process.stdout.write(`${JSON.stringify(cleaned, null, 2)}\n`);
321
+ return;
322
+ }
323
+ process.stdout.write(stringifyRecord_text(found, format));
324
+ }
325
/**
 * Fetch the raw text of a sheet config as stored in git at `ref`.
 *
 * Runs `git cat-file blob <ref>:<configPath>` with `gitDir` as the working
 * directory and returns its stdout, so the text is not passed through any
 * config parser. Output is capped at 16 MiB via `maxBuffer`.
 *
 * @param {string} gitDir - Directory the git command is run from.
 * @param {string} ref - Ref or commit that contains the config.
 * @param {string} configPath - Path of the config file inside the tree.
 * @returns {Promise<string>} The blob content.
 * @throws When the git command fails; the promisified `execFile` rejects.
 */
async function readSheetConfigText(gitDir, ref, configPath) {
    const childProcess = await import('node:child_process');
    const util = await import('node:util');
    const runFile = util.promisify(childProcess.execFile);
    const objectSpec = `${ref}:${configPath}`;
    const execOptions = {
        cwd: gitDir,
        maxBuffer: 16 * 1024 * 1024,
    };
    const result = await runFile('git', ['cat-file', 'blob', objectSpec], execOptions);
    return result.stdout;
}
343
/**
 * Fold one observed field value into the schema-inference accumulator.
 *
 * `acc[key]` is created on first sight as `{ types: Set }` and then updated:
 * - `null` → type `'null'`
 * - arrays → type `'array'`, plus each element's `jsonTypeOf` in `slot.items`
 * - `Date` and strings → type `'string'`
 * - other objects → type `'object'`
 * - numbers → `'integer'` or `'number'`, plus running `min` / `max`
 * - booleans → type `'boolean'`
 * Values of any other kind (e.g. `undefined`) only create the slot.
 *
 * @param {object} acc - Accumulator keyed by field name; mutated in place.
 * @param {string} key - Field name.
 * @param {*} value - Observed value for that field in one record.
 * @returns {void}
 */
function observeField(acc, key, value) {
    // Test for an OWN slot: a truthiness probe on a plain object would find
    // inherited members for field names such as `constructor` or `toString`.
    // defineProperty (rather than assignment) keeps a `__proto__` field name
    // from replacing the accumulator's prototype.
    if (!Object.prototype.hasOwnProperty.call(acc, key)) {
        Object.defineProperty(acc, key, {
            value: { types: new Set() },
            enumerable: true,
            writable: true,
            configurable: true,
        });
    }
    const slot = acc[key];
    if (value === null) {
        slot.types.add('null');
    }
    else if (Array.isArray(value)) {
        slot.types.add('array');
        if (!slot.items)
            slot.items = new Set();
        for (const el of value)
            slot.items.add(jsonTypeOf(el));
    }
    else if (value instanceof Date) {
        slot.types.add('string'); // dates are recorded as strings in the schema
    }
    else if (typeof value === 'object') {
        slot.types.add('object');
    }
    else if (typeof value === 'number') {
        slot.types.add(Number.isInteger(value) ? 'integer' : 'number');
        if (slot.min === undefined || value < slot.min)
            slot.min = value;
        if (slot.max === undefined || value > slot.max)
            slot.max = value;
    }
    else if (typeof value === 'boolean') {
        slot.types.add('boolean');
    }
    else if (typeof value === 'string') {
        slot.types.add('string');
    }
}
378
/**
 * Classify a value with a JSON-Schema-style type name.
 *
 * `null` → `'null'`, arrays → `'array'`, `Date` → `'string'`, other objects →
 * `'object'`, numbers → `'integer'` or `'number'`. Every other value yields
 * its plain `typeof` result.
 *
 * @param {*} value - Value to classify.
 * @returns {string} The type name.
 */
function jsonTypeOf(value) {
    const kind = typeof value;
    switch (kind) {
        case 'object':
            if (value === null) {
                return 'null';
            }
            if (Array.isArray(value)) {
                return 'array';
            }
            return value instanceof Date ? 'string' : 'object';
        case 'number':
            return Number.isInteger(value) ? 'integer' : 'number';
        default:
            return kind;
    }
}
391
/**
 * `infer` command: derive a schema from the sheet's current records and write
 * it into the sheet config.
 *
 * Every record from `sheet.query()` is folded through `observeField`. A field
 * is listed as `required` when it appears in every record. The resulting
 * `{ type: 'object', properties[, required] }` replaces `gitsheet.schema` in
 * the config text read at `--ref` (default `HEAD`), and the config is
 * rewritten in one transaction. With no records, a notice goes to stderr and
 * nothing is written.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @returns {Promise<void>}
 */
async function runInfer(argv) {
    const { repo, sheet } = await loadRepoAndSheet(argv);
    const hasRoot = argv.root && argv.root !== '/' && argv.root !== '.';
    const configPath = `${hasRoot ? argv.root + '/' : ''}.gitsheets/${argv.sheet}.toml`;
    // Pass 1: collect per-field type info and how many records carry each field.
    const observed = {};
    const presence = new Map();
    let recordCount = 0;
    for await (const record of sheet.query()) {
        recordCount += 1;
        for (const [name, value] of Object.entries(record)) {
            const seen = presence.get(name) ?? 0;
            presence.set(name, seen + 1);
            observeField(observed, name, value);
        }
    }
    if (recordCount === 0) {
        process.stderr.write('gitsheets: no records to infer from\n');
        return;
    }
    // A single observed type is written as a string, several as a sorted array.
    const typeOrUnion = (names) => {
        const sorted = [...names].sort();
        return sorted.length === 1 ? sorted[0] : sorted;
    };
    const properties = {};
    for (const [field, info] of Object.entries(observed)) {
        const prop = { type: typeOrUnion(info.types) };
        if (info.items && info.items.size > 0) {
            prop['items'] = { type: typeOrUnion(info.items) };
        }
        if (info.min !== undefined) {
            prop['minimum'] = info.min;
        }
        if (info.max !== undefined) {
            prop['maximum'] = info.max;
        }
        properties[field] = prop;
    }
    const required = [];
    for (const [name, count] of presence) {
        if (count === recordCount) {
            required.push(name);
        }
    }
    required.sort();
    // Pass 2: merge the schema into the existing config, keeping its other keys.
    const ref = argv.ref ?? 'HEAD';
    const configText = await readSheetConfigText(repo.gitDir, ref, configPath);
    const parsed = parseToml(configText);
    const gitsheet = (parsed['gitsheet'] ?? {});
    gitsheet['schema'] = required.length > 0
        ? { type: 'object', properties, required }
        : { type: 'object', properties };
    parsed['gitsheet'] = gitsheet;
    const newText = stringifyRecord(parsed);
    const propertyCount = Object.keys(properties).length;
    const txOpts = buildTxOpts(argv, `${argv.sheet} infer schema (${propertyCount} fields)`);
    await repo.transact(txOpts, async (tx) => {
        await tx.tree.writeChild(configPath, newText);
        tx.markMutated();
    });
    process.stdout.write(`inferred schema for .gitsheets/${argv.sheet}.toml — ${propertyCount} properties, ${required.length} required\n`);
}
445
/**
 * `init` command: create `.gitsheets/<sheet>.toml` (under `--root` when given)
 * in one transaction.
 *
 * The config gets `root = <sheet>` and `path = --path` (default `${{ id }}`);
 * `--schema <file>` embeds a JSON object read from that file. Unless `--force`
 * is set, the command refuses to run when the config can already be read at
 * `--ref` (default `HEAD`). The generated text is run through
 * `parseConfigToml` before it is committed.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @returns {Promise<void>}
 * @throws {Error} When the config already exists (without `--force`), when
 *   `--schema` is not a JSON object, or when the generated config is invalid.
 */
async function runInit(argv) {
    const repo = await openRepo(argv.gitDir ? { gitDir: argv.gitDir } : {});
    const configPath = `${argv.root && argv.root !== '/' && argv.root !== '.' ? argv.root + '/' : ''}.gitsheets/${argv.sheet}.toml`;
    // Refuse to overwrite an existing config (unless --force).
    const ref = argv.ref ?? 'HEAD';
    if (!argv.force) {
        let alreadyExists = true;
        try {
            await readSheetConfigText(repo.gitDir, ref, configPath);
        }
        catch {
            // Best-effort probe: any failure to read the blob is treated as
            // "no config yet", as before.
            alreadyExists = false;
        }
        if (alreadyExists) {
            throw new Error(`.gitsheets/${argv.sheet}.toml already exists at ${ref} — use --force to overwrite`);
        }
    }
    const config = {
        gitsheet: {
            root: argv.sheet,
            path: argv.path ?? '${{ id }}',
        },
    };
    if (argv.schema) {
        const schemaText = await readFile(argv.schema, 'utf8');
        const schemaParsed = JSON.parse(schemaText);
        // Arrays are `typeof 'object'` too, but a schema must be a JSON object.
        if (typeof schemaParsed !== 'object' || schemaParsed === null || Array.isArray(schemaParsed)) {
            throw new Error(`--schema: ${argv.schema} did not parse as a JSON object`);
        }
        config['gitsheet']['schema'] = schemaParsed;
    }
    const newText = stringifyRecord(config);
    // Validate through the standard config loader so we don't commit something
    // that then fails to open.
    try {
        const { parseConfigToml } = await import('../toml.js');
        parseConfigToml(newText, configPath);
    }
    catch (err) {
        throw new Error(`init produced an invalid config: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    const txOpts = buildTxOpts(argv, `${argv.sheet} init sheet config`);
    await repo.transact(txOpts, async (tx) => {
        await tx.tree.writeChild(configPath, newText);
        tx.markMutated();
    });
    process.stdout.write(`created .gitsheets/${argv.sheet}.toml\n`);
}
494
/**
 * `migrate-config` command: move a legacy `[gitsheet.fields]` block into
 * `[gitsheet.schema]` and commit the rewritten config.
 *
 * Per field, `type` / `enum` / `default` become a schema property and `sort`
 * stays under `[gitsheet.fields]`. `trueValues` / `falseValues` are dropped
 * with a warning on stderr. All other `gitsheet` keys are kept.
 *
 * An existing `[gitsheet.schema]` table is preserved. Migrated properties are
 * merged into its `properties`, and on a conflict the value already in the
 * schema wins. A schema is no longer discarded when it coexists with a legacy
 * fields block.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @returns {Promise<void>}
 */
async function runMigrateConfig(argv) {
    const repo = await openRepo(argv.gitDir ? { gitDir: argv.gitDir } : {});
    const configPath = `${argv.root && argv.root !== '/' && argv.root !== '.' ? argv.root + '/' : ''}.gitsheets/${argv.sheet}.toml`;
    const ref = argv.ref ?? 'HEAD';
    const configText = await readSheetConfigText(repo.gitDir, ref, configPath);
    const parsed = parseToml(configText);
    const gitsheet = (parsed['gitsheet'] ?? {});
    const fields = gitsheet['fields'];
    if (!fields || typeof fields !== 'object') {
        process.stderr.write('gitsheets: no [gitsheet.fields] block to migrate\n');
        return;
    }
    const isTable = (v) => typeof v === 'object' && v !== null && !Array.isArray(v);
    const properties = {};
    const remainingFields = {};
    let warnings = 0;
    for (const [name, cfg] of Object.entries(fields)) {
        if (typeof cfg !== 'object' || cfg === null)
            continue;
        const schemaProp = {};
        const remainingField = {};
        if (cfg['type'] !== undefined)
            schemaProp['type'] = cfg['type'];
        if (cfg['enum'] !== undefined)
            schemaProp['enum'] = cfg['enum'];
        if (cfg['default'] !== undefined)
            schemaProp['default'] = cfg['default'];
        if (cfg['sort'] !== undefined)
            remainingField['sort'] = cfg['sort'];
        if (cfg['trueValues'] !== undefined || cfg['falseValues'] !== undefined) {
            process.stderr.write(`gitsheets: warning — ${name}.trueValues/falseValues moved out of validation (use a CSV-ingest helper)\n`);
            warnings++;
        }
        if (Object.keys(schemaProp).length > 0)
            properties[name] = schemaProp;
        if (Object.keys(remainingField).length > 0)
            remainingFields[name] = remainingField;
    }
    // Rebuild the gitsheet block: keep everything except fields/schema, which
    // are re-added below.
    const newGitsheet = {};
    for (const [k, v] of Object.entries(gitsheet)) {
        if (k === 'fields' || k === 'schema')
            continue;
        newGitsheet[k] = v;
    }
    if (Object.keys(remainingFields).length > 0)
        newGitsheet['fields'] = remainingFields;
    const priorSchema = gitsheet['schema'];
    if (Object.keys(properties).length > 0) {
        if (isTable(priorSchema)) {
            // Merge into the existing schema instead of replacing it.
            const priorProps = isTable(priorSchema['properties']) ? priorSchema['properties'] : {};
            const mergedProps = { ...priorProps };
            for (const [name, migrated] of Object.entries(properties)) {
                const prior = isTable(priorProps[name]) ? priorProps[name] : {};
                mergedProps[name] = { ...migrated, ...prior };
            }
            newGitsheet['schema'] = { type: 'object', ...priorSchema, properties: mergedProps };
        }
        else {
            newGitsheet['schema'] = { type: 'object', properties };
        }
    }
    else if (priorSchema !== undefined) {
        // Nothing migrated into the schema: carry the existing one over untouched.
        newGitsheet['schema'] = priorSchema;
    }
    parsed['gitsheet'] = newGitsheet;
    const newText = stringifyRecord(parsed);
    const txOpts = buildTxOpts(argv, `${argv.sheet} migrate-config`);
    await repo.transact(txOpts, async (tx) => {
        await tx.tree.writeChild(configPath, newText);
        tx.markMutated();
    });
    process.stdout.write(`migrated .gitsheets/${argv.sheet}.toml — ${Object.keys(properties).length} property migrations${warnings ? `, ${warnings} warning(s)` : ''}\n`);
}
552
/**
 * `gitsheets check <sheet> <file>` — verify that one record file on disk
 * parses, validates, and is already in canonical form. Never commits.
 *
 * Steps: read the file (relative paths resolve against cwd), parse it with
 * the sheet's configured format, validate it with `validateRecord`, normalize
 * it with `sheet.normalizeRecord`, re-serialize, and compare with the
 * original text.
 *
 * Outcomes:
 * - identical text: prints `ok <file>`.
 * - different text with `--fix`: rewrites the file and prints `fixed <file>`.
 * - different text without `--fix`: writes a notice to stderr and calls
 *   `process.exit(1)`.
 *
 * @param {object} argv - Parsed CLI arguments (`sheet`, `file`, `fix`, ...).
 * @returns {Promise<void>}
 * @throws {NotFoundError} When the file cannot be read.
 * @throws {ConfigError} When the file does not parse as the sheet's format.
 * @throws Whatever `validateRecord` rejects with on a schema failure
 *   (presumably a ValidationError — confirm in ../validation.js).
 */
async function runCheck(argv) {
    const { sheet } = await loadRepoAndSheet(argv);
    const config = await sheet.readConfig();
    const { getFormat } = await import('../format/index.js');
    const codec = getFormat(config.format.type);
    const filePath = isAbsolute(argv.file) ? argv.file : join(process.cwd(), argv.file);
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    // Read from the working tree (not from git); wrap read failures so the
    // message names the file the user passed.
    let onDisk;
    try {
        onDisk = await readFile(filePath, 'utf8');
    }
    catch (err) {
        throw new NotFoundError('record_not_found', `gitsheets check: cannot read ${argv.file}: ${describe(err)}`);
    }
    // Parse failures surface as ConfigError.
    let parsedRecord;
    try {
        parsedRecord = codec.parse(onDisk, config.format);
    }
    catch (err) {
        throw new ConfigError('config_invalid', `gitsheets check: failed to parse ${argv.file} as ${config.format.type}: ${describe(err)}`, { cause: err });
    }
    // Validate a copy of the record against the sheet's schema.
    const { validateRecord } = await import('../validation.js');
    const validationInput = {
        record: { ...parsedRecord },
        schema: config.schema,
        schemaSourcePath: `.gitsheets/${argv.sheet}.toml`,
    };
    const validated = await validateRecord(validationInput);
    // Normalize, then re-serialize with the same format for the comparison.
    const normalized = await sheet.normalizeRecord(validated);
    const canonical = await codec.serialize({ ...normalized }, config.format);
    if (canonical === onDisk) {
        process.stdout.write(`ok ${argv.file}\n`);
        return;
    }
    if (argv.fix) {
        await writeFile(filePath, canonical, 'utf8');
        process.stdout.write(`fixed ${argv.file}\n`);
        return;
    }
    process.stderr.write(`gitsheets check: ${argv.file} is not in canonical form. Run with --fix to rewrite.\n`);
    process.exit(1);
}
623
/**
 * `edit` command: open one record as TOML in the user's editor and upsert the
 * result in a transaction.
 *
 * The record is found by scanning `sheet.query()` for a matching rendered
 * path (`.toml` suffix optional). It is written to a temp file and opened
 * with `$VISUAL`, `$EDITOR`, or `vi`. A non-zero editor exit aborts; unchanged
 * text is a no-op with a notice on stderr. Otherwise the edited TOML is parsed
 * and upserted, carrying the original path annotation. The temp directory is
 * always removed.
 *
 * @param {object} argv - Parsed CLI arguments (`sheet`, `path`, `prefix`, ...).
 * @returns {Promise<void>}
 * @throws {NotFoundError} When no record has the requested path.
 * @throws {Error} When the editor exits with a non-zero (or null) code.
 */
async function runEdit(argv) {
    const { repo, sheet } = await loadRepoAndSheet(argv);
    const target = argv.path.endsWith('.toml') ? argv.path.slice(0, -5) : argv.path;
    // Resolve the record by walking query() and matching the rendered path.
    let found;
    for await (const record of sheet.query()) {
        const pathSym = record[Symbol.for('gitsheets-path')];
        if (pathSym === target) {
            found = record;
            break;
        }
    }
    if (!found) {
        throw new NotFoundError('record_not_found', `${argv.sheet}: no record at ${target}`);
    }
    // Drop symbols before serializing; they're not part of the record's data.
    const cleaned = { ...found };
    delete cleaned[Symbol.for('gitsheets-path')];
    delete cleaned[Symbol.for('gitsheets-sheet')];
    const originalToml = stringifyRecord(cleaned);
    const tmpDir = await mkdtemp(join(tmpdir(), 'gitsheets-edit-'));
    // Sheet names and record paths are repository data: reduce them to a
    // conservative basename so separators or shell metacharacters can neither
    // escape tmpDir nor reach the command line below.
    const safeBase = `${argv.sheet}-${target}`.replace(/[^A-Za-z0-9._-]+/g, '-');
    const tmpFile = join(tmpDir, `${safeBase}.toml`);
    try {
        await writeFile(tmpFile, originalToml, 'utf8');
        const editor = process.env['VISUAL'] || process.env['EDITOR'] || 'vi';
        const shell = process.platform === 'win32' ? 'cmd' : 'sh';
        // The editor string stays shell-interpreted so values like `code --wait`
        // keep working. On POSIX the file is passed as positional parameter $1
        // rather than spliced into the command text.
        const shellArgs = process.platform === 'win32'
            ? ['/c', `${editor} "${tmpFile}"`]
            : ['-c', `${editor} "$1"`, 'sh', tmpFile];
        // Spawn with inherited stdio so the editor takes the terminal.
        const { spawn } = await import('node:child_process');
        const exit = await new Promise((resolve, reject) => {
            const child = spawn(shell, shellArgs, { stdio: 'inherit' });
            child.on('error', reject);
            child.on('exit', (code) => resolve(code));
        });
        if (exit !== 0) {
            throw new Error(`editor exited with code ${exit ?? 'null'} — aborting`);
        }
        const editedToml = await readFile(tmpFile, 'utf8');
        if (editedToml === originalToml) {
            // No-op; don't commit.
            process.stderr.write('gitsheets: no changes — nothing to commit\n');
            return;
        }
        const edited = parseToml(editedToml);
        const txOpts = buildTxOpts(argv, `${argv.sheet} edit ${target}`);
        const txSheetOpts = argv.prefix ? { prefix: argv.prefix } : {};
        await repo.transact(txOpts, async (tx) => {
            const sheetTx = tx.sheet(argv.sheet, txSheetOpts);
            // Carry the original path annotation on the edited record so upsert
            // can relate it to the record it came from.
            edited[Symbol.for('gitsheets-path')] = target;
            const result = await sheetTx.upsert(edited);
            process.stdout.write(`${result.blob.hash} ${result.path}\n`);
        });
    }
    finally {
        await rm(tmpDir, { recursive: true, force: true });
    }
}
154
684
  async function runNormalize(argv) {
155
685
  const { repo, sheet } = await loadRepoAndSheet(argv);
@@ -160,8 +690,9 @@ async function runNormalize(argv) {
160
690
  if (records.length === 0)
161
691
  return;
162
692
  const txOpts = buildTxOpts(argv, `${argv.sheet} normalize`);
693
+ const txSheetOpts = argv.prefix ? { prefix: argv.prefix } : {};
163
694
  await repo.transact(txOpts, async (tx) => {
164
- const target = tx.sheet(argv.sheet);
695
+ const target = tx.sheet(argv.sheet, txSheetOpts);
165
696
  for (const r of records) {
166
697
  const result = await target.upsert(r);
167
698
  process.stdout.write(`${result.blob.hash} ${result.path}\n`);
@@ -181,6 +712,10 @@ export async function main(args = hideBin(process.argv)) {
181
712
  default: process.env['GIT_DIR'],
182
713
  })
183
714
  .option('root', { type: 'string', describe: 'Sub-directory under the data tree; default "/"' })
715
+ .option('prefix', {
716
+ type: 'string',
717
+ describe: 'Sub-prefix under each sheet\'s config root — scopes records to a sub-tree (multi-tenant)',
718
+ })
184
719
  .option('ref', { type: 'string', describe: "Parent ref/commit; default HEAD's branch" })
185
720
  .option('commit-to', { type: 'string', describe: 'Branch to update on commit' })
186
721
  .option('message', { type: 'string', describe: 'Commit message (mutating commands)' })
@@ -208,8 +743,44 @@ export async function main(args = hideBin(process.argv)) {
208
743
  .positional('input', {
209
744
  type: 'string',
210
745
  describe: "Inline JSON, a file path, or '-' for stdin",
746
+ })
747
+ .option('format', {
748
+ type: 'string',
749
+ choices: ['json', 'toml', 'csv'],
750
+ describe: 'Input format; default: inferred from extension, falls back to json',
751
+ })
752
+ .option('encoding', {
753
+ type: 'string',
754
+ describe: 'Encoding for file/stdin input (default: utf8)',
755
+ })
756
+ .option('delete-missing', {
757
+ type: 'boolean',
758
+ default: false,
759
+ describe: 'DESTRUCTIVE: delete every record not present in the input set, in the same transaction',
760
+ })
761
+ .option('patch', {
762
+ type: 'boolean',
763
+ default: false,
764
+ describe: 'Treat each input record as an RFC 7396 merge-patch: fields matching the sheet path template become the query; the rest are merged into the matched record. Cannot be combined with --delete-missing or --attachment.',
765
+ })
766
+ .option('attachment', {
767
+ type: 'string',
768
+ array: true,
769
+ describe: "Attach a file alongside the record: --attachment <name>=<source>. <source> is a file path (relative to the input file's dir, else cwd) or '-' for stdin. Repeatable. Requires a single-record input.",
770
+ coerce: (raw) => {
771
+ const items = Array.isArray(raw) ? raw : [raw];
772
+ const out = {};
773
+ for (const item of items) {
774
+ const eq = item.indexOf('=');
775
+ if (eq === -1) {
776
+ throw new Error(`--attachment expects <name>=<source>, got ${item}`);
777
+ }
778
+ out[item.slice(0, eq)] = item.slice(eq + 1);
779
+ }
780
+ return out;
781
+ },
211
782
  }), runUpsert)
212
- .command('query <sheet>', 'Read records as newline-delimited JSON', (y) => y
783
+ .command('query <sheet>', 'Read records (output: JSON by default; --format=csv|tsv|toml supported)', (y) => y
213
784
  .positional('sheet', { type: 'string', demandOption: true })
214
785
  .option('filter', {
215
786
  type: 'string',
@@ -228,11 +799,59 @@ export async function main(args = hideBin(process.argv)) {
228
799
  },
229
800
  })
230
801
  .option('fields', { type: 'string', array: true })
231
- .option('limit', { type: 'number' }), runQuery)
802
+ .option('limit', { type: 'number' })
803
+ .option('format', {
804
+ type: 'string',
805
+ choices: ['json', 'toml', 'csv', 'tsv'],
806
+ describe: 'Output format (default: json)',
807
+ })
808
+ .option('headers', {
809
+ type: 'boolean',
810
+ default: true,
811
+ describe: 'Emit a header row for CSV/TSV output (default: true)',
812
+ })
813
+ .option('body', {
814
+ type: 'boolean',
815
+ default: true,
816
+ describe: 'For content-typed (markdown) sheets, include the body field. Pass --no-body for header-only reads. No effect on TOML sheets.',
817
+ }), runQuery)
232
818
  .command('read <sheet> <path>', 'Read a single record by its rendered path', (y) => y
233
819
  .positional('sheet', { type: 'string', demandOption: true })
234
- .positional('path', { type: 'string', demandOption: true }), runRead)
820
+ .positional('path', { type: 'string', demandOption: true })
821
+ .option('format', {
822
+ type: 'string',
823
+ choices: ['json', 'toml', 'csv', 'tsv'],
824
+ describe: 'Output format (default: pretty json)',
825
+ }), runRead)
826
+ .command('edit <sheet> <path>', "Open a record in $EDITOR (TOML form); on save, validate and upsert in a transaction", (y) => y
827
+ .positional('sheet', { type: 'string', demandOption: true })
828
+ .positional('path', { type: 'string', demandOption: true }), runEdit)
829
+ .command('check <sheet> <file>', "Verify a record file in the working tree is parseable, valid, and canonical. --fix rewrites it.", (y) => y
830
+ .positional('sheet', { type: 'string', demandOption: true })
831
+ .positional('file', { type: 'string', demandOption: true })
832
+ .option('fix', {
833
+ type: 'boolean',
834
+ default: false,
835
+ describe: 'Rewrite the file in canonical form when it isn\'t already (no commit; working tree only)',
836
+ }), runCheck)
235
837
  .command('normalize <sheet>', 'Re-write every record through the canonical-normalization pipeline', (y) => y.positional('sheet', { type: 'string', demandOption: true }), runNormalize)
838
+ .command('init <sheet>', "Scaffold .gitsheets/<sheet>.toml with sensible defaults", (y) => y
839
+ .positional('sheet', { type: 'string', demandOption: true })
840
+ .option('path', {
841
+ type: 'string',
842
+ describe: "Path template (default: '${{ id }}')",
843
+ })
844
+ .option('schema', {
845
+ type: 'string',
846
+ describe: 'Path to a JSON Schema file to embed under [gitsheet.schema]',
847
+ })
848
+ .option('force', {
849
+ type: 'boolean',
850
+ default: false,
851
+ describe: 'Overwrite an existing .gitsheets/<sheet>.toml',
852
+ }), runInit)
853
+ .command('infer <sheet>', "Scan every record and write a starter [gitsheet.schema] block", (y) => y.positional('sheet', { type: 'string', demandOption: true }), runInfer)
854
+ .command('migrate-config <sheet>', "Convert a pre-v1.0 [gitsheet.fields] config to a v1.0 [gitsheet.schema] config", (y) => y.positional('sheet', { type: 'string', demandOption: true }), runMigrateConfig)
236
855
  .fail((msg, err) => {
237
856
  if (err) {
238
857
  reportError(err);