@indigoai-us/hq-cloud 5.16.0 → 5.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli/share.ts CHANGED
@@ -9,9 +9,17 @@ import * as fs from "fs";
9
9
  import * as path from "path";
10
10
  import type { EntityContext, VaultServiceConfig, SyncJournal } from "../types.js";
11
11
  import { resolveEntityContext, isExpiringSoon, refreshEntityContext } from "../context.js";
12
- import { uploadFile, headRemoteFile, deleteRemoteFile } from "../s3.js";
12
+ import { uploadFile, uploadSymlink, headRemoteFile, deleteRemoteFile } from "../s3.js";
13
13
  import type { UploadAuthor } from "../s3.js";
14
- import { readJournal, writeJournal, hashFile, updateEntry, removeEntry, normalizeEtag } from "../journal.js";
14
+ import {
15
+ readJournal,
16
+ writeJournal,
17
+ hashFile,
18
+ hashSymlinkTarget,
19
+ updateEntry,
20
+ removeEntry,
21
+ normalizeEtag,
22
+ } from "../journal.js";
15
23
  import { createIgnoreFilter, isWithinSizeLimit } from "../ignore.js";
16
24
  import { resolveConflict } from "./conflict.js";
17
25
  import type { ConflictStrategy } from "./conflict.js";
@@ -30,11 +38,24 @@ import type { SyncProgressEvent } from "./sync.js";
30
38
  type PushPlanItem =
31
39
  | {
32
40
  action: "upload";
41
+ kind: "file";
33
42
  absolutePath: string;
34
43
  relativePath: string;
35
44
  localHash: string;
36
45
  size: number;
37
46
  }
47
+ | {
48
+ action: "upload";
49
+ kind: "symlink";
50
+ absolutePath: string;
51
+ relativePath: string;
52
+ // The link's target string verbatim (whatever readlink returned).
53
+ // Hashed into localHash so a target rewrite re-uploads even when
54
+ // skipUnchanged is on; size stays 0 because the wire body is empty.
55
+ target: string;
56
+ localHash: string;
57
+ size: 0;
58
+ }
38
59
  | {
39
60
  action: "skip-size-limit";
40
61
  absolutePath: string;
@@ -64,13 +85,46 @@ interface PushPlan {
64
85
  * Consumers that want a conflict count get it from the `complete` event.
65
86
  */
66
87
  function computePushPlan(
67
- filesToShare: { absolutePath: string; relativePath: string }[],
88
+ filesToShare: CollectedEntry[],
68
89
  journal: SyncJournal,
69
90
  skipUnchanged: boolean,
70
91
  ): PushPlan {
71
92
  const items: PushPlanItem[] = [];
72
93
 
73
- for (const { absolutePath, relativePath } of filesToShare) {
94
+ for (const entry of filesToShare) {
95
+ const { absolutePath, relativePath } = entry;
96
+
97
+ // Symlinks bypass the size-limit gate (the wire body is empty; the
98
+ // target string travels as metadata) and hash the target through the
99
+ // symlink-namespaced hash so a symlink to "real.md" can never
100
+ // collide with a regular file containing the bytes "real.md" in
101
+ // the skip-unchanged gate. The target is what we're actually
102
+ // uploading, so its hash is what should drive change detection —
103
+ // a target rewrite must re-fire an upload even when the link
104
+ // itself "looks" identical.
105
+ if (entry.kind === "symlink") {
106
+ const localHash = hashSymlinkTarget(entry.target);
107
+
108
+ if (skipUnchanged) {
109
+ const existing = journal.files[relativePath];
110
+ if (existing && existing.hash === localHash) {
111
+ items.push({ action: "skip-unchanged", absolutePath, relativePath });
112
+ continue;
113
+ }
114
+ }
115
+
116
+ items.push({
117
+ action: "upload",
118
+ kind: "symlink",
119
+ absolutePath,
120
+ relativePath,
121
+ target: entry.target,
122
+ localHash,
123
+ size: 0,
124
+ });
125
+ continue;
126
+ }
127
+
74
128
  if (!isWithinSizeLimit(absolutePath)) {
75
129
  items.push({ action: "skip-size-limit", absolutePath, relativePath });
76
130
  continue;
@@ -89,6 +143,7 @@ function computePushPlan(
89
143
  const size = fs.statSync(absolutePath).size;
90
144
  items.push({
91
145
  action: "upload",
146
+ kind: "file",
92
147
  absolutePath,
93
148
  relativePath,
94
149
  localHash,
@@ -185,6 +240,34 @@ export interface ShareOptions {
185
240
  * full-tree bidirectional runner opts in.
186
241
  */
187
242
  propagateDeletes?: boolean;
243
+ /**
244
+ * Policy for which journal entries `propagateDeletes` is willing to
245
+ * convert into remote `DeleteObject` calls. Only consulted when
246
+ * `propagateDeletes === true`.
247
+ *
248
+ * - `"owned-only"` (default, safer): only entries whose journal
249
+ * `direction === "up"` are eligible. That is, only files this
250
+ * machine previously uploaded can be remotely deleted on its
251
+ * behalf. Entries the journal records as pulled from elsewhere
252
+ * (`direction === "down"`) are never delete-propagated — the
253
+ * local absence may just be an unpulled state or a filter
254
+ * mismatch, both of which previously caused this machine to
255
+ * erase other machines' uploads.
256
+ * - `"all"`: legacy behaviour — every in-scope journal entry whose
257
+ * local file is missing is eligible (regardless of direction). The
258
+ * bidirectional runner's first-push and any tool that wants to
259
+ * mirror a destructive local checkout opt in here explicitly.
260
+ *
261
+ * Independently of this policy, an entry is also dropped from the
262
+ * plan when neither the file-shape nor the directory-shape probe of
263
+ * `shouldSync` accepts the path — i.e. the current ignore filter
264
+ * would have skipped the path on pull (whether classified as a
265
+ * regular file or a symlink record / directory). That symmetry
266
+ * blocks the failure mode where a path was filtered locally but
267
+ * lived in the vault (and the journal) from an older HQ layout or
268
+ * a different machine, causing the next push to erase it.
269
+ */
270
+ propagateDeletePolicy?: "owned-only" | "all";
188
271
  /**
189
272
  * Identity stamped onto each uploaded object's S3 user metadata
190
273
  * (`created-by`, `created-by-sub`, `created-at`). The hq-console vault UI
@@ -236,6 +319,14 @@ export interface ShareResult {
236
319
  */
237
320
  export async function share(options: ShareOptions): Promise<ShareResult> {
238
321
  const { paths, company, message, onConflict, vaultConfig, entityContext, hqRoot, skipUnchanged, propagateDeletes } = options;
322
+ // Default to the safer "owned-only" policy when delete-propagation is on
323
+ // but the caller hasn't pinned a policy. Pre-existing callers that passed
324
+ // `propagateDeletes: true` (the `sync now` push leg, the runner's
325
+ // bidirectional sync, the `--all` fanout) thereby flip to the safer
326
+ // semantics automatically. Set `propagateDeletePolicy: "all"` explicitly
327
+ // to opt back into the legacy any-missing-file-deletes behaviour.
328
+ const propagateDeletePolicy: "owned-only" | "all" =
329
+ options.propagateDeletePolicy ?? "owned-only";
239
330
  const emit = options.onEvent ?? defaultConsoleLogger;
240
331
 
241
332
  // Exactly-one-of contract: either we vend (vaultConfig) or the caller did
@@ -316,7 +407,13 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
316
407
  ? resolveDeleteScopeRoots(paths, hqRoot, syncRoot)
317
408
  : [];
318
409
  const deletePlan = propagateDeletes === true
319
- ? computeDeletePlan(journal, syncRoot, deleteScopeRoots)
410
+ ? computeDeletePlan(
411
+ journal,
412
+ syncRoot,
413
+ deleteScopeRoots,
414
+ shouldSync,
415
+ propagateDeletePolicy,
416
+ )
320
417
  : [];
321
418
 
322
419
  emit({
@@ -413,18 +510,26 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
413
510
  }
414
511
  }
415
512
 
416
- // Upload
513
+ // Upload — symlinks go through uploadSymlink (zero-byte body + target
514
+ // metadata), regular files through uploadFile (file contents). The
515
+ // discriminator is item.kind set by computePushPlan; both branches
516
+ // converge on the same journal/event update path below.
417
517
  try {
418
- const stat = fs.statSync(absolutePath);
518
+ const isSymlinkUpload = item.kind === "symlink";
519
+ const size = isSymlinkUpload ? 0 : fs.statSync(absolutePath).size;
419
520
 
420
- const { etag } = options.author
421
- ? await uploadFile(ctx, absolutePath, relativePath, options.author)
422
- : await uploadFile(ctx, absolutePath, relativePath);
521
+ const { etag } = isSymlinkUpload
522
+ ? options.author
523
+ ? await uploadSymlink(ctx, item.target, relativePath, options.author)
524
+ : await uploadSymlink(ctx, item.target, relativePath)
525
+ : options.author
526
+ ? await uploadFile(ctx, absolutePath, relativePath, options.author)
527
+ : await uploadFile(ctx, absolutePath, relativePath);
423
528
 
424
529
  // Update journal with optional message; capture the post-upload ETag
425
530
  // so the next sync can distinguish "remote moved since we last wrote"
426
531
  // from "user edited locally" without conflating the two.
427
- updateEntry(journal, relativePath, localHash, stat.size, "up", etag);
532
+ updateEntry(journal, relativePath, localHash, size, "up", etag);
428
533
  if (message) {
429
534
  journal.files[relativePath] = {
430
535
  ...journal.files[relativePath],
@@ -433,11 +538,11 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
433
538
  }
434
539
 
435
540
  filesUploaded++;
436
- bytesUploaded += stat.size;
541
+ bytesUploaded += size;
437
542
  emit({
438
543
  type: "progress",
439
544
  path: relativePath,
440
- bytes: stat.size,
545
+ bytes: size,
441
546
  ...(message ? { message } : {}),
442
547
  });
443
548
  } catch (err) {
@@ -538,42 +643,104 @@ function resolveActiveCompany(hqRoot: string): string | undefined {
538
643
  return undefined;
539
644
  }
540
645
 
646
+ /**
647
+ * One entry produced by collectFiles/walkDir. Files describe regular
648
+ * payloads that get hashed + size-checked + uploaded via uploadFile;
649
+ * symlinks describe link records whose target string flows through
650
+ * uploadSymlink as user metadata. Walked-dir traversal NEVER descends
651
+ * into a symlink — directory symlinks are recorded as link entries and
652
+ * left at that, so following them never duplicates content into the
653
+ * wrong vault path (the same topology safety the legacy walker provided
654
+ * by accident of Dirent.isFile() returning false for links).
655
+ */
656
+ type CollectedEntry =
657
+ | { kind: "file"; absolutePath: string; relativePath: string }
658
+ | { kind: "symlink"; absolutePath: string; relativePath: string; target: string };
659
+
541
660
  /**
542
661
  * Collect files from paths (expanding directories recursively).
543
662
  *
544
663
  * Remote S3 keys are computed relative to `syncRoot` (companies/{slug}/), not
545
664
  * `hqRoot`. Files outside `syncRoot` are skipped with a warning — sharing
546
665
  * anything outside a company's folder would leak state into the wrong vault.
666
+ *
667
+ * Symlink classification uses lstat (not stat), so a top-level path that is
668
+ * itself a symlink is recorded as a link record rather than dereferenced.
669
+ * Pre-fix, statSync followed the link and the target's bytes were uploaded
670
+ * under the link's key — silently flattening the link topology.
547
671
  */
548
672
  function collectFiles(
549
673
  paths: string[],
550
674
  hqRoot: string,
551
675
  syncRoot: string,
552
676
  filter: (p: string, isDir?: boolean) => boolean,
553
- ): { absolutePath: string; relativePath: string }[] {
554
- const results: { absolutePath: string; relativePath: string }[] = [];
677
+ ): CollectedEntry[] {
678
+ const results: CollectedEntry[] = [];
555
679
 
556
680
  for (const p of paths) {
557
681
  const absolutePath = path.isAbsolute(p) ? p : path.resolve(hqRoot, p);
558
682
 
559
- if (!fs.existsSync(absolutePath)) {
683
+ // existsSync follows symlinks, so it would report a dangling top-level
684
+ // link as missing and skip it here. lstatSync inspects the link itself,
685
+ // so dangling and valid links alike reach the symlink branch below.
686
+ let lstat: fs.Stats;
687
+ try {
688
+ lstat = fs.lstatSync(absolutePath);
689
+ } catch {
560
690
  console.error(` Warning: ${p} does not exist, skipping.`);
561
691
  continue;
562
692
  }
563
693
 
694
+ // Containment check is split by entry kind: regular files and
695
+ // directories use isWithin (which canonicalizes via realpathSync to
696
+ // tolerate macOS APFS case-insensitivity), but symlinks use the
697
+ // link's own pathname rather than the link's resolved target. A
698
+ // valid use case for this asymmetry: a directory symlink whose
699
+ // target lives outside the company folder (e.g. companies/{co}/
700
+ // knowledge → repos/private/knowledge-{co}/) — the LINK itself is
701
+ // inside the company folder and is exactly what we want to record,
702
+ // but isWithin's realpath would say the resolved target is outside
703
+ // and reject the share. Recording symlinks rather than following
704
+ // them is the whole topology contract this fix establishes; the
705
+ // containment check needs to honor the same semantic.
706
+ if (lstat.isSymbolicLink()) {
707
+ if (!isWithinForLink(syncRoot, absolutePath)) {
708
+ console.error(` Warning: ${p} is outside company folder, skipping.`);
709
+ continue;
710
+ }
711
+ const relativePath = path.relative(syncRoot, absolutePath);
712
+ // Probe the filter with both isDir hints — we don't know whether
713
+ // the link's target is a file or a directory without
714
+ // stat-following the link, which we explicitly avoid (it would
715
+ // re-introduce the dereference behavior this whole change set is
716
+ // designed to prevent). An `.hqinclude` dir-only pattern like
717
+ // `companies/*/knowledge/` only matches with isDir=true, so a
718
+ // single isDir=false probe would silently drop directory
719
+ // symlinks under allowlist mode (the motivating case for this
720
+ // whole branch). The filter is pure path lookup with no I/O,
721
+ // so two calls are free.
722
+ if (!filter(absolutePath, false) && !filter(absolutePath, true)) continue;
723
+ results.push({
724
+ kind: "symlink",
725
+ absolutePath,
726
+ relativePath,
727
+ target: fs.readlinkSync(absolutePath),
728
+ });
729
+ continue;
730
+ }
731
+
564
732
  if (!isWithin(syncRoot, absolutePath)) {
565
733
  console.error(` Warning: ${p} is outside company folder, skipping.`);
566
734
  continue;
567
735
  }
568
736
 
569
- const stat = fs.statSync(absolutePath);
570
- if (stat.isDirectory()) {
737
+ if (lstat.isDirectory()) {
571
738
  if (!filter(absolutePath, true)) continue;
572
739
  results.push(...walkDir(absolutePath, syncRoot, filter));
573
- } else if (stat.isFile()) {
740
+ } else if (lstat.isFile()) {
574
741
  const relativePath = path.relative(syncRoot, absolutePath);
575
742
  if (filter(absolutePath)) {
576
- results.push({ absolutePath, relativePath });
743
+ results.push({ kind: "file", absolutePath, relativePath });
577
744
  }
578
745
  }
579
746
  }
@@ -585,14 +752,43 @@ function walkDir(
585
752
  dir: string,
586
753
  syncRoot: string,
587
754
  filter: (p: string, isDir?: boolean) => boolean,
588
- ): { absolutePath: string; relativePath: string }[] {
589
- const results: { absolutePath: string; relativePath: string }[] = [];
755
+ ): CollectedEntry[] {
756
+ const results: CollectedEntry[] = [];
590
757
  if (!fs.existsSync(dir)) return results;
591
758
 
592
759
  const entries = fs.readdirSync(dir, { withFileTypes: true });
593
760
  for (const entry of entries) {
594
761
  const absolutePath = path.join(dir, entry.name);
595
762
  const isDir = entry.isDirectory();
763
+
764
+ // Symlinks need their own filter probe BEFORE the regular gate.
765
+ // Dirent.isDirectory() returns false for any symlink — even a
766
+ // directory symlink — so the regular filter call below would use
767
+ // isDir=false and a dir-only allowlist pattern like
768
+ // `companies/*/knowledge/` would reject the link before the
769
+ // record-only branch runs. Probe with both hints; include if
770
+ // either matches. The filter is pure path lookup with no I/O.
771
+ if (entry.isSymbolicLink()) {
772
+ if (!filter(absolutePath, false) && !filter(absolutePath, true)) continue;
773
+ // Record the link without descending into its target. Following
774
+ // a directory symlink would re-enter content via a path that
775
+ // isn't its on-disk home (e.g. companies/{co}/knowledge → repos/
776
+ // private/knowledge-{co}/), causing per-company knowledge repos
777
+ // to be uploaded into every vault that links them. Recording
778
+ // and not following preserves the link topology while avoiding
779
+ // that duplication. readlinkSync on a Dirent-known link cannot
780
+ // fail under normal conditions; let the throw propagate if it
781
+ // somehow does (race with rm, EPERM) — the operator needs to
782
+ // see it rather than us silently dropping the link again.
783
+ results.push({
784
+ kind: "symlink",
785
+ absolutePath,
786
+ relativePath: path.relative(syncRoot, absolutePath),
787
+ target: fs.readlinkSync(absolutePath),
788
+ });
789
+ continue;
790
+ }
791
+
596
792
  // Pass the dir hint so dir-only ignore/include patterns (`foo/`)
597
793
  // resolve correctly for the descent decision.
598
794
  if (!filter(absolutePath, isDir)) continue;
@@ -601,6 +797,7 @@ function walkDir(
601
797
  results.push(...walkDir(absolutePath, syncRoot, filter));
602
798
  } else if (entry.isFile()) {
603
799
  results.push({
800
+ kind: "file",
604
801
  absolutePath,
605
802
  relativePath: path.relative(syncRoot, absolutePath),
606
803
  });
@@ -630,6 +827,28 @@ function realpathSafe(p: string): string {
630
827
  }
631
828
  }
632
829
 
830
+ /**
831
+ * Containment check tailored for symlinks. Canonicalizes the link's
832
+ * PARENT DIR (which is a real dir, not the link), then compares the
833
+ * recombined `parentReal/basename(linkPath)` against `parent`. Skipping
834
+ * the link's own canonicalization means a symlink that points outside
835
+ * `parent` is still considered "inside" so long as the link file itself
836
+ * lives inside — which is exactly the topology we want to upload as a
837
+ * link record without dereferencing.
838
+ *
839
+ * Falls back to `parent` / `path.dirname(linkPath)` literally when
840
+ * realpath throws (e.g. permission denied on a parent), trading a tiny
841
+ * window of macOS-APFS case-sensitivity drift for the more common case
842
+ * of "link lives inside, target lives outside."
843
+ */
844
+ function isWithinForLink(parent: string, linkPath: string): boolean {
845
+ const parentReal = realpathSafe(parent);
846
+ const linkParentReal = realpathSafe(path.dirname(linkPath));
847
+ const candidate = path.join(linkParentReal, path.basename(linkPath));
848
+ const rel = path.relative(parentReal, candidate);
849
+ return rel === "" || (!rel.startsWith("..") && !path.isAbsolute(rel));
850
+ }
851
+
633
852
  /**
634
853
  * Returns true when the remote object appears to have moved since the
635
854
  * journal entry's last-recorded sync. Prefers ETag equality; falls back to
@@ -681,18 +900,37 @@ function resolveDeleteScopeRoots(
681
900
 
682
901
  /**
683
902
  * Walk every journal key in `scopeRoots` whose local file is missing from
684
- * disk, and return the keys to delete. A key is in-scope when it matches
685
- * (or sits beneath) one of the resolved prefixes. Empty `scopeRoots`
686
- * empty plan (caller didn't opt in).
903
+ * disk and return the keys eligible for a remote `DeleteObject`. An entry
904
+ * is in the plan only when ALL of the following hold:
905
+ *
906
+ * 1. Its key matches (or sits beneath) one of the `scopeRoots` prefixes.
907
+ * 2. Its local file is missing from disk.
908
+ * 3. The current ignore filter (`shouldSync`) accepts the key — so paths
909
+ * filtered out by `.hqignore` / `.gitignore` / `DEFAULT_IGNORES` are
910
+ * never delete-propagated. This blocks the failure mode where a path
911
+ * lives in the vault (and the journal) but the local walk skips it
912
+ * because of asymmetric ignore rules; without this guard the push
913
+ * leg would erase it.
914
+ * 4. When `policy === "owned-only"`: the journal entry's `direction`
915
+ * is `"up"` (i.e. this machine previously uploaded the file). This
916
+ * blocks the failure mode where a behind machine's first `sync now`
917
+ * push leg would otherwise erase recent uploads from peers, since
918
+ * those entries are recorded as `direction: "down"` (pulled) or
919
+ * absent (never seen). Set `policy: "all"` to opt back into the
920
+ * legacy any-missing-file-deletes behaviour.
921
+ *
922
+ * Empty `scopeRoots` ⇒ empty plan (caller didn't opt in).
687
923
  */
688
924
  function computeDeletePlan(
689
925
  journal: SyncJournal,
690
926
  syncRoot: string,
691
927
  scopeRoots: string[],
928
+ shouldSync: (filePath: string, isDir?: boolean) => boolean,
929
+ policy: "owned-only" | "all",
692
930
  ): string[] {
693
931
  if (scopeRoots.length === 0) return [];
694
932
  const out: string[] = [];
695
- for (const relativeKey of Object.keys(journal.files)) {
933
+ for (const [relativeKey, entry] of Object.entries(journal.files)) {
696
934
  const inScope = scopeRoots.some(
697
935
  (root) =>
698
936
  root === "" ||
@@ -701,9 +939,48 @@ function computeDeletePlan(
701
939
  );
702
940
  if (!inScope) continue;
703
941
  const localPath = path.join(syncRoot, relativeKey);
704
- if (!fs.existsSync(localPath)) {
705
- out.push(relativeKey);
942
+ // lstat (not existsSync) so a dangling symlink — a link whose
943
+ // target has been removed but whose link file is still on disk —
944
+ // counts as "still present locally" and is NOT delete-propagated.
945
+ // Pre-fix, existsSync followed the link, returned false, and the
946
+ // entry was queued for remote DeleteObject in the same sync that
947
+ // had just uploaded it via uploadSymlink. The link round-tripped
948
+ // as "upload, then delete" in one cycle. ENOENT means truly
949
+ // absent → eligible; other lstat errors propagate.
950
+ let presentLocally = true;
951
+ try {
952
+ fs.lstatSync(localPath);
953
+ } catch (err: unknown) {
954
+ if (
955
+ err &&
956
+ typeof err === "object" &&
957
+ "code" in err &&
958
+ (err as { code?: string }).code === "ENOENT"
959
+ ) {
960
+ presentLocally = false;
961
+ } else {
962
+ throw err;
963
+ }
706
964
  }
965
+ if (presentLocally) continue;
966
+ // (3) Symmetric filter guard. `shouldSync` is constructed from the same
967
+ // hqRoot the pull leg uses, so a key the pull would have skipped
968
+ // ("ignored") is also one we must not delete-propagate.
969
+ //
970
+ // Dual-hint probe: by the time we're considering this entry for
971
+ // remote deletion, the local file is already gone — we have no
972
+ // way to know whether it was a regular file or a symlink record.
973
+ // A single isDir=false probe would silently keep the remote
974
+ // record alive whenever the only matching .hqinclude allowlist
975
+ // pattern is dir-only (e.g. `companies/*/knowledge/`), since
976
+ // gitignore's slash semantics reject the slashless probe. The
977
+ // same dual-hint pattern in walkDir/collectFiles (push) and
978
+ // computePullPlan (pull) applies symmetrically here. Pure path
979
+ // lookup, no I/O.
980
+ if (!shouldSync(localPath, false) && !shouldSync(localPath, true)) continue;
981
+ // (4) Direction guard under "owned-only" policy.
982
+ if (policy === "owned-only" && entry.direction !== "up") continue;
983
+ out.push(relativeKey);
707
984
  }
708
985
  return out;
709
986
  }