@indigoai-us/hq-cloud 5.17.0 → 5.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli/share.ts CHANGED
@@ -9,9 +9,17 @@ import * as fs from "fs";
9
9
  import * as path from "path";
10
10
  import type { EntityContext, VaultServiceConfig, SyncJournal } from "../types.js";
11
11
  import { resolveEntityContext, isExpiringSoon, refreshEntityContext } from "../context.js";
12
- import { uploadFile, headRemoteFile, deleteRemoteFile } from "../s3.js";
12
+ import { uploadFile, uploadSymlink, headRemoteFile, deleteRemoteFile } from "../s3.js";
13
13
  import type { UploadAuthor } from "../s3.js";
14
- import { readJournal, writeJournal, hashFile, updateEntry, removeEntry, normalizeEtag } from "../journal.js";
14
+ import {
15
+ readJournal,
16
+ writeJournal,
17
+ hashFile,
18
+ hashSymlinkTarget,
19
+ updateEntry,
20
+ removeEntry,
21
+ normalizeEtag,
22
+ } from "../journal.js";
15
23
  import { createIgnoreFilter, isWithinSizeLimit } from "../ignore.js";
16
24
  import { resolveConflict } from "./conflict.js";
17
25
  import type { ConflictStrategy } from "./conflict.js";
@@ -30,11 +38,24 @@ import type { SyncProgressEvent } from "./sync.js";
30
38
  type PushPlanItem =
31
39
  | {
32
40
  action: "upload";
41
+ kind: "file";
33
42
  absolutePath: string;
34
43
  relativePath: string;
35
44
  localHash: string;
36
45
  size: number;
37
46
  }
47
+ | {
48
+ action: "upload";
49
+ kind: "symlink";
50
+ absolutePath: string;
51
+ relativePath: string;
52
+ // The link's target string verbatim (whatever readlink returned).
53
+ // Hashed into localHash so a target rewrite re-uploads even when
54
+ // skipUnchanged is on; size stays 0 because the wire body is empty.
55
+ target: string;
56
+ localHash: string;
57
+ size: 0;
58
+ }
38
59
  | {
39
60
  action: "skip-size-limit";
40
61
  absolutePath: string;
@@ -64,13 +85,46 @@ interface PushPlan {
64
85
  * Consumers that want a conflict count get it from the `complete` event.
65
86
  */
66
87
  function computePushPlan(
67
- filesToShare: { absolutePath: string; relativePath: string }[],
88
+ filesToShare: CollectedEntry[],
68
89
  journal: SyncJournal,
69
90
  skipUnchanged: boolean,
70
91
  ): PushPlan {
71
92
  const items: PushPlanItem[] = [];
72
93
 
73
- for (const { absolutePath, relativePath } of filesToShare) {
94
+ for (const entry of filesToShare) {
95
+ const { absolutePath, relativePath } = entry;
96
+
97
+ // Symlinks bypass the size-limit gate (the wire body is empty; the
98
+ // target travels as metadata) and hash the target through the
99
+ // symlink-namespaced hash so a symlink to "real.md" can never
100
+ // collide with a regular file containing the bytes "real.md" in
101
+ // the skip-unchanged gate. The target is what we're actually
102
+ // uploading, so its hash is what should drive change detection —
103
+ // a target rewrite must re-fire an upload even when the link
104
+ // itself "looks" identical.
105
+ if (entry.kind === "symlink") {
106
+ const localHash = hashSymlinkTarget(entry.target);
107
+
108
+ if (skipUnchanged) {
109
+ const existing = journal.files[relativePath];
110
+ if (existing && existing.hash === localHash) {
111
+ items.push({ action: "skip-unchanged", absolutePath, relativePath });
112
+ continue;
113
+ }
114
+ }
115
+
116
+ items.push({
117
+ action: "upload",
118
+ kind: "symlink",
119
+ absolutePath,
120
+ relativePath,
121
+ target: entry.target,
122
+ localHash,
123
+ size: 0,
124
+ });
125
+ continue;
126
+ }
127
+
74
128
  if (!isWithinSizeLimit(absolutePath)) {
75
129
  items.push({ action: "skip-size-limit", absolutePath, relativePath });
76
130
  continue;
@@ -89,6 +143,7 @@ function computePushPlan(
89
143
  const size = fs.statSync(absolutePath).size;
90
144
  items.push({
91
145
  action: "upload",
146
+ kind: "file",
92
147
  absolutePath,
93
148
  relativePath,
94
149
  localHash,
@@ -204,11 +259,13 @@ export interface ShareOptions {
204
259
  * mirror a destructive local checkout opts in here explicitly.
205
260
  *
206
261
  * Independently of this policy, an entry is also dropped from the
207
- * plan when `shouldSync(localPath, false) === false` i.e. the
208
- * current ignore filter would have skipped the path on pull. That
209
- * symmetry blocks the failure mode where a path was filtered locally
210
- * but lived in the vault (and the journal) from an older HQ layout
211
- * or a different machine, causing the next push to erase it.
262
+ * plan when neither the file-shape nor the directory-shape probe of
263
+ * `shouldSync` accepts the path, i.e. the current ignore filter
264
+ * would have skipped the path on pull (whether classified as a
265
+ * regular file or a symlink record / directory). That symmetry
266
+ * blocks the failure mode where a path was filtered locally but
267
+ * lived in the vault (and the journal) from an older HQ layout or
268
+ * a different machine, causing the next push to erase it.
212
269
  */
213
270
  propagateDeletePolicy?: "owned-only" | "all";
214
271
  /**
@@ -453,18 +510,26 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
453
510
  }
454
511
  }
455
512
 
456
- // Upload
513
+ // Upload — symlinks go through uploadSymlink (zero-byte body + target
514
+ // metadata), regular files through uploadFile (file contents). The
515
+ // discriminator is item.kind set by computePushPlan; both branches
516
+ // converge on the same journal/event update path below.
457
517
  try {
458
- const stat = fs.statSync(absolutePath);
518
+ const isSymlinkUpload = item.kind === "symlink";
519
+ const size = isSymlinkUpload ? 0 : fs.statSync(absolutePath).size;
459
520
 
460
- const { etag } = options.author
461
- ? await uploadFile(ctx, absolutePath, relativePath, options.author)
462
- : await uploadFile(ctx, absolutePath, relativePath);
521
+ const { etag } = isSymlinkUpload
522
+ ? options.author
523
+ ? await uploadSymlink(ctx, item.target, relativePath, options.author)
524
+ : await uploadSymlink(ctx, item.target, relativePath)
525
+ : options.author
526
+ ? await uploadFile(ctx, absolutePath, relativePath, options.author)
527
+ : await uploadFile(ctx, absolutePath, relativePath);
463
528
 
464
529
  // Update journal with optional message; capture the post-upload ETag
465
530
  // so the next sync can distinguish "remote moved since we last wrote"
466
531
  // from "user edited locally" without conflating the two.
467
- updateEntry(journal, relativePath, localHash, stat.size, "up", etag);
532
+ updateEntry(journal, relativePath, localHash, size, "up", etag);
468
533
  if (message) {
469
534
  journal.files[relativePath] = {
470
535
  ...journal.files[relativePath],
@@ -473,11 +538,11 @@ export async function share(options: ShareOptions): Promise<ShareResult> {
473
538
  }
474
539
 
475
540
  filesUploaded++;
476
- bytesUploaded += stat.size;
541
+ bytesUploaded += size;
477
542
  emit({
478
543
  type: "progress",
479
544
  path: relativePath,
480
- bytes: stat.size,
545
+ bytes: size,
481
546
  ...(message ? { message } : {}),
482
547
  });
483
548
  } catch (err) {
@@ -578,42 +643,104 @@ function resolveActiveCompany(hqRoot: string): string | undefined {
578
643
  return undefined;
579
644
  }
580
645
 
646
+ /**
647
+ * One entry produced by collectFiles/walkDir. Files describe regular
648
+ * payloads that get hashed + size-checked + uploaded via uploadFile;
649
+ * symlinks describe link records whose target string flows through
650
+ * uploadSymlink as user metadata. Walked-dir traversal NEVER descends
651
+ * into a symlink — directory symlinks are recorded as link entries and
652
+ * left at that, so their targets are never duplicated into the
653
+ * wrong vault path (the same topology safety the legacy walker provided
654
+ * by accident of Dirent.isFile() returning false for links).
655
+ */
656
+ type CollectedEntry =
657
+ | { kind: "file"; absolutePath: string; relativePath: string }
658
+ | { kind: "symlink"; absolutePath: string; relativePath: string; target: string };
659
+
581
660
  /**
582
661
  * Collect files from paths (expanding directories recursively).
583
662
  *
584
663
  * Remote S3 keys are computed relative to `syncRoot` (companies/{slug}/), not
585
664
  * `hqRoot`. Files outside `syncRoot` are skipped with a warning — sharing
586
665
  * anything outside a company's folder would leak state into the wrong vault.
666
+ *
667
+ * Symlink classification uses lstat (not stat), so a top-level path that is
668
+ * itself a symlink is recorded as a link record rather than dereferenced.
669
+ * Pre-fix, statSync followed the link and the target's bytes were uploaded
670
+ * under the link's key — silently flattening the link topology.
587
671
  */
588
672
  function collectFiles(
589
673
  paths: string[],
590
674
  hqRoot: string,
591
675
  syncRoot: string,
592
676
  filter: (p: string, isDir?: boolean) => boolean,
593
- ): { absolutePath: string; relativePath: string }[] {
594
- const results: { absolutePath: string; relativePath: string }[] = [];
677
+ ): CollectedEntry[] {
678
+ const results: CollectedEntry[] = [];
595
679
 
596
680
  for (const p of paths) {
597
681
  const absolutePath = path.isAbsolute(p) ? p : path.resolve(hqRoot, p);
598
682
 
599
- if (!fs.existsSync(absolutePath)) {
683
+ // lstatSync (not existsSync) classifies the path without following it:
684
+ // existsSync would report a dangling top-level link as missing, while
685
+ // lstat succeeds for any link on disk. Only an lstat failure skips here.
686
+ let lstat: fs.Stats;
687
+ try {
688
+ lstat = fs.lstatSync(absolutePath);
689
+ } catch {
600
690
  console.error(` Warning: ${p} does not exist, skipping.`);
601
691
  continue;
602
692
  }
603
693
 
694
+ // Containment check is split by entry kind: regular files and
695
+ // directories use isWithin (which canonicalizes via realpathSync to
696
+ // tolerate macOS APFS case-insensitivity), but symlinks use the
697
+ // link's own pathname rather than the link's resolved target. A
698
+ // valid use case for this asymmetry: a directory symlink whose
699
+ // target lives outside the company folder (e.g. companies/{co}/
700
+ // knowledge → repos/private/knowledge-{co}/) — the LINK itself is
701
+ // inside the company folder and is exactly what we want to record,
702
+ // but isWithin's realpath would say the resolved target is outside
703
+ // and reject the share. Recording symlinks rather than following
704
+ // them is the whole topology contract this fix establishes; the
705
+ // containment check needs to honor the same semantic.
706
+ if (lstat.isSymbolicLink()) {
707
+ if (!isWithinForLink(syncRoot, absolutePath)) {
708
+ console.error(` Warning: ${p} is outside company folder, skipping.`);
709
+ continue;
710
+ }
711
+ const relativePath = path.relative(syncRoot, absolutePath);
712
+ // Probe the filter with both isDir hints — we don't know whether
713
+ // the link's target is a file or a directory without
714
+ // stat-following the link, which we explicitly avoid (it would
715
+ // re-introduce the dereference behavior this whole change set is
716
+ // designed to prevent). An `.hqinclude` dir-only pattern like
717
+ // `companies/*/knowledge/` only matches with isDir=true, so a
718
+ // single isDir=false probe would silently drop directory
719
+ // symlinks under allowlist mode (the motivating case for this
720
+ // whole branch). The filter is pure path lookup with no I/O,
721
+ // so two calls are free.
722
+ if (!filter(absolutePath, false) && !filter(absolutePath, true)) continue;
723
+ results.push({
724
+ kind: "symlink",
725
+ absolutePath,
726
+ relativePath,
727
+ target: fs.readlinkSync(absolutePath),
728
+ });
729
+ continue;
730
+ }
731
+
604
732
  if (!isWithin(syncRoot, absolutePath)) {
605
733
  console.error(` Warning: ${p} is outside company folder, skipping.`);
606
734
  continue;
607
735
  }
608
736
 
609
- const stat = fs.statSync(absolutePath);
610
- if (stat.isDirectory()) {
737
+ if (lstat.isDirectory()) {
611
738
  if (!filter(absolutePath, true)) continue;
612
739
  results.push(...walkDir(absolutePath, syncRoot, filter));
613
- } else if (stat.isFile()) {
740
+ } else if (lstat.isFile()) {
614
741
  const relativePath = path.relative(syncRoot, absolutePath);
615
742
  if (filter(absolutePath)) {
616
- results.push({ absolutePath, relativePath });
743
+ results.push({ kind: "file", absolutePath, relativePath });
617
744
  }
618
745
  }
619
746
  }
@@ -625,14 +752,43 @@ function walkDir(
625
752
  dir: string,
626
753
  syncRoot: string,
627
754
  filter: (p: string, isDir?: boolean) => boolean,
628
- ): { absolutePath: string; relativePath: string }[] {
629
- const results: { absolutePath: string; relativePath: string }[] = [];
755
+ ): CollectedEntry[] {
756
+ const results: CollectedEntry[] = [];
630
757
  if (!fs.existsSync(dir)) return results;
631
758
 
632
759
  const entries = fs.readdirSync(dir, { withFileTypes: true });
633
760
  for (const entry of entries) {
634
761
  const absolutePath = path.join(dir, entry.name);
635
762
  const isDir = entry.isDirectory();
763
+
764
+ // Symlinks need their own filter probe BEFORE the regular gate.
765
+ // Dirent.isDirectory() returns false for any symlink — even a
766
+ // directory symlink — so the regular filter call below would use
767
+ // isDir=false and a dir-only allowlist pattern like
768
+ // `companies/*/knowledge/` would reject the link before the
769
+ // record-only branch runs. Probe with both hints; include if
770
+ // either matches. The filter is pure path lookup with no I/O.
771
+ if (entry.isSymbolicLink()) {
772
+ if (!filter(absolutePath, false) && !filter(absolutePath, true)) continue;
773
+ // Record the link without descending into its target. Following
774
+ // a directory symlink would re-enter content via a path that
775
+ // isn't its on-disk home (e.g. companies/{co}/knowledge → repos/
776
+ // private/knowledge-{co}/), causing per-company knowledge repos
777
+ // to be uploaded into every vault that links them. Recording
778
+ // and not following preserves the link topology while avoiding
779
+ // that duplication. readlinkSync on a Dirent-known link cannot
780
+ // fail under normal conditions; let the throw propagate if it
781
+ // somehow does (race with rm, EPERM) — the operator needs to
782
+ // see it rather than us silently dropping the link again.
783
+ results.push({
784
+ kind: "symlink",
785
+ absolutePath,
786
+ relativePath: path.relative(syncRoot, absolutePath),
787
+ target: fs.readlinkSync(absolutePath),
788
+ });
789
+ continue;
790
+ }
791
+
636
792
  // Pass the dir hint so dir-only ignore/include patterns (`foo/`)
637
793
  // resolve correctly for the descent decision.
638
794
  if (!filter(absolutePath, isDir)) continue;
@@ -641,6 +797,7 @@ function walkDir(
641
797
  results.push(...walkDir(absolutePath, syncRoot, filter));
642
798
  } else if (entry.isFile()) {
643
799
  results.push({
800
+ kind: "file",
644
801
  absolutePath,
645
802
  relativePath: path.relative(syncRoot, absolutePath),
646
803
  });
@@ -670,6 +827,28 @@ function realpathSafe(p: string): string {
670
827
  }
671
828
  }
672
829
 
830
+ /**
831
+ * Containment check tailored for symlinks. Canonicalizes the link's
832
+ * PARENT DIR (which is a real dir, not the link), then compares the
833
+ * recombined `linkParentReal/basename(linkPath)` against `parent`. Skipping
834
+ * the link's own canonicalization means a symlink that points outside
835
+ * `parent` is still considered "inside" so long as the link file itself
836
+ * lives inside — which is exactly the topology we want to upload as a
837
+ * link record without dereferencing.
838
+ *
839
+ * Falls back to `parent` / `path.dirname(linkPath)` literally when
840
+ * realpath throws (e.g. permission denied on a parent), trading a tiny
841
+ * window of macOS-APFS case-sensitivity drift for the more common case
842
+ * of "link lives inside, target lives outside."
843
+ */
844
+ function isWithinForLink(parent: string, linkPath: string): boolean {
845
+ const parentReal = realpathSafe(parent);
846
+ const linkParentReal = realpathSafe(path.dirname(linkPath));
847
+ const candidate = path.join(linkParentReal, path.basename(linkPath));
848
+ const rel = path.relative(parentReal, candidate);
849
+ return rel === "" || (!rel.startsWith("..") && !path.isAbsolute(rel));
850
+ }
851
+
673
852
  /**
674
853
  * Returns true when the remote object appears to have moved since the
675
854
  * journal entry's last-recorded sync. Prefers ETag equality; falls back to
@@ -760,11 +939,45 @@ function computeDeletePlan(
760
939
  );
761
940
  if (!inScope) continue;
762
941
  const localPath = path.join(syncRoot, relativeKey);
763
- if (fs.existsSync(localPath)) continue;
942
+ // lstat (not existsSync) so a dangling symlink — a link whose
943
+ // target has been removed but whose link file is still on disk —
944
+ // counts as "still present locally" and is NOT delete-propagated.
945
+ // Pre-fix, existsSync followed the link, returned false, and the
946
+ // entry was queued for remote DeleteObject in the same sync that
947
+ // had just uploaded it via uploadSymlink. The link round-tripped
948
+ // as "upload, then delete" in one cycle. ENOENT means truly
949
+ // absent → eligible; other lstat errors propagate.
950
+ let presentLocally = true;
951
+ try {
952
+ fs.lstatSync(localPath);
953
+ } catch (err: unknown) {
954
+ if (
955
+ err &&
956
+ typeof err === "object" &&
957
+ "code" in err &&
958
+ (err as { code?: string }).code === "ENOENT"
959
+ ) {
960
+ presentLocally = false;
961
+ } else {
962
+ throw err;
963
+ }
964
+ }
965
+ if (presentLocally) continue;
764
966
  // (3) Symmetric filter guard. `shouldSync` is constructed from the same
765
967
  // hqRoot the pull leg uses, so a key the pull would have skipped
766
968
  // ("ignored") is also one we must not delete-propagate.
767
- if (!shouldSync(localPath, false)) continue;
969
+ //
970
+ // Dual-hint probe: by the time we're considering this entry for
971
+ // remote deletion, the local file is already gone — we have no
972
+ // way to know whether it was a regular file or a symlink record.
973
+ // A single isDir=false probe would silently keep the remote
974
+ // record alive whenever the only matching .hqinclude allowlist
975
+ // pattern is dir-only (e.g. `companies/*/knowledge/`), since
976
+ // gitignore's slash semantics reject the slashless probe. The
977
+ // same dual-hint pattern in walkDir/collectFiles (push) and
978
+ // computePullPlan (pull) applies symmetrically here. Pure path
979
+ // lookup, no I/O.
980
+ if (!shouldSync(localPath, false) && !shouldSync(localPath, true)) continue;
768
981
  // (4) Direction guard under "owned-only" policy.
769
982
  if (policy === "owned-only" && entry.direction !== "up") continue;
770
983
  out.push(relativeKey);