@misterhuydo/sentinel 1.6.10 → 1.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.cairn/.hint-lock +1 -1
  2. package/.cairn/memory/auto-memory/MEMORY.md +21 -0
  3. package/.cairn/memory/auto-memory/decision_auto_commit_auto_release_split.md +22 -0
  4. package/.cairn/memory/auto-memory/decision_git_apply_recount_for_llm_diffs.md +17 -0
  5. package/.cairn/memory/auto-memory/decision_jenkins_wait_before_cascade.md +23 -0
  6. package/.cairn/memory/auto-memory/decision_multi_repo_fix_architecture.md +25 -0
  7. package/.cairn/memory/auto-memory/decision_per_project_claude_session.md +23 -0
  8. package/.cairn/memory/auto-memory/experience_bash_local_shadows_env_var.md +11 -0
  9. package/.cairn/memory/auto-memory/experience_cairn_session_discipline.md +9 -0
  10. package/.cairn/memory/auto-memory/experience_cicd_user_must_not_be_hardcoded.md +17 -0
  11. package/.cairn/memory/auto-memory/experience_claude_resume_stale_context_risk.md +23 -0
  12. package/.cairn/memory/auto-memory/experience_envelope_first_auth_detection.md +11 -0
  13. package/.cairn/memory/auto-memory/experience_mvn_negative_cache_blocks_cascade_retries.md +17 -0
  14. package/.cairn/memory/auto-memory/experience_publish_safety_check.md +13 -0
  15. package/.cairn/memory/auto-memory/experience_secrets_in_gitignored_files.md +9 -0
  16. package/.cairn/memory/auto-memory/experience_sentinel_deployment_server.md +13 -0
  17. package/.cairn/memory/auto-memory/feedback_ageri_rag_architecture.md +19 -0
  18. package/.cairn/memory/auto-memory/feedback_design_principle.md +16 -0
  19. package/.cairn/memory/auto-memory/feedback_publish_workflow.md +14 -0
  20. package/.cairn/memory/auto-memory/feedback_secrets_handling.md +15 -0
  21. package/.cairn/memory/auto-memory/feedback_slack_admin_allowlist.md +11 -0
  22. package/.cairn/memory/auto-memory/feedback_slack_thinking_status.md +14 -0
  23. package/.cairn/memory/auto-memory/feedback_start_sh_patching.md +16 -0
  24. package/.cairn/memory/auto-memory/knowledge_cairn_federation_for_sentinel_projects.md +20 -0
  25. package/.cairn/memory/auto-memory/knowledge_cairn_hooks_block_oauth_read_edit.md +24 -0
  26. package/.cairn/memory/auto-memory/knowledge_sentinel_repo_remotes.md +28 -0
  27. package/.cairn/memory/auto-memory/knowledge_sentinel_systemd_inactive_is_misleading.md +18 -0
  28. package/.cairn/memory/auto-memory/knowledge_sentinel_upgrade_requires_file_copy.md +20 -0
  29. package/.cairn/memory/auto-memory/preference_no_api_key_for_heavy_coding.md +17 -0
  30. package/.cairn/memory/auto-memory/project_ageri.md +45 -0
  31. package/.cairn/memory/auto-memory/project_ageri_architecture_v2.md +89 -0
  32. package/.cairn/memory/auto-memory/project_ageri_devtest.md +15 -0
  33. package/.cairn/memory/auto-memory/project_ageri_personality.md +61 -0
  34. package/.cairn/memory/auto-memory/project_ageri_platform_vision.md +70 -0
  35. package/.cairn/memory/auto-memory/project_publish_workflow.md +23 -0
  36. package/.cairn/memory/auto-memory/project_sentinel_state.md +44 -0
  37. package/.cairn/memory/auto-memory/project_sentinel_ui.md +39 -0
  38. package/.cairn/memory/auto-memory/reference_ageri_server.md +35 -0
  39. package/.cairn/memory/auto-memory/reference_cairn_federation.md +26 -0
  40. package/.cairn/memory/auto-memory/reference_oracle_servers.md +24 -0
  41. package/.cairn/memory/auto-memory/reference_sentinel_server.md +15 -0
  42. package/.cairn/memory/auto-memory/reference_taplo.md +52 -0
  43. package/.cairn/session.json +2 -2
  44. package/package.json +1 -1
  45. package/python/sentinel/__init__.py +1 -1
  46. package/python/sentinel/notify.py +1 -1
  47. package/python/sentinel/slack_bot.py +34 -0
package/.cairn/.hint-lock CHANGED
@@ -1 +1 @@
1
- 2026-04-25T09:41:31.525Z
1
+ 2026-04-27T13:09:20.340Z
@@ -0,0 +1,21 @@
1
+ # Memory Index
2
+
3
+ - [sentinel_deployment_server](experience_sentinel_deployment_server.md) — Sentinel deploys on EC2 server, SSH via ec2-user with sentinel.pem key `[experience]`
4
+ - [publish_safety_check](experience_publish_safety_check.md) — Always syntax-check Python and JS files before npm publish `[experience]`
5
+ - [cairn_session_discipline](experience_cairn_session_discipline.md) — Use cairn_resume at session start, cairn_checkpoint at end — not cairn_maintain `[experience]`
6
+ - [secrets_in_gitignored_files](experience_secrets_in_gitignored_files.md) — All secrets go in gitignored files only — never in committed config `[experience]`
7
+ - [auto_commit_auto_release_split](decision_auto_commit_auto_release_split.md) — AUTO_PUBLISH split into AUTO_COMMIT + AUTO_RELEASE with 3-level hierarchy `[decision]`
8
+ - [bash_local_shadows_env_var](experience_bash_local_shadows_env_var.md) — bash `local var=""` shadows same-named env vars — use a distinct SENTINEL_*_OVERRIDE pattern `[experience]`
9
+ - [sentinel_upgrade_requires_file_copy](knowledge_sentinel_upgrade_requires_file_copy.md) — npm install -g does NOT update the running instance — must copy files from npm package to /home/sentinel/sentinel/code/ and restart `[knowledge]`
10
+ - [multi_repo_fix_architecture](decision_multi_repo_fix_architecture.md) — Multi-repo fix flow: parse_multi_repo_patch → atomic dry-run → per-repo apply/PR with sibling list `[decision]`
11
+ - [per_project_claude_session](decision_per_project_claude_session.md) — Long-lived `claude --resume` session per project, persisted in claude_sessions table; per-project asyncio.Lock `[decision]`
12
+ - [cairn_federation_for_sentinel_projects](knowledge_cairn_federation_for_sentinel_projects.md) — Sentinel project root + each sub-repo both run `cairn install`; federation auto-mounts via parent walk-up `[knowledge]`
13
+ - [claude_resume_stale_context_risk](experience_claude_resume_stale_context_risk.md) — claude --resume carries file-content memory across turns — must use per-route session keys + force fresh Read `[experience]`
14
+ - [no_api_key_for_heavy_coding](preference_no_api_key_for_heavy_coding.md) — Never bill heavy coding tasks against ANTHROPIC_API_KEY — must use Claude Pro OAuth `[preference]`
15
+ - [cairn_hooks_block_oauth_read_edit](knowledge_cairn_hooks_block_oauth_read_edit.md) — Cairn PreToolUse hooks (minify, edit-guard) exit 2 to block Read/Edit; bypass via --setting-sources project,local + re-add MCP via --mcp-config `[knowledge]`
16
+ - [git_apply_recount_for_llm_diffs](decision_git_apply_recount_for_llm_diffs.md) — Always pass `git apply --recount` for LLM-generated patches — fixes off-by-one hunk header counts `[decision]`
17
+ - [mvn_negative_cache_blocks_cascade_retries](experience_mvn_negative_cache_blocks_cascade_retries.md) — mvn caches "artifact not found" responses for ~24h — pass `-U` to bypass when polling for a freshly-published artifact `[experience]`
18
+ - [jenkins_wait_before_cascade](decision_jenkins_wait_before_cascade.md) — manage_release blocks on Jenkins build (wait=True) before firing cascade — eliminates Nexus race `[decision]`
19
+ - [cicd_user_must_not_be_hardcoded](experience_cicd_user_must_not_be_hardcoded.md) — Jenkins triggers must use repo.cicd_user — hardcoded "sentinel" caused silent 401s for all elprint builds `[experience]`
20
+ - [sentinel_repo_remotes](knowledge_sentinel_repo_remotes.md) — J:\Projects\Sentinel = source of truth; misterhuydo/Sentinel primary; exoreaction/Sentinel = downstream mirror `[knowledge]`
21
+ - [sentinel_systemd_inactive_is_misleading](knowledge_sentinel_systemd_inactive_is_misleading.md) — systemctl status sentinel reports inactive even when workers run; check ps + per-project PID files instead `[knowledge]`
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: auto_commit_auto_release_split
3
+ description: AUTO_PUBLISH split into AUTO_COMMIT + AUTO_RELEASE with 3-level hierarchy
4
+ type: decision
5
+ created_at: 2026-04-08T16:00:35.305Z
6
+ updated_at: 2026-04-08T16:00:35.305Z
7
+ ---
8
+
9
+ AUTO_PUBLISH was split into two independent flags (v1.5.17):
10
+
11
+ - AUTO_COMMIT: push directly to main (no PR). Default: false.
12
+ - AUTO_RELEASE: trigger Jenkins/GHA pipeline after push. Default: false.
13
+
14
+ Both follow 3-level hierarchy: repo override → project default (sentinel.properties) → false.
15
+
16
+ Current config:
17
+ - sentinel-1881: AUTO_COMMIT=true, AUTO_RELEASE=false (project level)
18
+ - sentinel-elprint: AUTO_COMMIT=true, AUTO_RELEASE=false (project level)
19
+ - All individual repo configs: no AUTO_COMMIT/AUTO_RELEASE (inherit from project)
20
+
21
+ Pending releases: when AUTO_COMMIT=true and AUTO_RELEASE=false, committed pushes are
22
+ recorded in pending_releases table. get_status exposes them. manage_release clears them.
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: git_apply_recount_for_llm_diffs
3
+ description: Always pass `git apply --recount` for LLM-generated patches — fixes off-by-one hunk header counts
4
+ type: decision
5
+ created_at: 2026-04-24T12:34:44.489Z
6
+ updated_at: 2026-04-24T12:34:44.489Z
7
+ ---
8
+
9
+ All `git apply` invocations in `sentinel/git_manager.py` (both `--check` dry-runs and actual applies, single-repo and multi-repo) MUST include `--recount`.
10
+
11
+ **Why:** LLM-generated unified diffs frequently miscount hunk header line counts (`@@ -X,N +Y,M @@`) by ±1 — Claude can write a hunk body with 9 old / 19 new lines but emit a header `@@ -4,9 +4,18 @@`. Without `--recount`, this fails dry-run with "corrupt patch at line N". With `--recount`, git infers counts from the actual `+/-/ ` lines and the patch applies cleanly.
12
+
13
+ **Origin:** v1.6.5 ended a long debugging arc where Claude in `--print` mode kept producing patches with miscounted hunks even after fixing cairn-hook permission denials and per-route session contamination. The actual content was always correct — only headers were off. `--recount` made every prior failure into a clean apply.
14
+
15
+ **Where applied:** `git_manager.py:266` (apply_and_commit dry-run), `:271` (apply), `:380` (apply_and_commit_multi dry-run), `:412` (apply_and_commit_multi apply). All four sites now use `["apply", "--check", "--recount", "--ignore-whitespace", ...]` or `["apply", "--recount", "--ignore-whitespace", ...]`.
16
+
17
+ **Caveat:** `--recount` only fixes header miscounts. If the actual `+/-/ ` lines are wrong (missing context, wrong content), the patch still fails — which is correct safety behavior.
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: jenkins_wait_before_cascade
3
+ description: manage_release blocks on Jenkins build (wait=True) before firing cascade — eliminates Nexus race
4
+ type: decision
5
+ created_at: 2026-04-24T13:57:52.942Z
6
+ updated_at: 2026-04-24T13:57:52.942Z
7
+ ---
8
+
9
+ When `Boss.manage_release(operation='release' or 'release_and_cascade', confirmed=True)` is called, sentinel_boss.py now invokes `_trigger_jenkins_release(repo, wait=True)` whenever a cascade will follow.
10
+
11
+ **Why:** the cascade calls `mvn compile -DskipTests -U` per dependent repo, which queries Nexus for the new artifact. If Jenkins is still building, Nexus returns 404 and (without `-U` — fixed in v1.6.6) mvn caches that for ~24h. Even with `-U`, racing Jenkins is wasteful — better to know exactly when the build finished via Jenkins's own API.
12
+
13
+ **How `_wait_for_jenkins_build` works** (`sentinel/cicd_trigger.py`):
14
+ - Records `lastBuild` number BEFORE triggering
15
+ - Polls `<job_url>/api/json` every 20s for up to 15 min (`JENKINS_RELEASE_TIMEOUT=900`)
16
+ - Returns True only if a NEW build appears AND completes with `result=SUCCESS`
17
+ - Returns False on FAILURE / ABORTED / TIMEOUT
18
+
19
+ **Caveat:** Boss conversation BLOCKS for up to 15 min while waiting. Mitigation: a Slack `:hourglass_flowing_sand:` message is posted right before the wait so the user knows what's happening. Future improvement: run the wait async with periodic Slack updates.
20
+
21
+ **Where applied:** `sentinel/sentinel_boss.py` `manage_release` handler, `operation in ("release", "release_and_cascade")` branch (around line 4912 in v1.6.7).
22
+
23
+ Shipped in v1.6.7. Pairs with v1.6.6's `-U` fix to mvn — both are needed: -U for the case where someone runs the cascade independently, wait=True for the integrated trigger-then-cascade flow.
@@ -0,0 +1,25 @@
1
+ ---
2
+ name: multi_repo_fix_architecture
3
+ description: Multi-repo fix flow: parse_multi_repo_patch → atomic dry-run → per-repo apply/PR with sibling list
4
+ type: decision
5
+ created_at: 2026-04-24T10:45:04.157Z
6
+ updated_at: 2026-04-24T10:45:04.157Z
7
+ ---
8
+
9
+ Sentinel now supports cross-repo fixes (one task → multiple repos).
10
+
11
+ **Patch format claude must produce:**
12
+ - Paths prefixed `repos/<repo-name>/...` (relative to project root)
13
+ - Optional `# Affected repos: a, b, c` header line — order = merge order (library first, consumer after)
14
+
15
+ **Flow (in main._generate_apply_publish):**
16
+ 1. `generate_fix(..., all_repos=cfg_loader.repos.values())` — Claude runs from project root with cairn federation visibility, gets `--resume <session_id>` + `--output-format json`
17
+ 2. `apply_and_commit_multi(event, patch_path, all_repos, cfg)` — splits combined patch by `repos/<name>/` prefix, atomic dry-run all repos first; if ANY fails, ALL marked "aborted" with no commits. Then per-repo: apply + test + commit (per-repo failures don't block others)
18
+ 3. `publish_multi(event, results, cfg)` — pushes branches, opens PRs with `extra_body` listing sibling repo NAMES (URL cross-refs deferred to v2 — needs two-pass GitHub PATCH)
19
+ 4. Per-repo state goes to `fix_repos` table; primary repo also lands in legacy `fixes` for back-compat with reporter/Boss
20
+
21
+ **Fallback:** If patch has no `repos/<name>/` prefix (legacy/manual), `apply_and_commit_multi` returns `[]` and main falls back to single-repo `apply_and_commit + publish`.
22
+
23
+ **Files:** sentinel/git_manager.py (parse_multi_repo_patch + apply_and_commit_multi + publish_multi), sentinel/fix_engine.py (multi-repo prompt + JSON parser + session resume), sentinel/main.py (_generate_apply_publish + _apply_publish_single fallback).
24
+
25
+ **Removed:** MAX_FILES_IN_PATCH=5 / MAX_LINES_IN_PATCH=200 limits. Claude is trusted to size patches appropriately.
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: per_project_claude_session
3
+ description: Long-lived `claude --resume` session per project, persisted in claude_sessions table; per-project asyncio.Lock
4
+ type: decision
5
+ created_at: 2026-04-24T10:45:21.608Z
6
+ updated_at: 2026-04-24T10:45:21.608Z
7
+ ---
8
+
9
+ Each Sentinel project keeps ONE long-lived Claude conversation across fix tasks (NOT one subprocess — claude is still spawned per task, but with `--resume <session_id>` so it continues the previous conversation).
10
+
11
+ **Why this design (not long-lived subprocess):**
12
+ - `claude` CLI isn't built for stdin/stdout piping — no clean response framing, hangs on confirmations, ANSI escapes leak. Subprocess startup (~1s) is negligible vs the 30s-2min fix runtime.
13
+ - `--resume` gives prompt-cache reuse + cross-task context without process-management complexity.
14
+
15
+ **Mechanism:**
16
+ - `state_store.claude_sessions(project_name PK, session_id, last_used, total_cost_usd, turn_count)` table
17
+ - `fix_engine.generate_fix` reads `store.get_claude_session(cfg.project_name)` and passes the id via `--resume <id>`
18
+ - After the call, parses `{"session_id": "...", "total_cost_usd": ...}` from the JSON output and `store.set_claude_session(project_name, id, cost_delta=cost)` — accumulates cost + turn count
19
+ - `--output-format json` is required to extract session_id deterministically. Live tool-use progress streaming (`⏺ Bash(...)`, etc) is sacrificed — `_progress_from_line` is now dead code in the JSON path. stream-json migration deferred.
20
+
21
+ **Concurrency safety:**
22
+ - `main._project_locks: dict[str, asyncio.Lock]` keyed by `cfg.project_name` (or `"_default"` if empty)
23
+ - `_handle_error` wraps `_generate_apply_publish` in `async with _project_lock(...)` — guarantees one Claude session active per project at a time. Different projects can run in parallel.
@@ -0,0 +1,11 @@
1
+ ---
2
+ name: bash_local_shadows_env_var
3
+ description: bash `local var=""` shadows same-named env vars — use a distinct SENTINEL_*_OVERRIDE pattern
4
+ type: experience
5
+ created_at: 2026-04-21T07:22:54.615Z
6
+ updated_at: 2026-04-21T07:22:54.615Z
7
+ ---
8
+
9
+ In fetch_log.sh, `local OUTPUT_DIR=""` inside fetch_from_properties() shadows any OUTPUT_DIR env var set by the Python caller. Python was setting env["OUTPUT_DIR"] = temp_path, but bash's local declaration wiped it, causing filtered logs to always write to $SCRIPT_DIR instead of the temp dir — so monitor results were always empty.
10
+
11
+ Fix: use a dedicated env var (SENTINEL_OUTPUT_DIR_OVERRIDE) that doesn't conflict with a local variable name, and apply it after the local declarations. This is the same pattern already used for SENTINEL_GREP_FILTER_OVERRIDE.
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: cairn_session_discipline
3
+ description: Use cairn_resume at session start, cairn_checkpoint at end — not cairn_maintain
4
+ type: experience
5
+ created_at: 2026-03-31T03:26:44.345Z
6
+ updated_at: 2026-03-31T03:26:44.345Z
7
+ ---
8
+
9
+ At the start of a session, always call `cairn_resume` (not `cairn_maintain`) to get incremental re-index + memory surface from the last checkpoint. `cairn_maintain` does a full re-index and loses prior session context. At the end of every session, call `cairn_checkpoint` with a message, active_files, and any notes worth carrying forward. Skipping checkpoint means the next session starts cold.
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: cicd_user_must_not_be_hardcoded
3
+ description: Jenkins triggers must use repo.cicd_user — hardcoded "sentinel" caused silent 401s for all elprint builds
4
+ type: experience
5
+ created_at: 2026-04-24T14:06:02.248Z
6
+ updated_at: 2026-04-24T14:06:02.248Z
7
+ ---
8
+
9
+ In `sentinel/cicd_trigger.py`, `_trigger_jenkins(repo)` was passing `auth=("sentinel", repo.cicd_token)` — hardcoding "sentinel" as the username. This silently broke every Jenkins API call when the repo's API token was issued under a different user (e.g. CICD_USER=misterhuydo).
10
+
11
+ **Detection** (2026-04-24): Boss tried to trigger 14 elprint builds; all returned HTTP 401. Direct curl from EC2 with `-u misterhuydo:<token>` returned HTTP 200, proving creds were valid. Bug was the hardcoded "sentinel" username.
12
+
13
+ **Fix** (v1.6.8): use `auth=(repo.cicd_user or "sentinel", repo.cicd_token)` — same pattern already in use by `_trigger_jenkins_release` (which was unaffected). One-line fix.
14
+
15
+ **Lesson:** when adding new auth code, always thread `repo.cicd_user` through. Never hardcode usernames. The fallback to "sentinel" is fine for backward compat, but the configured user must take priority.
16
+
17
+ Affected functions: `_trigger_jenkins` (fixed in v1.6.8). Others (`_trigger_jenkins_release`, `_trigger_github_actions`) already correct.
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: claude_resume_stale_context_risk
3
+ description: claude --resume carries file-content memory across turns — must use per-route session keys + force fresh Read
4
+ type: experience
5
+ created_at: 2026-04-24T11:26:21.418Z
6
+ updated_at: 2026-04-24T11:26:21.418Z
7
+ ---
8
+
9
+ When using `claude --print --resume <session_id>` for sentinel fix engine, the resumed Claude carries memory of file contents from earlier turns. If the next task targets a DIFFERENT repo (or the same file has changed), Claude may patch from stale memory instead of re-reading the actual file → diff context mismatch → `git apply --check` fails.
10
+
11
+ **Real incident (2026-04-24):**
12
+ - Turn 1: fix attempt routed to `1881-SSOLoginWebApp` (local override of FirstName.java). Claude got session `d1c18a02`. Failed.
13
+ - Turn 2: new fix routed to `Whydah-TypeLib` (the upstream FirstName.java which differs from the local override). With per-project resume, Claude resumed `d1c18a02` and patched TypeLib using the SSOLWA file's content from memory. Patch context didn't match TypeLib's actual file. Dry-run aborted.
14
+
15
+ **Three layered defenses (all shipped in v1.6.2):**
16
+
17
+ 1. **Per-route session key**: in fix_engine.generate_fix, key claude_sessions by `f"{cfg.project_name}/{repo.repo_name}"` instead of just `cfg.project_name`. Different routes get fresh sessions.
18
+
19
+ 2. **Pre-fix `git pull` of every project repo** via `git_manager.pull_all_repos(all_repos)`. Closes the window where the on-disk file lags behind a recent sentinel commit or human commit.
20
+
21
+ 3. **Prompt instruction**: "CRITICAL — fresh reads only. Before you write ANY diff line, use the Read tool to view the CURRENT content of every file you intend to modify. Do NOT rely on prior memory ..."
22
+
23
+ **General rule when using --resume:** if the task topic / file scope changes between turns, either start a fresh session OR explicitly tell Claude to re-read everything. Never trust memory across context boundaries.
@@ -0,0 +1,11 @@
1
+ ---
2
+ name: envelope_first_auth_detection
3
+ description: Substring-based auth-error detection on LLM output triggers false positives when patches contain words like "UNAUTHORIZED" — check the JSON envelope first
4
+ type: experience
5
+ originSessionId: d78671ef-fb75-4934-949b-11dda1645051
6
+ ---
7
+ When a Claude `--print --output-format json` invocation succeeds, its `result` field can contain arbitrary text from the codebase being patched — including unchanged-context diff lines like `return new ResponseEntity<>(err, HttpStatus.UNAUTHORIZED)`. Substring matching on the raw output (e.g. checking for `"unauthorized"`, `"unauthenticated"`, `"login required"`) will fire even when the request authenticated cleanly, producing false "auth failure" Slack alerts and wasting both attempts in a fallback loop.
8
+
9
+ **Why:** On 2026-04-27 fp `6cb7a875` produced a valid HttpRequestMethodNotSupportedException handler patch, but the diff included an unchanged `HttpStatus.UNAUTHORIZED` context line for a sibling exception handler. Both OAuth and API-key attempts were marked auth-failed, the patch was discarded, and the user got a misleading "out of session?" alert. Cost ~$0.62 for the wasted retries; fix would have shipped to prod 3 hours earlier without the false positive.
10
+
11
+ **How to apply:** Whenever you have both a structured envelope (`is_error` flag, JSON status) and a free-text scan, *consult the envelope first*. The substring scan is only valid as a fallback for early-exit failures that never produced a parseable envelope (binary missing, immediate stderr crash). Fix in `sentinel/fix_engine.py` shipped in v1.6.10: parse JSON before running `_is_auth_error()`; if `is_error:false` and `result` is non-empty, treat it as success regardless of body content.
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: mvn_negative_cache_blocks_cascade_retries
3
+ description: mvn caches "artifact not found" responses for ~24h — pass `-U` to bypass when polling for a freshly-published artifact
4
+ type: experience
5
+ created_at: 2026-04-24T13:27:45.148Z
6
+ updated_at: 2026-04-24T13:27:45.148Z
7
+ ---
8
+
9
+ When a sentinel cascade fires immediately after triggering a Jenkins release, mvn often hits Nexus before Jenkins finishes publishing the new artifact. The `404 not found` response is **cached locally** in `~/.m2/repository/<group>/<artifact>/<version>/_remote.repositories` (or via the `--no-transfer-progress` machinery), and mvn refuses to re-check that artifact for ~24h (the default `updatePolicy` interval).
10
+
11
+ Symptom: cascade reports "Artifact X:Y not yet in Nexus", user retries 5 minutes later, same failure even though Nexus actually has the artifact. The user can manually confirm via their local IDE (which has its OWN m2 cache and saw the artifact at a different time).
12
+
13
+ **Fix:** add `-U` (`--update-snapshots`) to every `mvn` invocation in the cascade path. `-U` forces mvn to re-check remote repos on every call, bypassing the negative cache. Cost: slightly more network traffic. Benefit: cascade retries actually work.
14
+
15
+ Shipped in v1.6.6 in `sentinel/git_manager.py:maven_compile_check` — added `-U` to the `mvn compile -DskipTests -q --batch-mode` command.
16
+
17
+ **Also worth knowing:** Sentinel detects the Nexus URL from the project's `pom.xml` (`<repositories>` block), NOT from `~/.m2/settings.xml`. settings.xml only carries credentials, not URLs.
@@ -0,0 +1,13 @@
1
+ ---
2
+ name: publish_safety_check
3
+ description: Always syntax-check Python and JS files before npm publish
4
+ type: experience
5
+ created_at: 2026-03-31T03:26:41.450Z
6
+ updated_at: 2026-03-31T03:26:41.450Z
7
+ ---
8
+
9
+ Before running `npm publish`, always validate modified files:
10
+ - Python: `python -c "import ast; ast.parse(open('file.py').read())"`
11
+ - JS/Node: `node --check file.js`
12
+
13
+ The bundle step does NOT catch Python syntax errors. Backtick characters inside JS template literals will also break when embedding Python code blocks — avoid them or escape carefully.
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: secrets_in_gitignored_files
3
+ description: All secrets go in gitignored files only — never in committed config
4
+ type: experience
5
+ created_at: 2026-03-31T03:26:47.431Z
6
+ updated_at: 2026-03-31T03:26:47.431Z
7
+ ---
8
+
9
+ Never put tokens, API keys, passwords, or any secrets in committed config files. Keep them in a gitignored file such as `private_*.properties`, `.env`, or equivalent that lives only on the server. If a project has a committed config template (e.g. `sentinel.properties`), use placeholder values like `<token>` and document that the real values go in the private file.
@@ -0,0 +1,13 @@
1
+ ---
2
+ name: sentinel_deployment_server
3
+ description: Sentinel deploys on EC2 server, SSH via ec2-user with sentinel.pem key
4
+ type: experience
5
+ created_at: 2026-03-31T03:23:46.605Z
6
+ updated_at: 2026-04-21T00:00:00.000Z
7
+ originSessionId: bbd5aef7-18ed-4068-be3f-60d68de4936f
8
+ ---
9
+ Sentinel is deployed on EC2 at `13.50.101.130`. SSH: `ssh -i ~/.ssh/sentinel.pem ec2-user@13.50.101.130` (key at `C:\Users\huy\.ssh\sentinel.pem`).
10
+
11
+ Single service: `sentinel.service` (not sentinel-1881 or sentinel-elprint — just `sentinel`).
12
+
13
+ Deploy: `sudo npm install -g @misterhuydo/sentinel@<version> && sudo systemctl restart sentinel`
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: Ageri RAG and CoreSkill architecture decisions
3
+ description: CoreSkill is passive observer only; personal skill uses layered context pipeline for RAG
4
+ type: feedback
5
+ ---
6
+
7
+ CoreSkill must remain a passive observer — no URL fetching, no active retrieval. It only listens, extracts explicit facts, and writes to memory silently. The moment it fetches URLs it changes nature.
8
+
9
+ Personal skill RAG uses a layered context pipeline (pre-LLM step):
10
+ 1. Memory search — what we know about this user/topic
11
+ 2. URL fetch — if message contains a URL, fetch it
12
+ 3. prior_results — what other skills returned this turn
13
+ 4. User-uploaded docs — files stored from previous uploads
14
+
15
+ One context build → one LLM call. Not scattered retrieval inside each skill.
16
+
17
+ **Why:** User confirmed this pushback explicitly. Separation of concerns — CoreSkill listens, research skill searches broadly, personal skill retrieves personal context.
18
+
19
+ **How to apply:** Never add active retrieval to CoreSkill. When building personal skill RAG, use the pipeline approach above, not ad-hoc fetching in individual handlers.
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: Design principle — simple but sophisticated
3
+ description: User's core design philosophy for Sentinel and all work in this project
4
+ type: feedback
5
+ ---
6
+
7
+ "Simple but sophisticated" — the guiding principle for all design decisions.
8
+
9
+ **Why:** User explicitly stated this after I added a redundant SLACK_WORKSPACE_ID workspace verification check that duplicated the isolation Slack's own token architecture already provides.
10
+
11
+ **How to apply:**
12
+ - Solve the real problem, not hypothetical edge cases the platform already handles
13
+ - Don't add config options, fields, or checks that guard against things that can't happen
14
+ - When the underlying system (Slack, GitHub, SQLite, etc.) already enforces a constraint, trust it — don't re-enforce it in code
15
+ - Prefer fewer moving parts. A sophisticated outcome should feel simple to operate.
16
+ - Before adding any new mechanism, ask: "does something already handle this?"
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: Sentinel publish workflow
3
+ description: How to correctly publish Sentinel npm package
4
+ type: feedback
5
+
6
+ ---
7
+
8
+ Always run syntax checks before publishing: `python -c "import ast; ast.parse(open(f, encoding='utf-8').read())"` on modified Python files, and `node --check` on modified JS files.
9
+
10
+ **Why:** Multiple bugs shipped due to literal newlines in JS strings and Python indentation errors. These are caught instantly by syntax checks.
11
+
12
+ **How to apply:** Before every `npm publish`, check all modified files. The bundle step will succeed even with broken Python (it just copies files), so Python errors only surface at runtime on the server.
13
+
14
+ Also: JS template literals containing backtick characters (Python f-strings with backtick code formatting) will cause syntax errors. Use string concatenation instead of template literals for multi-line Python code blocks in patch scripts.
@@ -0,0 +1,15 @@
1
+ ---
2
+ name: Secrets handling — never commit tokens
3
+ description: All secrets (GITHUB_TOKEN, SLACK tokens, API keys) go in private_sentinel.properties only, never in committed config files
4
+ type: feedback
5
+ ---
6
+
7
+ Never put secrets in config repo files (sentinel.properties, repo-configs/*.properties). They get committed to GitHub.
8
+
9
+ **Why:** GITHUB_TOKEN, SLACK_BOT_TOKEN, ANTHROPIC_API_KEY etc. were accidentally added to committed sentinel.properties files. These end up in GitHub history even if later removed.
10
+
11
+ **How to apply:**
12
+ - All secrets go in `private_sentinel.properties` (gitignored, lives next to the project dir on the server)
13
+ - Committed `sentinel.properties` files contain only non-sensitive config (PROJECT_NAME, MAILS, POLL_INTERVAL, etc.)
14
+ - The `.gitignore` in every sentinel config repo must include `private_sentinel.properties`
15
+ - On the server: `~/sentinel/private_sentinel.properties` for workspace-level secrets
@@ -0,0 +1,11 @@
1
+ ---
2
+ name: SLACK_ADMIN_USERS implicitly allowed
3
+ description: Admins are always allowed to talk to Boss — no need to also add them to SLACK_ALLOWED_USERS
4
+ type: feedback
5
+ ---
6
+
7
+ `SLACK_ADMIN_USERS` implies access. Do not require admins to also appear in `SLACK_ALLOWED_USERS`.
8
+
9
+ **Why:** User pointed out the redundancy — if you're an admin you should obviously be able to talk to the bot. Fixed in slack_bot.py: allowlist check now skips users who are in `slack_admin_users`.
10
+
11
+ **How to apply:** When configuring a new Sentinel instance, only set `SLACK_ADMIN_USERS` in `sentinel.properties`. `SLACK_ALLOWED_USERS` is only needed to grant access to non-admin users.
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: Slack thinking status messages
3
+ description: User confirmed they like the random thinking status messages in Slack Boss
4
+ type: feedback
5
+ ---
6
+
7
+ User explicitly approved the random thinking status approach (`_THINKING_STATUS` list with `random.choice()`).
8
+
9
+ Messages like "poking around...", "on it...", "cooking..." are preferred over the static "thinking...".
10
+
11
+ The "(still on it...)" suffix appended on long responses was also well received — it reassures the user the bot hasn't stalled.
12
+
13
+ **Why:** More personality, less robotic. Matches the casual tone the user wants from Boss.
14
+ **How to apply:** Keep the `_THINKING_STATUS` list and the "(still on it...)" update pattern. Do not revert to static "thinking...".
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: start.sh patching approach
3
+ description: How to safely patch start.sh on the server — SSH double-quote expansion pitfall, $HOME vs hardcoded paths
4
+ type: feedback
5
+ ---
6
+
7
+ Always use `$HOME` (not hardcoded user paths like `/home/sentinel/`) in start.sh templates and patches. The script may run as any Linux user, not just `sentinel`.
8
+
9
+ **Why:** The server user might not be `sentinel` — the admin may choose any username. Using `$HOME` makes the script portable.
10
+
11
+ **How to apply:** The generate.js template already uses `$HOME`. When patching start.sh on the server via SSH, never write Python patch scripts that include `$HOME` inside an SSH double-quoted string — bash will expand `$HOME` to the *local* machine's home before sending. Instead:
12
+ - Use `sed -i` with single-quoted replacement strings (prevents local expansion)
13
+ - Or write the Python patch to `/tmp/fix.py` via heredoc `<< 'PYEOF'` (single-quoted prevents expansion), then run separately
14
+ - Or use SCP to upload the script file
15
+
16
+ Also: `sentinel upgrade` regenerates start.sh from the generate.js template. The template (line ~65 in generate.js) now includes auto-detection of JAVA_HOME via a for-loop over `$HOME/jdk-*` patterns, using `$HOME` throughout. This is a permanent fix — no need to re-patch after upgrades.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: cairn_federation_for_sentinel_projects
3
+ description: Sentinel project root + each sub-repo both run `cairn install`; federation auto-mounts via parent walk-up
4
+ type: knowledge
5
+ created_at: 2026-04-24T10:45:36.058Z
6
+ updated_at: 2026-04-24T10:45:36.058Z
7
+ ---
8
+
9
+ Cairn supports nested federation natively. For Sentinel:
10
+ - Project root (e.g. `/home/sentinel/sentinel/sentinel-1881/`) gets `.cairn/` (its own `.cairn-project` marker + `index.db`)
11
+ - Each sub-repo under `repos/<name>/` ALSO gets its own `.cairn/`
12
+ - When Claude runs from the project root, cairn's `mountParentSubIndexes` (Cairn `db.js:97`) auto-discovers sibling sub-indexes — no explicit registration needed (works "even if the parent project has never run cairn_maintain")
13
+
14
+ **Sentinel wiring:**
15
+ - `sentinel/cairn_client.py:_install_cairn_at(path)` — idempotent (skip if `.cairn/.cairn-project` marker exists), takes any path
16
+ - `init_project_root(project_dir)` — wrapper invoked at sentinel startup
17
+ - `index_repo(repo)` — same wrapper, called for each sub-repo (existing behaviour)
18
+ - `main._startup_checks` calls `init_project_root(Path(cfg.workspace_dir).parent)` BEFORE iterating sub-repos with `index_repo`
19
+
20
+ **Cairn CLI surface is limited** (`install`, `install-hooks`, `minify`, `edit-guard`, `validate-map`, `checkpoint --auto`, `resume-hint`). All federation/index queries are MCP-only — Claude calls them via MCP at runtime, Sentinel doesn't shell out for them.
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: cairn_hooks_block_oauth_read_edit
3
+ description: Cairn PreToolUse hooks (minify, edit-guard) exit 2 to block Read/Edit; bypass via --setting-sources project,local + re-add MCP via --mcp-config
4
+ type: knowledge
5
+ created_at: 2026-04-24T12:00:46.946Z
6
+ updated_at: 2026-04-24T12:00:46.946Z
7
+ ---
8
+
9
+ Cairn ships two `PreToolUse` hooks installed in `~/.claude/settings.json`:
10
+ - `cairn minify` on `Read` — exits with code 2 (blocking) and writes minified content to stderr; advances per-file state machine to `compressed`
11
+ - `cairn edit-guard` on `Edit`/`Write` — exits 2 if file is in `compressed` state, shows full content, advances to `edit-ready`. Next Edit attempt then succeeds.
12
+
13
+ This works fine for INTERACTIVE Claude Code (model retries naturally). In headless `claude --print` sessions doing complex multi-step work (26 turns of fix engine), the model often does NOT retry the blocked Edit — it falls back to hand-crafting a unified diff in markdown. That hand-crafted diff has off-by-one hunk-line counting bugs (`@@ -X,10 +X,11 @@` with a body of 9/10 lines) → `git apply --check` fails with "corrupt patch at line N".
14
+
15
+ `--bare` would skip ALL hooks (per `claude --help`: "skip hooks, LSP, plugin sync...") but ALSO forces ANTHROPIC_API_KEY auth — incompatible with OAuth.
16
+
17
+ **Surgical fix** for headless OAuth sessions:
18
+ - `--setting-sources project,local` — skip user-scope settings.json (where cairn hooks live), keep project/local settings working
19
+ - `--mcp-config '{"mcpServers":{"cairn":{"command":"cairn-mcp"}}}'` — re-add cairn MCP (which was also in user settings) standalone, so cairn_search/outline/checkpoint stay available
20
+ - `--dangerously-skip-permissions` — keep, redundant given Read(**) allow but harmless
21
+
22
+ This combo is in `sentinel/fix_engine.py:_claude_cmd` since v1.6.3.
23
+
24
+ NOTE: When invoking `claude --mcp-config <json>` from a shell, the JSON string can be misparsed if a positional prompt follows. Use `--mcp-config=<json>` (= form) OR pass via Python subprocess as a list element (no shell). MCP config flag accepts space-separated values — that's why a positional prompt gets pulled in as a "second config".
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: sentinel_repo_remotes
3
+ description: J:\Projects\Sentinel = source of truth; misterhuydo/Sentinel primary; exoreaction/Sentinel = downstream mirror
4
+ type: knowledge
5
+ created_at: 2026-04-24T15:03:35.440Z
6
+ updated_at: 2026-04-24T15:03:35.440Z
7
+ ---
8
+
9
+ **Sentinel repo topology (set 2026-04-24):**
10
+
11
+ - **J:\Projects\Sentinel** — single source of truth, primary working directory. ALL development happens here.
12
+ - **misterhuydo/Sentinel** on GitHub — the personal/canonical remote, pushed to from J:\
13
+ - **exoreaction/Sentinel** on GitHub — company-visible MIRROR. Always kept in sync with main; NOT the place to develop in.
14
+ - **H:\Projects\exoreaction\Sentinel** — local clone of exoreaction/Sentinel; redundant when J:\ is the working dir, but harmless to keep around.
15
+
16
+ **Workflow:**
17
+ 1. Edit, commit, and push from J:\Projects\Sentinel as usual.
18
+ 2. After pushing to misterhuydo, also push the same refs to exoreaction so the mirror stays current. Easiest: add exoreaction as a second remote in J:\ and push to both.
19
+
20
+ ```
21
+ git -C /j/Projects/Sentinel remote add exoreaction git@github.com:exoreaction/Sentinel.git
22
+ git -C /j/Projects/Sentinel push origin main
23
+ git -C /j/Projects/Sentinel push exoreaction main
24
+ ```
25
+
26
+ Or chain into one command via an alias / shell function.
27
+
28
+ **Do NOT** do new work in H:\Projects\exoreaction\Sentinel — that workspace is just a local checkout of the mirror.
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: sentinel_systemd_inactive_is_misleading
3
+ description: systemctl status sentinel shows inactive even when worker processes are healthy — check ps for python -m sentinel.main, not systemd
4
+ type: knowledge
5
+ originSessionId: 98f6382b-f7e7-45dd-8f41-8f5b249e0853
6
+ ---
7
+ The `sentinel.service` systemd unit is `Type=forking` but its `ExecStart=/home/sentinel/sentinel/startAll.sh` backgrounds per-project children (`python -m sentinel.main`) without writing a `PIDFile=` that systemd can track. As a result `systemctl status sentinel` reports `inactive (dead)` immediately after start, even when all workers are running normally.
8
+
9
+ **Why:** This is a unit-file design quirk, not an outage. The wrapper script + `Type=forking` mismatch means systemd loses track of the children. Restarting the unit will start a NEW set of workers without killing the existing ones — risk of duplicates.
10
+
11
+ **How to apply:** When asked "is sentinel down?":
12
+ 1. Don't trust `systemctl is-active sentinel`.
13
+ 2. Check `ps -ef | grep "python3 -m sentinel.main"` and `ls /proc/<pid>/cwd` to map PIDs → projects.
14
+ 3. Cross-check against `sentinel.pid` in each `/home/sentinel/sentinel/<project>/`.
15
+ 4. Check `tail logs/sentinel.log` for recent activity.
16
+ 5. If duplicate processes for the same project exist (multiple PIDs with the same cwd), the older one is an orphan — `stop.sh` only kills the one in `sentinel.pid`. Kill orphans manually with `kill <pid>`.
17
+
18
+ Never run `systemctl restart sentinel` to "fix" this without first stopping all running children, or you'll create duplicates.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: sentinel_upgrade_requires_file_copy
3
+ description: npm install -g does NOT update the running instance — must copy files from npm package to /home/sentinel/sentinel/code/ and restart
4
+ type: knowledge
5
+ created_at: 2026-04-21T08:02:15.597Z
6
+ updated_at: 2026-04-21T08:02:15.597Z
7
+ ---
8
+
9
+ The running Sentinel instance uses PYTHONPATH=/home/sentinel/sentinel/code/ (not the npm package directory at /usr/lib/node_modules/@misterhuydo/sentinel/python/).
10
+
11
+ `npm install -g @misterhuydo/sentinel@X.Y.Z` only updates the global npm package. To actually apply code changes to the running instance:
12
+
13
+ 1. Copy changed Python/shell files:
14
+ sudo cp /usr/lib/node_modules/@misterhuydo/sentinel/python/sentinel/<file>.py /home/sentinel/sentinel/code/sentinel/<file>.py
15
+ sudo cp /usr/lib/node_modules/@misterhuydo/sentinel/python/scripts/<file>.sh /home/sentinel/sentinel/code/scripts/<file>.sh
16
+
17
+ 2. Restart the instance:
18
+ sudo -u sentinel bash /home/sentinel/sentinel/sentinel-1881/stop.sh && sleep 2 && sudo -u sentinel bash /home/sentinel/sentinel/sentinel-1881/start.sh
19
+
20
+ The `sentinel upgrade` CLI command (via Slack Boss) likely does this automatically, but manual deploys via SSH must do both steps.
@@ -0,0 +1,17 @@
1
+ ---
2
+ name: no_api_key_for_heavy_coding
3
+ description: Never bill heavy coding tasks against ANTHROPIC_API_KEY — must use Claude Pro OAuth
4
+ type: preference
5
+ created_at: 2026-04-24T11:39:14.740Z
6
+ updated_at: 2026-04-24T11:39:14.740Z
7
+ ---
8
+
9
+ User explicitly stated: "I never want to use API key for heavy coding tasks."
10
+
11
+ **How to apply:**
12
+ - For Sentinel fix_engine, ask_codebase, repo_task_engine: MUST use Claude Pro (OAuth) via the `claude` CLI's cached login, not `--bare`/API-key
13
+ - API key remains acceptable for the lightweight Boss conversation loop (structured tool-use), but heavy code work is OAuth-only
14
+ - If OAuth fails, do NOT silently fall back to API key — surface the failure and fix the OAuth issue instead
15
+ - Configuration default: `CLAUDE_PRO_FOR_TASKS=true` is the only acceptable mode; do not propose API-key fallback as a "workaround"
16
+
17
+ **Why:** Claude Pro covers heavy/agentic work via subscription. API credit is finite and gets exhausted (we hit the "Credit balance too low" wall earlier in 2026-04-24). Pro is the durable answer; falling back to API key is a regression in cost discipline.
@@ -0,0 +1,45 @@
1
+ ---
2
+ name: Ageri project
3
+ description: Personal AI platform — live on Oracle Ampere server, Slack connected, personal app responding
4
+ type: project
5
+ ---
6
+
7
+ Ageri is a personal AI platform, separate from Sentinel.
8
+
9
+ **Domain:** ageri.ai (registered on Cloudflare)
10
+ **GitHub:** git@github.com:misterhuydo/Ageri.git (private)
11
+ **Local path:** J:\Projects\Ageri
12
+ **Deploy key:** ~/.ssh/ageri_deploy on Oracle server
13
+
14
+ **Server:** Oracle Ampere ARM 138.2.17.152, user: `ageri`
15
+ **SSH from local:** `ssh -i /c/Users/huy/.ssh/oracle/devtest-arm-ampere.key ageri@138.2.17.152`
16
+ **Code on server:** ~/ageri/code
17
+ **Venv:** ~/ageri/venv
18
+ **Config dir:** ~/ageri (ageri.properties + private_ageri.properties)
19
+ **Log file:** ~/ageri/ageri.log
20
+ **Start command:** `cd ~/ageri/code && AGERI_CONFIG=~/ageri ~/ageri/venv/bin/python -m ageri.main >> ~/ageri/ageri.log 2>&1 &`
21
+
22
+ **Current status (2026-03-31):** LIVE — Ageri running on Oracle server, connected to Slack workspace via Socket Mode. Personal app active. DMs to @Ageri in Slack are working.
23
+
24
+ **Architecture:**
25
+ - Orchestrator → AppRegistry → Apps (personal, research, sentinel stubs)
26
+ - Three-tier SQLite memory (session/working/long_term)
27
+ - Slack Socket Mode interface
28
+ - ageri-sdk: separate PyPI package at sdk/ — install with `pip install -e sdk/`
29
+ - CLI at cli/ — `ageri init`, `ageri slack`, `ageri slack --update`
30
+
31
+ **CLI (npm):** cli/ — not yet published. Run locally with `node bin/ageri.js`
32
+ - `ageri init` — full setup (venv, pip, Slack prompts, config files)
33
+ - `ageri slack` — prints one-click Slack manifest URL
34
+
35
+ **Slack app:** messages_tab_enabled=true, interactivity=true, socket_mode=true
36
+ **Scopes:** chat:write, im:*, channels:*, groups:*, reactions:write, users:read
37
+
38
+ **Key fixes applied:**
39
+ - Ubuntu 22.04 externally-managed Python → venv required
40
+ - ageri-sdk not in requirements → `pip install -e sdk/` needed
41
+ - setuptools.backends not available → changed to setuptools.build_meta
42
+ - anthropic package missing from requirements.txt → added >=0.25
43
+ - Slack messages tab disabled → added app_home.messages_tab_enabled=true to manifest
44
+
45
+ **Why:** Platform play — Sentinel becomes an app module on top. BYOK + license business model.
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: Ageri architecture v2 decisions
3
+ description: Finalized architectural decisions — multi-profile, skill types, agent society, Orchestrator as God
4
+ type: project
5
+ ---
6
+
7
+ ## Terminology: "App" → "Skill" ✓ DONE
8
+
9
+ Renamed throughout codebase. SkillBase, SkillResult, SkillRegistry, ageri/skills/, SKILLS= config key. Backwards-compatible aliases kept until v1.0.
10
+
11
+ ## One Orchestrator, Multiple Agent Profiles ✓ FINALIZED
12
+
13
+ - One Orchestrator process, one DB, one deployment
14
+ - Multiple named Agent Profiles within it (e.g. Sammy, John, Peter, Selina)
15
+ - Each profile has: name, channel bindings, memory scope (`profile:{id}:*`), personality, skill set
16
+ - Reset = wipe `profile:{id}:*` memory only, config untouched
17
+ - **Users can create as many profiles as they want** — profile count is a monetization lever (free tier limit, paid tier unlimited). Do not hardcode a profile cap in architecture.
18
+
19
+ ## Skill Assignment & Orchestrator Routing ✓ FINALIZED
20
+
21
+ - A profile can hold multiple skills simultaneously (e.g. research + knowledge + zalo_adapter)
22
+ - User intent determines required skills: "open Zalo, reply to customer questions" → needs research + knowledge + zalo_adapter on the same profile
23
+ - If the active/addressed profile lacks a required skill, **Orchestrator suggests a profile that has the needed skill set** — does not silently fail or auto-delegate
24
+ - Skill matching is capability-based, not name-based — Orchestrator inspects skill registry per profile
25
+
26
+ ## Memory Architecture ✓ FINALIZED
27
+
28
+ Three scopes:
29
+ 1. **Global layer** (`global:*`) — shared across all profiles. User's name, location, timezone, language, who each profile is. Read by all profiles, written only by Orchestrator or explicit user action. "God's memory."
30
+ 2. **Profile scope** (`profile:{id}:*`) — private to each profile. What Sammy knows stays with Sammy.
31
+ 3. **Cross-profile transfer** — Orchestrator only, explicit user request. Logged and visible.
32
+
33
+ ## The Orchestrator as "God" ✓ FINALIZED
34
+
35
+ - Omniscient: knows all profiles, all skills, all global facts
36
+ - Omnipresent: receives every message from every channel
37
+ - Controls: can read any profile's memory, broker conversations, delegate tasks
38
+ - Transparent: logs every cross-profile interaction — user always has visibility
39
+ - Suggests but doesn't act unilaterally on cross-profile decisions
40
+
41
+ ## Agent-to-Agent Communication ✓ FINALIZED
42
+
43
+ - Agents cannot talk directly to each other
44
+ - All communication routes through the Orchestrator
45
+ - Orchestrator can facilitate: moves context between profiles, both agents contribute, user sees the thread
46
+ - No private agent-to-agent conversations — Orchestrator is always present
47
+ - Agents know each other exist IF the user has introduced them (stored in global layer)
48
+
49
+ ## Skills per Profile ✓ FINALIZED
50
+
51
+ - Each profile has its own skill set (John the tutor has `english` skill, others don't)
52
+ - Orchestrator knows who has what
53
+ - Cross-profile skill borrowing with user approval: "Peter has that skill — want me to ask him?"
54
+
55
+ ## Skill Types ✓ FINALIZED
56
+
57
+ Three categories:
58
+ 1. **Cognitive skills** — pure LLM reasoning (personal, research, memory). No external connections.
59
+ 2. **Adapter skills** — bridge to external systems. The skill owns the connection protocol.
60
+ - Examples: `openclaw`, `github`, `email`, `calendar`, `sentinel`
61
+ - User builds adapter skills to connect their own applications
62
+ - Platform doesn't need to know what the external system is — skill author does
63
+ 3. **Hybrid** — reasons + connects (e.g. research skill that searches the web)
64
+
65
+ **Key insight:** A Skill is an adapter by nature. Ageri becomes an orchestration layer over ANY tool the user has — not by building every integration, but by letting community build adapter skills. Same model as VS Code extensions / browser extensions.
66
+
67
+ ## Language Handling ✓ FINALIZED
68
+
69
+ - Mirror user's language dynamically by default
70
+ - When user explicitly requests a language → save as `user:language` in profile memory → always use it
71
+ - LLM can search internet for cultural context once location/culture known
72
+
73
+ ## Passive Learning ✓ FINALIZED
74
+
75
+ - Learn silently but surface occasionally: "I noticed you prefer Vietnamese — I'll remember that"
76
+ - Infer freely (location from "Tết", interests from topics, relationships from names)
77
+ - All inferences written to DB. DB is source of truth — LLM enriches, doesn't replace.
78
+
79
+ ## Assistant Reset ✓ FINALIZED
80
+
81
+ User can reset any profile — wipes its memory scope, preserves config and global layer.
82
+
83
+ ## Systemd ⚠ PENDING
84
+
85
+ Ageri dies on server reboot. Need systemd unit for auto-restart. Not yet set up.
86
+
87
+ ## CLAUDE.md + docs updates ⚠ PENDING
88
+
89
+ CLAUDE.md and ageri-engineering-brief.md need updates for Skill rename + all v2 architecture decisions. To be done in Ageri Claude session.
@@ -0,0 +1,15 @@
1
+ ---
2
+ name: ageri_devtest_principle
3
+ description: User wants solid devtest infrastructure built into Ageri from day one — lesson learned from Sentinel
4
+ type: project
5
+ ---
6
+
7
+ Ageri must have a devtest infrastructure built from the start, before the platform is complex.
8
+
9
+ **Why:** Sentinel was painful to test — every change required npm publish → server upgrade → real Slack message → watch logs. No module-level testing. This made iteration slow and bugs hard to catch early. Ageri will have many workspaces built on top of it, so the core platform must be solid.
10
+
11
+ **How to apply:**
12
+ - Each module (Orchestrator, memory tiers, tool registry, workspace adapters) should be independently testable with mocked boundaries
13
+ - Integration tests for full flows (message in → action out) before shipping
14
+ - Local dev mode that mocks external services (Slack, GitHub, etc.) so no live infra needed for testing
15
+ - Don't ship Ageri core until the test harness exists — platform stability is a prerequisite for workspace extensibility
@@ -0,0 +1,61 @@
1
+ ---
2
+ name: Ageri personality and user intelligence vision
3
+ description: Vision for Ageri as a truly smart personal assistant — user profiling, cultural awareness, language adaptation, role-playing
4
+ type: project
5
+ ---
6
+
7
+ Ageri should evolve from a task executor into a genuinely intelligent companion that knows the user deeply.
8
+
9
+ **Core vision:** Ageri studies the user over time and builds a persistent mental model of them — their habits, interests, communication style, culture, language, and routines. Every interaction is an opportunity to learn.
10
+
11
+ ## What Ageri should learn and remember
12
+
13
+ **Language & communication style**
14
+ - Detect the user's preferred language from their messages — respond in the same language automatically
15
+ - Learn their tone (formal vs casual, humor level, how they phrase things)
16
+ - Adapt vocabulary and register to match the user
17
+
18
+ **Identity & address**
19
+ - Learn the user's name and how they prefer to be addressed
20
+ - Know their timezone, location (inferred from context or explicitly set)
21
+ - Understand their role/occupation to contextualize requests
22
+
23
+ **Culture & location**
24
+ - Detect or ask where the user lives
25
+ - Learn about their country's culture, customs, public holidays, food, language nuances
26
+ - Use culturally appropriate greetings, references, and examples
27
+
28
+ **Habits & routines**
29
+ - Notice patterns: when they wake up, when they work, what they ask about on certain days
30
+ - Learn recurring tasks, preferences, and rituals
31
+ - Proactively suggest reminders based on observed routine
32
+
33
+ **Interests & hobbies**
34
+ - Build a topic map of what the user cares about
35
+ - Remember what they've asked about before and connect topics over time
36
+ - Surface relevant info without being asked
37
+
38
+ **Relationships**
39
+ - Remember names and context of people the user mentions (family, colleagues, friends)
40
+ - Keep track of commitments made to specific people
41
+
42
+ ## Roles Ageri can play
43
+
44
+ Depending on the user and context, Ageri adapts its role:
45
+ - **Companion** — casual chat, humor, emotional check-ins
46
+ - **Personal assistant** — reminders, tasks, memory
47
+ - **Research assistant** — deep dives, synthesis, tracking topics
48
+ - **Coach** — habits, routines, accountability (if user opts in)
49
+
50
+ ## How to implement
51
+
52
+ All learned facts stored in `long_term` memory under structured keys:
53
+ - `user:name`, `user:location`, `user:timezone`, `user:language`
54
+ - `user:interest:{topic}`, `user:habit:{description}`, `user:person:{name}`
55
+ - `user:style:tone`, `user:style:address` (how user likes to be addressed)
56
+
57
+ **Language detection:** Already works — Claude responds in the user's language when the system prompt doesn't force English. No extra code needed for basic support.
58
+
59
+ **User profiling intent:** Add `LEARN` intent to personal app — when Ageri notices something worth remembering about the user from their message, it silently writes to memory. This should happen passively during every ANSWER interaction too.
60
+
61
+ **Why:** A complete smart assistant must feel like it *knows* you — not just execute tasks. The memory system is already built for this. The missing piece is actively populating it from every interaction.
@@ -0,0 +1,70 @@
1
+ ---
2
+ name: Ageri platform vision and architecture decisions
3
+ description: Full platform vision — own apps, skill marketplace, mobile runtime, presence, groups
4
+ type: project
5
+ ---
6
+
7
+ ## Ageri is a Platform, Not a Slack Bot
8
+
9
+ Ageri has its own native apps (mobile + desktop). Slack/WhatsApp/etc are adapter skills — outbound tools the user can activate, not the primary interface. No plans to integrate other messaging platforms.
10
+
11
+ ```
12
+ Ageri App (mobile/desktop) ← the interface Ageri owns
13
+
14
+ Ageri Runtime ← the engine (installable anywhere)
15
+
16
+ Skills ← capabilities
17
+ ├── personal, research ← core cognitive skills
18
+ ├── slack, email, github ← outbound adapter skills
19
+ └── camera, calendar ← mobile-native skills
20
+ ```
21
+
22
+ **Why:** Owning the interface = owning the user relationship. Not constrained by Slack API changes or pricing. Companion AI experience doesn't belong in a work tool.
23
+
24
+ ## Skill Mobility Attribute
25
+
26
+ Each skill declares `mobility=true/false`:
27
+ - `mobility=true` — works on mobile without a PC (personal, research, email, camera)
28
+ - `mobility=false` — requires PC/desktop resources (code-runner, file-system, sentinel)
29
+
30
+ A profile's available skills on mobile = all its skills where `mobility=true`. No separate mobile profile config needed.
31
+
32
+ ## Mobile Ageri Runtime
33
+
34
+ - Same codebase, deployed on mobile device
35
+ - Only loads `mobility=true` skills
36
+ - Users without a PC can use Ageri mobile-only — download skills from marketplace
37
+ - PC Ageri is the "home base" (source of truth for memory)
38
+ - When PC is offline, mobile runs from last-synced state; reconciles delta when PC comes back
39
+
40
+ ## Sync and Privacy
41
+
42
+ - **Cloud DB stores: presence only** — instance status, last_seen, type (desktop/mobile). Hashed user ID, no real identity.
43
+ - **User data never touches the cloud** — memories, conversations, facts stay in local SQLite
44
+ - **Sync is peer-to-peer** between instances, direct + encrypted, when both are online
45
+ - **Single session lock** — only one mobile Ageri instance active at a time per user. Cloud DB acts as mutex. Prevents sync conflicts.
46
+
47
+ ## Agent Profile Groups
48
+
49
+ Users can create a group where multiple profiles join:
50
+ - Profiles addressed by name (@Mentor, @Selina)
51
+ - All communication routes through Orchestrator — no direct agent-to-agent
52
+ - Profiles contribute from their angle (Mentor asks the probing question, Colleague gives tactical take)
53
+ - Presence-aware: offline profiles are greyed out in group
54
+
55
+ ## Skill Marketplace (Production — future)
56
+
57
+ - Hosted page exposing skills to communities
58
+ - Skill creators must register products via the system
59
+ - Creators can sell skills (free or paid) — monetization planned post-production
60
+ - Skills declare metadata: name, version, author, mobility, required permissions, price
61
+ - `ageri add <skill-name>` installs from registry
62
+ - Skills run locally — marketplace never sees user data
63
+ - Verified skills badge (reviewed, trusted)
64
+
65
+ ## Business Model Notes
66
+
67
+ - Not competing with Apple/Google on mass-market AI assistant
68
+ - Winning paths: privacy-conscious power users → B2B enterprise (AI that never leaves your infra) → marketplace platform moat
69
+ - Companion AI (private, local, customizable) addresses trust gap that Replika/Character.AI can't solve
70
+ - Interface ownership is the most important long-term decision
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: Sentinel npm publish workflow
3
+ description: Who publishes to npm, when, and how — Dev Claude vs human+Claude
4
+ type: project
5
+ ---
6
+
7
+ User and Claude Code are joint project owners for Sentinel.
8
+
9
+ **Publish workflow:**
10
+ - Dev Claude fixes bugs autonomously → commits to `/home/sentinel/sentinel/code/` → live immediately on server
11
+ - Dev Claude never publishes to npm (race condition risk with multiple instances)
12
+ - User + Claude Code review Dev Claude's commits periodically and publish manually
13
+ - Auto-upgrade (every 6h) distributes published versions to all running instances
14
+
15
+ **How to publish:**
16
+ - User says "publish", "release", or similar
17
+ - Claude Code checks recent Dev Claude commits (`git log --oneline` on server or local)
18
+ - Claude Code bumps patch version in `cli/package.json`
19
+ - Runs syntax checks + `npm publish --access public` from `J:\Projects\Sentinel\cli\`
20
+
21
+ **Current version:** 1.4.90 (published 2026-03-27)
22
+
23
+ **Why:** Dev Claude (sentinel-1881) and Dev Claude (sentinel-elprint) both share the same source repo on the server. If both published to npm they'd conflict on version numbers.
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: Sentinel project state
3
+ description: Current state of Sentinel — latest npm version, key modules, architecture decisions
4
+ type: project
5
+
6
+ ---
7
+
8
+ Sentinel is published as @misterhuydo/sentinel on npm. Latest version: 1.4.96.
9
+
10
+ **Why:** Autonomous DevOps agent — watches prod logs, generates Claude Code fixes, opens PRs. Deployed as one instance per project.
11
+
12
+ **How to apply:** When making changes, always bump package.json version and run `npm publish --access public` from `J:\Projects\Sentinel\cli`. Python source is bundled into the npm package via `cli/scripts/bundle.js`.
13
+
14
+ Key architecture decisions made:
15
+ - Auth split: ANTHROPIC_API_KEY → Sentinel Boss (structured tools), Claude Pro OAuth → fix_engine/ask_codebase (heavy tasks). Controlled by CLAUDE_PRO_FOR_TASKS=true.
16
+ - Per-user concurrent Slack sessions (no queue) — each user gets independent session, history persisted in SQLite.
17
+ - notify.py: shared Slack alert module used by fix_engine + sentinel_boss — never silent on rate limits/auth failures.
18
+ - sentinel_boss.py uses `<@USER_ID>` Slack mentions in all replies. user_id→display_name map stored in slack_users SQLite table.
19
+ - post_file tool: Claude can upload files directly to Slack conversation via files_upload_v2.
20
+ - bin/sentinel.js has self-heal: if upgrade.js fails to load, falls back to bare npm install.
21
+
22
+ SQLite tables: errors, fixes, reports, conversations, submitted_issues, slack_users.
23
+
24
+ Docs: README.md updated, docs/slack_integration.md created with full Slack setup guide including all 9 scopes.
25
+
26
+ ---
27
+
28
+ ## chain_release (as of 2026-03-26)
29
+
30
+ - chain_release flow confirmed working end-to-end: TypeLib → Java-SDK → Admin-SDK.
31
+ - chain_release pushes dep updates directly to master (not via PR) — this is an admin-confirmed operation.
32
+ - cicd_trigger.py has wait=True Jenkins polling: 15-minute timeout, 20-second polling intervals.
33
+ - datetime shadowing bug fixed in sentinel_boss.py (in the list_renovate_prs block).
34
+ - Version reporting fix: chain_release now reports the actual released version read from the live pom, not the plan-time version.
35
+ - The "auto-cascade" (execute_cascade) does NOT trigger automatically — chain_release must be called explicitly each time.
36
+
37
+ ## Pending upgrades (as of 2026-03-26)
38
+
39
+ - STS, UAS, SSOLWA, UIB are planned for upgrade with Admin-SDK 3.1.6 at off-peak hours.
40
+
41
+ ## Server-side patch sync status
42
+
43
+ - All server-side patches are applied directly to `/home/sentinel/sentinel/code/sentinel/` on the remote server.
44
+ - These patches have NOT yet been synced back to the local git repo at `J:\Projects\Sentinel`.
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: Sentinel UI plan
3
+ description: Web dashboard for Sentinel — planned but not urgent. To be hosted at sentinel.ageri.ai
4
+ type: project
5
+ ---
6
+
7
+ Sentinel needs a web dashboard UI. Not urgent but clearly defined scope.
8
+
9
+ **Domain:** sentinel.ageri.ai (subdomain on ageri.ai, Cloudflare DNS)
10
+
11
+ **Why:**
12
+ - Share status/fix history with non-Slack users (e.g. boss, stakeholders)
13
+ - Log browsing is painful in Slack
14
+ - Admin management without requiring Slack login
15
+ - Professional status page for showing what Sentinel has fixed/not fixed
16
+
17
+ **Two audiences:**
18
+
19
+ 1. **Read-only viewers** (boss, stakeholders) — no login required or simple token link
20
+ - Live project status (running/down, last poll, error rate)
21
+ - Fix history: what was fixed, when, which repo, PR link
22
+ - Open issues: detected but not yet fixed
23
+ - Open PRs awaiting review
24
+
25
+ 2. **Admins** — authenticated
26
+ - User management
27
+ - Per-project config view
28
+ - PR management (merge/close without GitHub UI)
29
+ - Log viewer (searchable synced logs)
30
+ - Full error feed with severity + source
31
+
32
+ **Tech stack:**
33
+ - Backend: FastAPI (Python, fits existing codebase) — thin REST/WebSocket over state_store.py
34
+ - Frontend: HTMX or plain HTML + Alpine.js — no React, keep it simple
35
+ - Auth: single token-based (personal infra, no OAuth needed)
36
+
37
+ **Priority:** OUTDATED DESIGN — needs full redesign before any work starts.
38
+ **Build order:** Agent Profiles → Messenger adapter → Web UI redesign.
39
+ **Do not start Web UI until Messenger adapter is done.**
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: Ageri server reference
3
+ description: SSH access, paths, and run commands for Ageri on Oracle Ampere
4
+ type: reference
5
+ ---
6
+
7
+ **Server:** Oracle Ampere ARM — 138.2.17.152 (24GB RAM)
8
+ **User:** `ageri` (separate from `sentinel` user)
9
+ **SSH from local:** `ssh -i /c/Users/huy/.ssh/oracle/devtest-arm-ampere.key ageri@138.2.17.152`
10
+
11
+ **Directory layout:**
12
+ - `~/ageri/code` — Python source (git clone of misterhuydo/Ageri)
13
+ - `~/ageri/venv` — Python virtualenv
14
+ - `~/ageri/ageri.properties` — main config
15
+ - `~/ageri/private_ageri.properties` — secrets (chmod 600)
16
+ - `~/ageri/ageri.log` — log file
17
+ - `~/ageri/state.db` — SQLite memory store
18
+ - `~/.ssh/ageri_deploy` — GitHub deploy key (read-only, added to Ageri repo)
19
+
20
+ **Run:**
21
+ ```bash
22
+ cd ~/ageri/code && AGERI_CONFIG=~/ageri ~/ageri/venv/bin/python -m ageri.main >> ~/ageri/ageri.log 2>&1 &
23
+ tail -f ~/ageri/ageri.log
24
+ ```
25
+
26
+ **Install SDK after git pull:**
27
+ ```bash
28
+ ~/ageri/venv/bin/pip install -e ~/ageri/code/sdk/
29
+ ```
30
+
31
+ **GitHub clone (uses deploy key):**
32
+ ```bash
33
+ git clone git@github-ageri:misterhuydo/Ageri.git ~/ageri/code
34
+ ```
35
+ (Requires `~/.ssh/config` entry: `Host github-ageri` → `IdentityFile ~/.ssh/ageri_deploy`)
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: Cairn sub-index federation
3
+ description: How Cairn federates multiple repo indexes — repos must be subdirectories of the workspace for automatic federation
4
+ type: reference
5
+ ---
6
+
7
+ Cairn's `cairn_maintain` indexes from `process.cwd()` — wherever the Claude Code session starts.
8
+ There is no `--path` CLI flag; indexing is MCP-only (called from within a Claude Code session).
9
+
10
+ **Sub-directory federation (built-in, passive):**
11
+ - Each repo subdirectory can have its own `.cairn/index.db` (built when Claude Code runs there)
12
+ - When `cairn_maintain` runs at the workspace root, it globs `**/.cairn/index.db` and federates all sub-indexes
13
+ - `cairn_search` queries all federated sub-indexes and merges results
14
+ - All tools use UNION ALL views across all indexes
15
+ - Federation paths are persisted in `sub_indexes` table → `cairn_resume` re-federates automatically
16
+
17
+ **Critical constraint:** Repos must be **subdirectories** of the workspace root — external paths (e.g. `~/git/repo`) are NOT discovered. There is no `cairn.repos` config or external path registration yet.
18
+
19
+ **Implication for Sentinel:**
20
+ - `fix_engine.py` must run `claude --print` with `cwd=repo.local_path` so Cairn hooks index that repo
21
+ - For `sentinel_boss` to federate all repo indexes, repos must be subdirectories of the Sentinel project dir
22
+ - Default `LOCAL_PATH` should be `<project_dir>/repos/<repo-name>` — the user can override this, but an external path loses federation
23
+ - Shared repos used by multiple Sentinel projects: each project gets its own clone in its `repos/` dir
24
+
25
+ **How to apply:** When setting `LOCAL_PATH` defaults in `sentinel add`, use `<project_dir>/repos/<repo-name>`.
26
+ Wire `cwd=repo.local_path` into `fix_engine._run_claude_attempt`.
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: Oracle Cloud servers
3
+ description: Two Oracle Always Free servers — Ampere ARM for personal/internal projects, micro for lightweight tasks
4
+ type: reference
5
+ ---
6
+
7
+ ## Oracle Ampere (primary personal server)
8
+ - **SSH:** `ssh -i /home/huy/.ssh/oracle/devtest-arm-ampere.key ubuntu@138.2.17.152`
9
+ - **Key (Windows):** `C:\Users\huy\.ssh\oracle\devtest-arm-ampere.key`
10
+ - **Specs:** ARM Ampere — up to 4 OCPUs, 24GB RAM (Oracle Always Free generous tier)
11
+ - **OS:** Ubuntu
12
+ - **Purpose:** Personal/internal projects — Ageri, personal tools, dev experiments
13
+ - **Note:** Separate from EC2 (13.50.101.130) which is for company projects (1881, elprint)
14
+
15
+ ## Oracle Micro (tiny, secondary)
16
+ - **SSH:** `ssh -i /home/huy/.ssh/oracle/devtest-arm-micro.key ubuntu@155.248.181.206`
17
+ - **Key (Windows):** `C:\Users\huy\.ssh\oracle\devtest-arm-micro.key`
18
+ - **Specs:** 1 OCPU, 1GB RAM — very limited
19
+ - **Purpose:** Lightweight only — Cloudflare Tunnel endpoint, simple proxy, cron jobs, DNS, monitoring relay
20
+
21
+ ## Server allocation strategy
22
+ - **EC2 (13.50.101.130):** Company projects — Sentinel for 1881, elprint, etc.
23
+ - **Oracle Ampere (138.2.17.152):** Personal projects — Ageri, Taplo monitoring, personal Sentinel
24
+ - **Oracle Micro (155.248.181.206):** Ultra-lightweight tasks only — tunnel, relay, watchdog
@@ -0,0 +1,15 @@
1
+ ---
2
+ name: Sentinel server SSH connection
3
+ description: SSH credentials and host for the Sentinel deployment server
4
+ type: reference
5
+ ---
6
+
7
+ - **Host:** 13.50.101.130
8
+ - **User:** ec2-user
9
+ - **Key (local Windows path):** C:\Users\huy\.ssh\sentinel.pem
10
+ - **Key (in bash/WSL):** /c/Users/huy/.ssh/sentinel.pem
11
+ - **Key (on server):** /home/huy/.ssh/sentinel.pem
12
+ - **Command:** `ssh -l ec2-user -i /c/Users/huy/.ssh/sentinel.pem 13.50.101.130`
13
+ - **Sentinel process user:** sentinel
14
+ - **Code path:** /home/sentinel/sentinel/code/sentinel/
15
+ - **Instance config:** /home/sentinel/sentinel/sentinel-1881/
@@ -0,0 +1,52 @@
1
+ ---
2
+ name: Taplo project reference
3
+ description: Taplo platform — universal seller identity/QR discovery app, Cloudflare-native, pnpm monorepo
4
+ type: reference
5
+ ---
6
+
7
+ **Repo:** git@github.com:misterhuydo/taplo.git
8
+ **Local:** J:\Projects\taplo
9
+ **Domain:** taploapp.com + taploapp.vn (Cloudflare)
10
+ **Account:** taplo.platform@gmail.com
11
+
12
+ **What it is:** Universal seller identity + QR discovery platform. Sellers register, get a QR code instantly, customers scan it to see their page. Global, not Vietnam-specific.
13
+
14
+ **Stack (100% Cloudflare-native):**
15
+ - Workers (API), D1 (SQLite DB), R2 (images/QR files), KV (sessions/cache), Queues (async jobs), Pages (Next.js web)
16
+ - pnpm workspaces + Turborepo monorepo
17
+ - TypeScript strict everywhere
18
+ - React Native + Expo (mobile — iOS + Android)
19
+ - Next.js SSR for seller pages (SEO critical)
20
+
21
+ **Business model:** Free → Basic ($5/mo) → Pro ($15/mo) → Business ($50/mo)
22
+
23
+ **Current phase:** Phase 1 — Foundation (paused, resuming soon)
24
+ **First build checklist:** monorepo → types → D1 schema → identity module → sellers module → QR module → seller page → search → dashboard → mobile skeleton → deploy
25
+
26
+ **Key rules (never violate):**
27
+ - No hardcoded VND/Vietnamese/HCMC assumptions
28
+ - UUID v4 always, never sequential IDs
29
+ - E.164 phone format always
30
+ - Analytics events always via Queue, never blocking
31
+ - Migrations only, never manual schema edits
32
+ - Never hard DELETE — soft delete via status field
33
+ - QR pages must always use KV edge cache
34
+
35
+ **Testing + error monitoring need:**
36
+ - User wants tests written per module as each is built
37
+ - Sentinel can monitor Taplo in production (Cloudflare Worker logs → Sentinel)
38
+ - Sentry for unhandled exceptions
39
+ - Structured JSON logging from day one
40
+
41
+ **Sentinel integration note:**
42
+ - Taplo is 100% Cloudflare — no SSH servers
43
+ - Sentinel CF log source (SOURCE_TYPE=cloudflare) covers Workers logs
44
+ - CF Pages build errors, D1 errors surface in Worker logs
45
+ - No separate DB log stream — all errors in Worker logs
46
+
47
+ **TODOs saved in:** J:\Projects\taplo\TODOs.txt
48
+ - WebAuthn/passkeys auth
49
+ - Smart country code detection
50
+ - Content moderation pipeline (Claude API for text, CF Images for photos)
51
+ - iOS App Store compliance strategies (documented in detail)
52
+ - taploapp.vn → auto Vietnamese locale
@@ -1,6 +1,6 @@
1
1
  {
2
- "message": "Auto-checkpoint at 2026-04-25T09:42:20.720Z",
3
- "checkpoint_at": "2026-04-25T09:42:20.724Z",
2
+ "message": "Auto-checkpoint at 2026-04-27T12:15:40.415Z",
3
+ "checkpoint_at": "2026-04-27T12:15:40.417Z",
4
4
  "active_files": [
5
5
  "J:\\Projects\\Sentinel\\cli\\bin\\sentinel.js",
6
6
  "J:\\Projects\\Sentinel\\cli\\lib\\test.js"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@misterhuydo/sentinel",
3
- "version": "1.6.10",
3
+ "version": "1.6.11",
4
4
  "description": "Sentinel — Autonomous DevOps Agent installer and manager",
5
5
  "bin": {
6
6
  "sentinel": "./bin/sentinel.js"
@@ -1 +1 @@
1
- __version__ = "1.6.10"
1
+ __version__ = "1.6.11"
@@ -310,7 +310,7 @@ def notify_fix_blocked(
310
310
  f"{repo_line}"
311
311
  f"*What Claude found:* {short_reason}\n\n"
312
312
  f"*Original report:*\n{report_block}\n\n"
313
- f"_Reply `ignore` to dismiss, or assign someone to investigate._"
313
+ f"_Reply `ignore` to dismiss, or reply here to investigate._"
314
314
  )
315
315
 
316
316
  target_channel = origin_channel or cfg.slack_channel
@@ -608,6 +608,21 @@ async def _dispatch(event: dict, client, cfg_loader, store) -> None:
608
608
  if not text:
609
609
  text = "hello"
610
610
 
611
+ # Thread-reply context: if the user is replying inside a thread (rather
612
+ # than starting one), pull the parent message so Boss sees what the user
613
+ # is actually focused on. Without this, a one-word reply like "ignore" or
614
+ # "investigate" in a fix-blocked alert thread arrives at Boss with no
615
+ # context at all — Boss can't tell which fingerprint or which error.
616
+ thread_ts = event.get("thread_ts")
617
+ if thread_ts and thread_ts != event.get("ts"):
618
+ parent = await _fetch_thread_parent(client, channel, thread_ts)
619
+ if parent:
620
+ text = (
621
+ "[Thread context — the message in this thread the user is replying to:]\n"
622
+ f"{parent}\n\n"
623
+ f"[User's reply:]\n{text}"
624
+ )
625
+
611
626
  # Allowlist check — if SLACK_ALLOWED_USERS is configured, only those users + admins may interact.
612
627
  # Admins (SLACK_ADMIN_USERS) are always allowed regardless of SLACK_ALLOWED_USERS.
613
628
  allowed = cfg_loader.sentinel.slack_allowed_users
@@ -806,3 +821,22 @@ def _strip_mention(text: str) -> str:
806
821
  """Remove leading <@BOTID> mention from message text."""
807
822
  import re
808
823
  return re.sub(r"^<@[A-Z0-9]+>\s*", "", text)
824
+
825
+
826
+ async def _fetch_thread_parent(client, channel: str, thread_ts: str) -> str:
827
+ """Fetch the first (parent) message of a Slack thread.
828
+
829
+ Used so a user reply in a thread that Sentinel started (e.g. a fix-blocked
830
+ alert) gets the alert text injected into Boss's prompt — otherwise Boss
831
+ sees only the bare reply ("ignore", "investigate", ...) with no context.
832
+ """
833
+ try:
834
+ resp = await client.conversations_replies(
835
+ channel=channel, ts=thread_ts, limit=1, inclusive=True,
836
+ )
837
+ msgs = resp.get("messages", [])
838
+ if msgs:
839
+ return (msgs[0].get("text") or "").strip()
840
+ except Exception as exc:
841
+ logger.warning("Boss: could not fetch thread parent for %s: %s", thread_ts, exc)
842
+ return ""