@rubytech/create-realagent 1.0.826 → 1.0.828

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/neo4j/schema.cypher +34 -2
  3. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +19 -13
  4. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +5 -5
  5. package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
  6. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  7. package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
  8. package/payload/platform/plugins/memory/PLUGIN.md +1 -1
  9. package/payload/platform/plugins/memory/mcp/dist/index.js +6 -41
  10. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  11. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
  12. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
  13. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
  14. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  15. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +139 -56
  16. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  17. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
  18. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
  19. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +61 -0
  20. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
  21. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +34 -0
  22. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +241 -0
  24. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  25. package/payload/platform/plugins/memory/references/schema-base.md +5 -2
  26. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +17 -15
  27. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +313 -366
  28. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +27 -60
  29. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
  30. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
  31. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
  32. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
  33. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
  34. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
  35. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
  36. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
  37. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
  38. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
  39. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
  40. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
  41. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
  42. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
  43. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
  44. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
  45. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
  46. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
  47. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
  48. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
  49. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +163 -0
  50. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +91 -0
  51. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +59 -0
  52. package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +54 -0
  53. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +55 -32
  54. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +9 -6
  55. package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +81 -0
  56. package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +48 -0
  57. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +66 -73
  58. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +143 -0
  59. package/payload/platform/templates/specialists/agents/database-operator.md +10 -11
  60. package/payload/server/chunk-T2OPNP3L.js +654 -0
  61. package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
  62. package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
  63. package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
  64. package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
  65. package/payload/server/public/graph.html +2 -2
  66. package/payload/server/public/index.html +2 -2
  67. package/payload/server/server.js +277 -164
  68. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
  69. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
  70. package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
  71. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
@@ -1,52 +1,40 @@
1
1
  #!/usr/bin/env bash
2
2
  # =============================================================================
3
3
  # whatsapp-ingest.sh — single deterministic Bash entry for WhatsApp archive
4
- # ingestion (Task 855 / Task 871). Thin wrapper: arg-validate, resolve env,
5
- # invoke ingest.mjs in-process. The script is the database-operator
6
- # subagent's only operator-facing handle on the parse → filter → archive-write
7
- # pipeline. Phase 1 has NO LLM. The Haiku insight pass is Phase 2 — invoked
8
- # consciously via `mcp__memory__whatsapp-export-insight-pass`.
4
+ # ingestion (Task 891 chunked :ConversationArchive shape).
5
+ #
6
+ # Pipeline: parse → bind canonical sender set → derive conversationIdentity →
7
+ # look up prior :ConversationArchive (delta cursor) → sessionize delta at
8
+ # gap-hours boundary → classify each session via Haiku (mode='chat') →
9
+ # memory-ingest with parentLabel='ConversationArchive'.
9
10
  #
10
11
  # Usage:
11
12
  # bash whatsapp-ingest.sh <archive.zip|dir|_chat.txt>
12
13
  # --owner-element-id <id>
13
- # --subject-person-id <id>
14
+ # --participant-person-ids <csv>
14
15
  # --scope <admin|public>
15
- # --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
16
+ # [--session-gap-hours <N>] (default 12)
16
17
  # [--account-id <accountId>]
17
18
  # [--timezone <iana-zone>]
18
19
  # [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
19
20
  #
20
- # `--subject-person-id` is required (Task 887 §A0). DM scope: the
21
- # operator-confirmed third-party :Person elementId from the preview
22
- # histogram. Owner + subject form the closed sender set; any parsed
23
- # senderName outside that set LOUD-FAILs with `parser-miss` and exits
24
- # non-zero — bounds writer cardinality to the deterministic preview output.
25
- #
26
- # `--filter` is mandatory (Task 871). Forms:
27
- # all — write every parsed row
28
- # senders=Alice,Bob Carter — keep rows whose senderName ∈ csv
29
- # date-range=2024-01-01..2024-06-30
30
- # — keep rows whose dateSent falls inside
31
- # the inclusive range (date-only or full
32
- # ISO 8601 endpoints both accepted)
21
+ # `--owner-element-id` + `--participant-person-ids` form the closed sender
22
+ # set; any parsed senderName outside that set LOUD-FAILs with `parser-miss`
23
+ # and exits non-zero. `--filter` and `--subject-person-id` are gone — chunked
24
+ # Section:Conversation rows bound the operator surface naturally.
33
25
  #
34
26
  # Exit 0 + JSON summary on stdout on success.
35
- # Exit !0 + one [whatsapp-ingest] FAIL line on stderr on failure.
27
+ # Exit !0 + one [whatsapp-import] FAIL line on stderr on failure.
36
28
  # =============================================================================
37
29
 
38
30
  set -euo pipefail
39
31
 
40
- # Pre-flight arg check — emit the same single-line FAIL shape ingest.mjs uses
41
- # so the operator's grep pattern catches both layers uniformly.
42
32
  arg_fail() {
43
33
  local reason="$1"
44
- echo "[whatsapp-ingest] FAIL phase=argv reason=\"${reason}\"" >&2
34
+ echo "[whatsapp-import] FAIL phase=argv reason=\"${reason}\"" >&2
45
35
  exit 1
46
36
  }
47
37
 
48
- # Locate platform root from this script's location:
49
- # platform/plugins/whatsapp-import/bin/ → ../../.. = platform/
50
38
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
51
39
  PLATFORM_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
52
40
  INGEST_MJS="$SCRIPT_DIR/ingest.mjs"
@@ -55,31 +43,24 @@ if [ ! -f "$INGEST_MJS" ]; then
55
43
  arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
56
44
  fi
57
45
 
58
- # Validate required flags up-front — gives a cleaner pre-flight error than
59
- # the node-side throw and keeps the failure surface single-line. argv
60
- # validation runs before env resolution so a wrong invocation fails on the
61
- # wrong invocation, not on a missing password.
62
46
  ARCHIVE=""
63
- OWNER_VAL=""
64
- SUBJECT_VAL=""
65
- SCOPE_VAL=""
66
- FILTER_VAL=""
67
47
  HAS_OWNER=0
68
- HAS_SUBJECT=0
48
+ OWNER_VAL=""
49
+ HAS_PARTICIPANTS=0
50
+ PARTICIPANTS_VAL=""
69
51
  HAS_SCOPE=0
70
- HAS_FILTER=0
52
+ SCOPE_VAL=""
71
53
 
72
54
  ARGS=("$@")
73
55
  i=0
74
56
  while [ $i -lt ${#ARGS[@]} ]; do
75
57
  a="${ARGS[$i]}"
76
58
  case "$a" in
77
- --owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
78
- --subject-person-id) HAS_SUBJECT=1; SUBJECT_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
79
- --scope) HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
80
- --filter) HAS_FILTER=1; FILTER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
81
- --account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
82
- --*) i=$((i + 2)); continue ;;
59
+ --owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
60
+ --participant-person-ids) HAS_PARTICIPANTS=1; PARTICIPANTS_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
61
+ --scope) HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
62
+ --session-gap-hours|--account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
63
+ --*) i=$((i + 2)); continue ;;
83
64
  *)
84
65
  if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
85
66
  i=$((i + 1))
@@ -88,24 +69,15 @@ while [ $i -lt ${#ARGS[@]} ]; do
88
69
  esac
89
70
  done
90
71
 
91
- [ -n "$ARCHIVE" ] || arg_fail "missing positional <archive>"
92
- [ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
93
- [ "$HAS_SUBJECT" -eq 1 ] && [ -n "$SUBJECT_VAL" ] || arg_fail "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)"
94
- [ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
72
+ [ -n "$ARCHIVE" ] || arg_fail "missing positional <archive>"
73
+ [ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
74
+ [ "$HAS_PARTICIPANTS" -eq 1 ] && [ -n "$PARTICIPANTS_VAL" ] || arg_fail "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)"
75
+ [ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
95
76
  case "$SCOPE_VAL" in
96
77
  admin|public) : ;;
97
78
  *) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
98
79
  esac
99
- if [ "$HAS_FILTER" -ne 1 ] || [ -z "$FILTER_VAL" ]; then
100
- # Mirror ingest.mjs's pinned LOUD-FAIL line so a single grep covers both
101
- # layers — the operator's runbook recipe is `grep '\[whatsapp-ingest\] FAIL filter-required'`.
102
- echo "[whatsapp-ingest] FAIL filter-required reason=\"bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)\"" >&2
103
- arg_fail "missing --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"
104
- fi
105
80
 
106
- # Lift NEO4J_PASSWORD from the install's config file when env doesn't carry it
107
- # (e.g. operator running directly from a shell rather than via the platform
108
- # server). Mirror seed-neo4j.sh precedence: env var wins, then the file.
109
81
  if [ -z "${NEO4J_PASSWORD:-}" ]; then
110
82
  NEO4J_PASSWORD_FILE="$PLATFORM_ROOT/config/.neo4j-password"
111
83
  if [ -f "$NEO4J_PASSWORD_FILE" ]; then
@@ -116,9 +88,6 @@ if [ -z "${NEO4J_PASSWORD:-}" ]; then
116
88
  fi
117
89
  fi
118
90
 
119
- # NEO4J_URI is hard-required (Task 787). The wrapper does not fabricate a
120
- # default — the platform server sets this at spawn time and the operator can
121
- # pass it explicitly when running standalone.
122
91
  if [ -z "${NEO4J_URI:-}" ]; then
123
92
  arg_fail "NEO4J_URI not set (no default — set in env)"
124
93
  fi
@@ -126,6 +95,4 @@ fi
126
95
  export NEO4J_USER="${NEO4J_USER:-neo4j}"
127
96
  export MAXY_PLATFORM_ROOT="$PLATFORM_ROOT"
128
97
 
129
- # Hand off to node. ingest.mjs handles all observability; the wrapper does
130
- # not double-log. Forward exit code verbatim.
131
98
  exec node "$INGEST_MJS" "$@"
@@ -0,0 +1,18 @@
1
+ import type { ParsedLine } from "./parse-export.js";
2
+ export type CursorResult = {
3
+ kind: "found";
4
+ deltaStart: number;
5
+ } | {
6
+ kind: "empty";
7
+ } | {
8
+ kind: "missing";
9
+ };
10
+ /**
11
+ * Walk parsed lines forward and return the first index whose content hash
12
+ * matches `lastIngestedMessageHash`. The first match is correct because
13
+ * messages with identical (dateSent, normalisedSenderName, body) tuples
14
+ * are genuine duplicates — there is no way to disambiguate them and slicing
15
+ * after the first occurrence is the chronologically safe choice.
16
+ */
17
+ export declare function findDeltaCursor(parsedLines: readonly ParsedLine[], lastIngestedMessageHash: string): CursorResult;
18
+ //# sourceMappingURL=delta-cursor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"delta-cursor.d.ts","sourceRoot":"","sources":["../src/delta-cursor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAqBpD,MAAM,MAAM,YAAY,GACpB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,OAAO,CAAA;CAAE,GACjB;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAExB;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC7B,WAAW,EAAE,SAAS,UAAU,EAAE,EAClC,uBAAuB,EAAE,MAAM,GAC9B,YAAY,CAiBd"}
@@ -0,0 +1,31 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.findDeltaCursor = findDeltaCursor;
4
+ const derive_keys_js_1 = require("./derive-keys.js");
5
+ /**
6
+ * Walk parsed lines forward and return the first index whose content hash
7
+ * matches `lastIngestedMessageHash`. The first match is correct because
8
+ * messages with identical (dateSent, normalisedSenderName, body) tuples
9
+ * are genuine duplicates — there is no way to disambiguate them and slicing
10
+ * after the first occurrence is the chronologically safe choice.
11
+ */
12
+ function findDeltaCursor(parsedLines, lastIngestedMessageHash) {
13
+ if (!lastIngestedMessageHash || !lastIngestedMessageHash.trim()) {
14
+ throw new Error("findDeltaCursor: lastIngestedMessageHash must be non-empty");
15
+ }
16
+ for (let i = 0; i < parsedLines.length; i++) {
17
+ const line = parsedLines[i];
18
+ const hash = (0, derive_keys_js_1.deriveMessageContentHash)({
19
+ dateSent: line.dateSent,
20
+ senderName: line.senderName,
21
+ body: line.body,
22
+ });
23
+ if (hash === lastIngestedMessageHash) {
24
+ if (i === parsedLines.length - 1)
25
+ return { kind: "empty" };
26
+ return { kind: "found", deltaStart: i + 1 };
27
+ }
28
+ }
29
+ return { kind: "missing" };
30
+ }
31
+ //# sourceMappingURL=delta-cursor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"delta-cursor.js","sourceRoot":"","sources":["../src/delta-cursor.ts"],"names":[],"mappings":";;AAiCA,0CAoBC;AApDD,qDAA4D;AAyB5D;;;;;;GAMG;AACH,SAAgB,eAAe,CAC7B,WAAkC,EAClC,uBAA+B;IAE/B,IAAI,CAAC,uBAAuB,IAAI,CAAC,uBAAuB,CAAC,IAAI,EAAE,EAAE,CAAC;QAChE,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;IAChF,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAA,yCAAwB,EAAC;YACpC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,IAAI,KAAK,uBAAuB,EAAE,CAAC;YACrC,IAAI,CAAC,KAAK,WAAW,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;YAC3D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9C,CAAC;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC7B,CAAC"}
@@ -1,21 +1,36 @@
1
1
  export declare function normaliseSenderName(name: string): string;
2
2
  export declare function sha256Hex(input: string): string;
3
- export interface DeriveMessageIdInput {
4
- /** SHA-256 of the source `_chat.txt` bytes — stable across re-imports. */
5
- conversationSha256: string;
3
+ export interface DeriveConversationIdentityInput {
4
+ accountId: string;
5
+ /**
6
+ * Element IDs of every confirmed participant (owner + others). Order is
7
+ * not significant; the function sorts internally so the same set always
8
+ * produces the same identity.
9
+ */
10
+ participantElementIds: readonly string[];
11
+ }
12
+ /**
13
+ * Compute the stable identity for a conversation. Same accountId + same
14
+ * participant set ⇒ same identity, regardless of message content or export
15
+ * file bytes. DM and group chats use this identical formula.
16
+ */
17
+ export declare function deriveConversationIdentity(input: DeriveConversationIdentityInput): string;
18
+ export interface DeriveMessageContentHashInput {
6
19
  /** ISO 8601 with timezone offset, as emitted by parseExport. */
7
20
  dateSent: string;
8
21
  /** Raw senderName from the export line. Normalised internally. */
9
22
  senderName: string;
10
- /** Raw message body. Hashed internally. */
23
+ /** Raw message body. */
11
24
  body: string;
12
25
  }
13
- export declare function deriveMessageId(input: DeriveMessageIdInput): string;
14
- export interface ObservationContentFields {
15
- summary?: string | null;
16
- from?: string | null;
17
- to?: string | null;
18
- subject?: string | null;
19
- }
20
- export declare function observationContentHash(fields: ObservationContentFields): string;
26
+ /**
27
+ * Compute a content-only hash for a single message. Used as the delta-append
28
+ * cursor: `:ConversationArchive.lastIngestedMessageHash` records the hash of
29
+ * the last ingested message; on re-import, the orchestrator finds the line
30
+ * with the matching hash and slices everything after it.
31
+ *
32
+ * Excludes archive sha256 deliberately — the cursor must survive a fresh
33
+ * re-export of the same chat (different file bytes, same message tuples).
34
+ */
35
+ export declare function deriveMessageContentHash(input: DeriveMessageContentHashInput): string;
21
36
  //# sourceMappingURL=derive-keys.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"derive-keys.d.ts","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":"AAqBA,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAExD;AAED,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED,MAAM,WAAW,oBAAoB;IACnC,0EAA0E;IAC1E,kBAAkB,EAAE,MAAM,CAAC;IAC3B,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,UAAU,EAAE,MAAM,CAAC;IACnB,2CAA2C;IAC3C,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,eAAe,CAAC,KAAK,EAAE,oBAAoB,GAAG,MAAM,CAInE;AAED,MAAM,WAAW,wBAAwB;IACvC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,EAAE,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACzB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,wBAAwB,GAAG,MAAM,CAK/E"}
1
+ {"version":3,"file":"derive-keys.d.ts","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":"AAsBA,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAExD;AAED,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED,MAAM,WAAW,+BAA+B;IAC9C,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,qBAAqB,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CACxC,KAAK,EAAE,+BAA+B,GACrC,MAAM,CASR;AAED,MAAM,WAAW,6BAA6B;IAC5C,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,UAAU,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;GAQG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,6BAA6B,GACnC,MAAM,CAGR"}
@@ -2,26 +2,27 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.normaliseSenderName = normaliseSenderName;
4
4
  exports.sha256Hex = sha256Hex;
5
- exports.deriveMessageId = deriveMessageId;
6
- exports.observationContentHash = observationContentHash;
5
+ exports.deriveConversationIdentity = deriveConversationIdentity;
6
+ exports.deriveMessageContentHash = deriveMessageContentHash;
7
7
  const node_crypto_1 = require("node:crypto");
8
8
  // ---------------------------------------------------------------------------
9
- // derive-keys — natural-key derivation for whatsapp-import (Task 870).
9
+ // derive-keys — natural-key derivation for whatsapp-import (Task 891,
10
+ // supersedes Task 870's per-message contract).
10
11
  //
11
12
  // Pure functions. No I/O. The whole point is that re-imports of the same
12
- // archive collapse to the same Message identity regardless of release-level
13
- // drift in array indices, hash widths, or arbitrary tiebreakers.
13
+ // archive collapse to the same identity regardless of release-level drift in
14
+ // chunk indices, hash widths, or arbitrary tiebreakers.
14
15
  //
15
- // Key shape (Task 870 brief):
16
+ // Identity contracts (Task 891 brief):
16
17
  //
17
- // messageId = whatsapp-export:msg:<conversationSha256>:<dateSentISO>
18
- // :<NFKC-trim-lower(senderName)>
19
- // :<sha256-hex(body)>
18
+ // conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))
19
+ // messageContentHash = sha256(dateSent + "|" + NFKC-trim-lower(senderName) + "|" + body)
20
20
  //
21
- // Operator constraint: the same archive must be re-imported with the same
22
- // `--timezone` flag. Different timezones reinterpret wall-clock instants and
23
- // will produce drifted messageIds — that is correct semantics, not a bug.
24
- // Documented in .docs/whatsapp.md natural-key contract section.
21
+ // `conversationIdentity` is stable across re-exports — same operator + same
22
+ // participant set ⇒ same identity, regardless of file bytes. DM and group
23
+ // follow the same formula; the difference is the participant array length.
24
+ // `messageContentHash` is content-only (no archive sha256, no chunk index)
25
+ // so cursor lookup survives a fresh re-export of the same chat.
25
26
  // ---------------------------------------------------------------------------
26
27
  function normaliseSenderName(name) {
27
28
  return name.normalize("NFKC").trim().toLowerCase();
@@ -29,13 +30,32 @@ function normaliseSenderName(name) {
29
30
  function sha256Hex(input) {
30
31
  return (0, node_crypto_1.createHash)("sha256").update(input).digest("hex");
31
32
  }
32
- function deriveMessageId(input) {
33
- const norm = normaliseSenderName(input.senderName);
34
- const bodyHash = sha256Hex(input.body);
35
- return `whatsapp-export:msg:${input.conversationSha256}:${input.dateSent}:${norm}:${bodyHash}`;
33
+ /**
34
+ * Compute the stable identity for a conversation. Same accountId + same
35
+ * participant set ⇒ same identity, regardless of message content or export
36
+ * file bytes. DM and group chats use this identical formula.
37
+ */
38
+ function deriveConversationIdentity(input) {
39
+ if (!input.accountId || !input.accountId.trim()) {
40
+ throw new Error("deriveConversationIdentity: accountId is required");
41
+ }
42
+ if (input.participantElementIds.length === 0) {
43
+ throw new Error("deriveConversationIdentity: participantElementIds must be non-empty");
44
+ }
45
+ const sorted = [...input.participantElementIds].sort();
46
+ return sha256Hex(`${input.accountId}:${sorted.join(",")}`);
36
47
  }
37
- function observationContentHash(fields) {
38
- const parts = [fields.summary, fields.from, fields.to, fields.subject].map((p) => (p ?? "").normalize("NFKC").trim().toLowerCase());
39
- return sha256Hex(parts.join("|"));
48
+ /**
49
+ * Compute a content-only hash for a single message. Used as the delta-append
50
+ * cursor: `:ConversationArchive.lastIngestedMessageHash` records the hash of
51
+ * the last ingested message; on re-import, the orchestrator finds the line
52
+ * with the matching hash and slices everything after it.
53
+ *
54
+ * Excludes archive sha256 deliberately — the cursor must survive a fresh
55
+ * re-export of the same chat (different file bytes, same message tuples).
56
+ */
57
+ function deriveMessageContentHash(input) {
58
+ const norm = normaliseSenderName(input.senderName);
59
+ return sha256Hex(`${input.dateSent}|${norm}|${input.body}`);
40
60
  }
41
61
  //# sourceMappingURL=derive-keys.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":";;AAqBA,kDAEC;AAED,8BAEC;AAaD,0CAIC;AASD,wDAKC;AA1DD,6CAAyC;AAEzC,8EAA8E;AAC9E,uEAAuE;AACvE,EAAE;AACF,yEAAyE;AACzE,4EAA4E;AAC5E,iEAAiE;AACjE,EAAE;AACF,8BAA8B;AAC9B,EAAE;AACF,uEAAuE;AACvE,kEAAkE;AAClE,uDAAuD;AACvD,EAAE;AACF,0EAA0E;AAC1E,6EAA6E;AAC7E,0EAA0E;AAC1E,gEAAgE;AAChE,8EAA8E;AAE9E,SAAgB,mBAAmB,CAAC,IAAY;IAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AACrD,CAAC;AAED,SAAgB,SAAS,CAAC,KAAa;IACrC,OAAO,IAAA,wBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAaD,SAAgB,eAAe,CAAC,KAA2B;IACzD,MAAM,IAAI,GAAG,mBAAmB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACvC,OAAO,uBAAuB,KAAK,CAAC,kBAAkB,IAAI,KAAK,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,EAAE,CAAC;AACjG,CAAC;AASD,SAAgB,sBAAsB,CAAC,MAAgC;IACrE,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CACxE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CACxD,CAAC;IACF,OAAO,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
1
+ {"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":";;AAsBA,kDAEC;AAED,8BAEC;AAiBD,gEAWC;AAoBD,4DAKC;AAjFD,6CAAyC;AAEzC,8EAA8E;AAC9E,sEAAsE;AACtE,+CAA+C;AAC/C,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,wDAAwD;AACxD,EAAE;AACF,uCAAuC;AACvC,EAAE;AACF,6FAA6F;AAC7F,+FAA+F;AAC/F,EAAE;AACF,4EAA4E;AAC5E,0EAA0E;AAC1E,2EAA2E;AAC3E,2EAA2E;AAC3E,gEAAgE;AAChE,8EAA8E;AAE9E,SAAgB,mBAAmB,CAAC,IAAY;IAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AACrD,CAAC;AAED,SAAgB,SAAS,CAAC,KAAa;IACrC,OAAO,IAAA,wBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAYD;;;;GAIG;AACH,SAAgB,0BAA0B,CACxC,KAAsC;IAEtC,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7D,CAAC;AAWD;;;;;;;;GAQG;AACH,SAAgB,wBAAwB,CACtC,KAAoC;IAEpC,MAAM,IAAI,GAAG,mBAAmB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,QAAQ,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;AAC9D,CAAC"}
@@ -1,7 +1,10 @@
1
1
  export { parseExport } from "./parse-export.js";
2
2
  export type { ParseExportInput, ParseExportResult, ParseExportCounters, ParsedLine, } from "./parse-export.js";
3
- export { parseFilterArg, applyFilter } from "./filter.js";
4
- export type { Filter } from "./filter.js";
5
- export { normaliseSenderName, sha256Hex, deriveMessageId, observationContentHash, } from "./derive-keys.js";
6
- export type { DeriveMessageIdInput, ObservationContentFields, } from "./derive-keys.js";
3
+ export { normaliseSenderName, sha256Hex, deriveConversationIdentity, deriveMessageContentHash, } from "./derive-keys.js";
4
+ export type { DeriveConversationIdentityInput, DeriveMessageContentHashInput, } from "./derive-keys.js";
5
+ export { sessionize } from "./sessionize.js";
6
+ export type { Session } from "./sessionize.js";
7
+ export { toClassifierInput } from "./to-classifier-input.js";
8
+ export { findDeltaCursor } from "./delta-cursor.js";
9
+ export type { CursorResult } from "./delta-cursor.js";
7
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EACV,gBAAgB,EAChB,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,GACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1D,YAAY,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,EACL,mBAAmB,EACnB,SAAS,EACT,eAAe,EACf,sBAAsB,GACvB,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,oBAAoB,EACpB,wBAAwB,GACzB,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EACV,gBAAgB,EAChB,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,GACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,mBAAmB,EACnB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,+BAA+B,EAC/B,6BAA6B,GAC9B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,14 +1,17 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.observationContentHash = exports.deriveMessageId = exports.sha256Hex = exports.normaliseSenderName = exports.applyFilter = exports.parseFilterArg = exports.parseExport = void 0;
3
+ exports.findDeltaCursor = exports.toClassifierInput = exports.sessionize = exports.deriveMessageContentHash = exports.deriveConversationIdentity = exports.sha256Hex = exports.normaliseSenderName = exports.parseExport = void 0;
4
4
  var parse_export_js_1 = require("./parse-export.js");
5
5
  Object.defineProperty(exports, "parseExport", { enumerable: true, get: function () { return parse_export_js_1.parseExport; } });
6
- var filter_js_1 = require("./filter.js");
7
- Object.defineProperty(exports, "parseFilterArg", { enumerable: true, get: function () { return filter_js_1.parseFilterArg; } });
8
- Object.defineProperty(exports, "applyFilter", { enumerable: true, get: function () { return filter_js_1.applyFilter; } });
9
6
  var derive_keys_js_1 = require("./derive-keys.js");
10
7
  Object.defineProperty(exports, "normaliseSenderName", { enumerable: true, get: function () { return derive_keys_js_1.normaliseSenderName; } });
11
8
  Object.defineProperty(exports, "sha256Hex", { enumerable: true, get: function () { return derive_keys_js_1.sha256Hex; } });
12
- Object.defineProperty(exports, "deriveMessageId", { enumerable: true, get: function () { return derive_keys_js_1.deriveMessageId; } });
13
- Object.defineProperty(exports, "observationContentHash", { enumerable: true, get: function () { return derive_keys_js_1.observationContentHash; } });
9
+ Object.defineProperty(exports, "deriveConversationIdentity", { enumerable: true, get: function () { return derive_keys_js_1.deriveConversationIdentity; } });
10
+ Object.defineProperty(exports, "deriveMessageContentHash", { enumerable: true, get: function () { return derive_keys_js_1.deriveMessageContentHash; } });
11
+ var sessionize_js_1 = require("./sessionize.js");
12
+ Object.defineProperty(exports, "sessionize", { enumerable: true, get: function () { return sessionize_js_1.sessionize; } });
13
+ var to_classifier_input_js_1 = require("./to-classifier-input.js");
14
+ Object.defineProperty(exports, "toClassifierInput", { enumerable: true, get: function () { return to_classifier_input_js_1.toClassifierInput; } });
15
+ var delta_cursor_js_1 = require("./delta-cursor.js");
16
+ Object.defineProperty(exports, "findDeltaCursor", { enumerable: true, get: function () { return delta_cursor_js_1.findDeltaCursor; } });
14
17
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,qDAAgD;AAAvC,8GAAA,WAAW,OAAA;AAOpB,yCAA0D;AAAjD,2GAAA,cAAc,OAAA;AAAE,wGAAA,WAAW,OAAA;AAEpC,mDAK0B;AAJxB,qHAAA,mBAAmB,OAAA;AACnB,2GAAA,SAAS,OAAA;AACT,iHAAA,eAAe,OAAA;AACf,wHAAA,sBAAsB,OAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,qDAAgD;AAAvC,8GAAA,WAAW,OAAA;AAOpB,mDAK0B;AAJxB,qHAAA,mBAAmB,OAAA;AACnB,2GAAA,SAAS,OAAA;AACT,4HAAA,0BAA0B,OAAA;AAC1B,0HAAA,wBAAwB,OAAA;AAM1B,iDAA6C;AAApC,2GAAA,UAAU,OAAA;AAEnB,mEAA6D;AAApD,2HAAA,iBAAiB,OAAA;AAC1B,qDAAoD;AAA3C,kHAAA,eAAe,OAAA"}
@@ -0,0 +1,25 @@
1
+ import type { ParsedLine } from "./parse-export.js";
2
+ export interface Session {
3
+ /** 0-based index across the archive's sessions. */
4
+ index: number;
5
+ /** ISO 8601 timestamp of the first message in the session. */
6
+ firstMessageAt: string;
7
+ /** ISO 8601 timestamp of the last message in the session. */
8
+ lastMessageAt: string;
9
+ /** Messages in the session, chronological. */
10
+ messages: ParsedLine[];
11
+ }
12
+ /**
13
+ * Split parsed messages into sessions on gaps ≥ `gapHours`. Input must be
14
+ * pre-sorted by `dateSent` (parse-export emits in file order, which IS
15
+ * chronological for any well-formed `_chat.txt`).
16
+ *
17
+ * Boundary semantics (exact-at-threshold):
18
+ * gap == gapHours → cut here (start a new session)
19
+ * gap < gapHours → same session
20
+ * gap > gapHours → cut here
21
+ *
22
+ * Empty input returns []; single-message input returns one one-message session.
23
+ */
24
+ export declare function sessionize(messages: readonly ParsedLine[], gapHours: number): Session[];
25
+ //# sourceMappingURL=sessionize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sessionize.d.ts","sourceRoot":"","sources":["../src/sessionize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAsBpD,MAAM,WAAW,OAAO;IACtB,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,8DAA8D;IAC9D,cAAc,EAAE,MAAM,CAAC;IACvB,6DAA6D;IAC7D,aAAa,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,QAAQ,EAAE,UAAU,EAAE,CAAC;CACxB;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CACxB,QAAQ,EAAE,SAAS,UAAU,EAAE,EAC/B,QAAQ,EAAE,MAAM,GACf,OAAO,EAAE,CAgCX"}
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.sessionize = sessionize;
4
/**
 * Split parsed messages into sessions on gaps ≥ `gapHours`. Input must be
 * pre-sorted by `dateSent` (parse-export emits in file order, which IS
 * chronological for any well-formed `_chat.txt`).
 *
 * Boundary semantics (exact-at-threshold):
 *   gap == gapHours → cut here (start a new session)
 *   gap <  gapHours → same session
 *   gap >  gapHours → cut here
 *
 * Empty input returns []; single-message input returns one one-message session.
 *
 * A message whose `dateSent` cannot be parsed by `Date.parse` produces a NaN
 * gap, which never satisfies the cut condition, so that message stays in the
 * current session rather than starting a new one.
 *
 * @param {ReadonlyArray<{dateSent: string}>} messages - Parsed messages in chronological order.
 * @param {number} gapHours - Minimum silence, in hours, that starts a new session.
 * @returns {Array<{index: number, firstMessageAt: string, lastMessageAt: string, messages: Array}>}
 *   Sessions in input order; `index` is the 0-based position in the result.
 * @throws {Error} If `gapHours` is not a positive number (NaN and undefined included).
 */
function sessionize(messages, gapHours) {
    // `!(x > 0)` instead of `x <= 0`: NaN and undefined fail *every* comparison,
    // so the old guard let them through and the NaN threshold then silently
    // merged the whole archive into a single session.
    if (!(gapHours > 0)) {
        throw new Error(`sessionize: gapHours must be positive, got ${gapHours}`);
    }
    if (messages.length === 0) {
        return [];
    }
    const gapMs = gapHours * 60 * 60 * 1000;
    const sessions = [];
    let currentMessages = [messages[0]];
    // Close the session being accumulated and append it to the result.
    const flush = () => {
        sessions.push({
            index: sessions.length,
            firstMessageAt: currentMessages[0].dateSent,
            lastMessageAt: currentMessages[currentMessages.length - 1].dateSent,
            messages: currentMessages,
        });
    };
    for (let i = 1; i < messages.length; i++) {
        const prevMs = Date.parse(messages[i - 1].dateSent);
        const currMs = Date.parse(messages[i].dateSent);
        const gap = currMs - prevMs;
        if (gap >= gapMs) {
            // Threshold reached or exceeded: the current message opens a new session.
            flush();
            currentMessages = [messages[i]];
        }
        else {
            currentMessages.push(messages[i]);
        }
    }
    // The trailing session is never closed inside the loop.
    flush();
    return sessions;
}
48
+ //# sourceMappingURL=sessionize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sessionize.js","sourceRoot":"","sources":["../src/sessionize.ts"],"names":[],"mappings":";;AA6CA,gCAmCC;AA/CD;;;;;;;;;;;GAWG;AACH,SAAgB,UAAU,CACxB,QAA+B,EAC/B,QAAgB;IAEhB,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,8CAA8C,QAAQ,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,KAAK,GAAG,QAAQ,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;IACxC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,eAAe,GAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IAElD,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,KAAK,EAAE,QAAQ,CAAC,MAAM;YACtB,cAAc,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC,QAAQ;YAC3C,aAAa,EAAE,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ;YACnE,QAAQ,EAAE,eAAe;SAC1B,CAAC,CAAC;IACL,CAAC,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAC5B,IAAI,GAAG,IAAI,KAAK,EAAE,CAAC;YACjB,KAAK,EAAE,CAAC;YACR,eAAe,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;aAAM,CAAC;YACN,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IACD,KAAK,EAAE,CAAC;IACR,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { Session } from "./sessionize.js";
2
+ export declare function toClassifierInput(session: Session): string;
3
+ //# sourceMappingURL=to-classifier-input.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"to-classifier-input.d.ts","sourceRoot":"","sources":["../src/to-classifier-input.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAwB/C,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAM1D"}
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.toClassifierInput = toClassifierInput;
4
+ // ---------------------------------------------------------------------------
5
+ // to-classifier-input — Pass 2 input formatter (Task 891).
6
+ //
7
+ // Pure function. Renders one Session as a turn-attributed text block ready
8
+ // to hand to memory-classify (mode='chat'). Format:
9
+ //
10
+ // [YYYY-MM-DD HH:MM:SS TZ] <Sender>: <body>
11
+ // [YYYY-MM-DD HH:MM:SS TZ] <Sender>: <body>
12
+ // ...
13
+ //
14
+ // Multi-line message bodies are kept verbatim (with their internal newlines).
15
+ // The leading `[ts] <Sender>: ` prefix is the only structural addition; the
16
+ // classifier prompt instructs Haiku to preserve it in the chunk `body` so
17
+ // downstream Phase 2 work can recover per-message provenance via snippet
18
+ // matching against the conversation tail.
19
+ //
20
+ // Timezone: each `dateSent` ISO already carries an offset (set by parseExport
21
+ // from the operator's confirmed IANA zone). The renderer prints the
22
+ // human-readable wall-clock for that offset; the trailing "TZ" suffix is the
23
+ // offset itself, not a zone name.
24
+ // ---------------------------------------------------------------------------
25
/**
 * Render one session as a turn-attributed text block, one
 * `[<timestamp>] <sender>: <body>` entry per message, joined with newlines.
 * Message bodies are emitted verbatim, including any internal newlines.
 *
 * @param {{messages: Iterable<{dateSent: string, senderName: string, body: string}>}} session
 * @returns {string} The rendered block; an empty string when the session has no messages.
 */
function toClassifierInput(session) {
    const rendered = Array.from(session.messages, (message) => {
        const stamp = formatWallClock(message.dateSent);
        return `[${stamp}] ${message.senderName}: ${message.body}`;
    });
    return rendered.join("\n");
}
32
/**
 * Format an ISO 8601 instant with offset as `YYYY-MM-DD HH:MM:SS ±HH:MM`,
 * preserving the offset that the parser set from the operator's IANA zone.
 * The wall-clock components are read directly from the ISO string — no
 * Date construction (which would re-interpret in the local zone).
 *
 * The offset is always emitted in `±HH:MM` form: `Z` becomes `+00:00` and a
 * compact `±HHMM` offset gets its colon inserted. Fractional seconds, if
 * present, are dropped.
 *
 * @param {string} iso - ISO 8601 timestamp ending in `Z`, `±HH:MM` or `±HHMM`.
 * @returns {string} The formatted wall-clock string, or `iso` unchanged when it
 *   does not match the expected shape.
 */
function formatWallClock(iso) {
    // ISO from parse-export is shaped: "YYYY-MM-DDTHH:MM:SS±HH:MM" (or "Z").
    const m = iso.match(/^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.\d+)?(Z|[+-]\d{2}:?\d{2})$/);
    if (!m) {
        return iso; // surface the raw value if the shape drifted; pure function never throws on caller-supplied data
    }
    const [, y, mo, d, h, mi, s, off] = m;
    let offsetLabel;
    if (off === "Z") {
        offsetLabel = "+00:00";
    }
    else if (off.includes(":")) {
        offsetLabel = off;
    }
    else {
        // The pattern also admits the compact "±HHMM" form; normalise it so the
        // output always carries the documented "±HH:MM" suffix.
        offsetLabel = `${off.slice(0, 3)}:${off.slice(3)}`;
    }
    return `${y}-${mo}-${d} ${h}:${mi}:${s} ${offsetLabel}`;
}
47
+ //# sourceMappingURL=to-classifier-input.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"to-classifier-input.js","sourceRoot":"","sources":["../src/to-classifier-input.ts"],"names":[],"mappings":";;AAwBA,8CAMC;AA5BD,8EAA8E;AAC9E,2DAA2D;AAC3D,EAAE;AACF,2EAA2E;AAC3E,oDAAoD;AACpD,EAAE;AACF,8CAA8C;AAC9C,8CAA8C;AAC9C,QAAQ;AACR,EAAE;AACF,8EAA8E;AAC9E,4EAA4E;AAC5E,0EAA0E;AAC1E,yEAAyE;AACzE,0CAA0C;AAC1C,EAAE;AACF,8EAA8E;AAC9E,oEAAoE;AACpE,6EAA6E;AAC7E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAgB,iBAAiB,CAAC,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,SAAS,eAAe,CAAC,GAAW;IAClC,yEAAyE;IACzE,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CACjB,iFAAiF,CAClF,CAAC;IACF,IAAI,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC,CAAC,iGAAiG;IACrH,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,WAAW,EAAE,CAAC;AAC1D,CAAC"}