@lota-sdk/core 0.1.13 → 0.1.15

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (95)
  1. package/package.json +5 -5
  2. package/src/ai/embedding-cache.ts +7 -6
  3. package/src/ai/index.ts +1 -0
  4. package/src/bifrost/bifrost.ts +12 -7
  5. package/src/config/agent-defaults.ts +1 -1
  6. package/src/config/logger.ts +7 -9
  7. package/src/{runtime.ts → create-runtime.ts} +6 -6
  8. package/src/db/cursor-pagination.ts +1 -1
  9. package/src/db/memory-store.ts +10 -6
  10. package/src/db/memory.ts +6 -4
  11. package/src/db/schema-fingerprint.ts +1 -0
  12. package/src/db/service.ts +45 -51
  13. package/src/db/startup.ts +3 -3
  14. package/src/index.ts +1 -1
  15. package/src/queues/context-compaction.queue.ts +4 -8
  16. package/src/queues/document-processor.queue.ts +7 -7
  17. package/src/queues/memory-consolidation.queue.ts +7 -8
  18. package/src/queues/post-chat-memory.queue.ts +2 -6
  19. package/src/queues/recent-activity-title-refinement.queue.ts +2 -6
  20. package/src/queues/regular-chat-memory-digest.queue.ts +4 -7
  21. package/src/queues/skill-extraction.queue.ts +4 -7
  22. package/src/queues/workstream-title-generation.queue.ts +2 -6
  23. package/src/redis/connection.ts +6 -3
  24. package/src/redis/index.ts +1 -0
  25. package/src/redis/org-memory-lock.ts +1 -1
  26. package/src/redis/redis-lease-lock.ts +41 -8
  27. package/src/runtime/agent-stream-helpers.ts +2 -1
  28. package/src/runtime/context-compaction-constants.ts +1 -1
  29. package/src/runtime/context-compaction-runtime.ts +6 -4
  30. package/src/runtime/context-compaction.ts +19 -38
  31. package/src/runtime/execution-plan.ts +2 -2
  32. package/src/runtime/helper-model.ts +3 -1
  33. package/src/runtime/index.ts +12 -1
  34. package/src/runtime/memory-block.ts +3 -2
  35. package/src/runtime/memory-pipeline.ts +24 -5
  36. package/src/runtime/plugin-types.ts +1 -1
  37. package/src/runtime/runtime-extensions.ts +89 -13
  38. package/src/runtime/title-helpers.ts +11 -2
  39. package/src/runtime/workstream-chat-helpers.ts +5 -6
  40. package/src/runtime/workstream-routing-policy.ts +0 -30
  41. package/src/runtime/workstream-state.ts +17 -7
  42. package/src/services/attachment.service.ts +1 -1
  43. package/src/services/context-compaction.service.ts +3 -3
  44. package/src/services/document-chunk.service.ts +37 -32
  45. package/src/services/execution-plan.service.ts +2 -0
  46. package/src/services/learned-skill.service.ts +6 -10
  47. package/src/services/{memory.utils.ts → memory-utils.ts} +4 -8
  48. package/src/services/memory.service.ts +21 -18
  49. package/src/services/organization-member.service.ts +1 -1
  50. package/src/services/plan-artifact.service.ts +1 -0
  51. package/src/services/plan-executor.service.ts +2 -18
  52. package/src/services/plan-helpers.ts +15 -0
  53. package/src/services/plan-validator.service.ts +3 -18
  54. package/src/services/recent-activity-title.service.ts +3 -10
  55. package/src/services/recent-activity.service.ts +6 -12
  56. package/src/services/workstream-message.service.ts +26 -16
  57. package/src/services/workstream-title.service.ts +1 -9
  58. package/src/services/{workstream-turn-preparation.ts → workstream-turn-preparation.service.ts} +401 -314
  59. package/src/services/workstream-turn.ts +2 -2
  60. package/src/services/workstream.service.ts +22 -10
  61. package/src/services/workstream.types.ts +7 -16
  62. package/src/storage/attachment-storage.service.ts +4 -4
  63. package/src/storage/{attachments.utils.ts → attachment-utils.ts} +1 -4
  64. package/src/storage/index.ts +2 -2
  65. package/src/system-agents/{context-compacter.agent.ts → context-compaction.agent.ts} +4 -4
  66. package/src/system-agents/delegated-agent-factory.ts +3 -2
  67. package/src/system-agents/index.ts +8 -0
  68. package/src/system-agents/memory-reranker.agent.ts +1 -1
  69. package/src/system-agents/memory.agent.ts +1 -1
  70. package/src/system-agents/recent-activity-title-refiner.agent.ts +1 -1
  71. package/src/tools/execution-plan.tool.ts +6 -2
  72. package/src/tools/fetch-webpage.tool.ts +20 -18
  73. package/src/tools/index.ts +2 -2
  74. package/src/tools/read-file-parts.tool.ts +1 -1
  75. package/src/tools/search-web.tool.ts +18 -15
  76. package/src/tools/{search-tools.ts → search.tool.ts} +1 -1
  77. package/src/tools/team-think.tool.ts +9 -5
  78. package/src/tools/{tool-contract.ts → tool-contracts.ts} +9 -2
  79. package/src/utils/async.ts +1 -1
  80. package/src/utils/errors.ts +15 -0
  81. package/src/utils/hono-error-handler.ts +1 -2
  82. package/src/utils/index.ts +10 -2
  83. package/src/utils/string.ts +14 -0
  84. package/src/workers/bootstrap.ts +2 -2
  85. package/src/workers/memory-consolidation.worker.ts +12 -12
  86. package/src/workers/regular-chat-memory-digest.helpers.ts +2 -7
  87. package/src/workers/regular-chat-memory-digest.runner.ts +9 -103
  88. package/src/workers/skill-extraction.runner.ts +7 -101
  89. package/src/workers/utils/file-section-chunker.ts +5 -3
  90. package/src/workers/utils/workstream-message-query.ts +106 -0
  91. package/src/workers/worker-utils.ts +4 -0
  92. package/src/runtime/retrieval-pipeline.ts +0 -3
  93. package/src/utils/error.ts +0 -10
  94. /package/src/services/{context-compaction-runtime.ts → context-compaction-runtime.singleton.ts} +0 -0
  95. /package/src/storage/{attachments.types.ts → attachment-types.ts} +0 -0
@@ -1,6 +1,14 @@
1
1
  export * from './async'
2
2
  export * from './date-time'
3
- export * from './error'
4
3
  export * from './errors'
5
4
  export * from './hono-error-handler'
6
- export * from './string'
5
+ export * from './sse-keepalive'
6
+ export {
7
+ CHARS_PER_TOKEN_ESTIMATE,
8
+ compactWhitespace,
9
+ isRecord,
10
+ readString,
11
+ readStringField,
12
+ truncateOptionalText,
13
+ truncateText,
14
+ } from './string'
@@ -49,3 +49,17 @@ export function truncateOptionalText(value: string | undefined, maxChars: number
49
49
  export function compactWhitespace(value: string): string {
50
50
  return value.trim().replace(/\s+/g, ' ')
51
51
  }
52
+
53
+ /**
54
+ * Returns the value as a plain record if it is a non-null, non-array object,
55
+ * or null otherwise.
56
+ */
57
+ export function readRecord(value: unknown): Record<string, unknown> | null {
58
+ if (!value || typeof value !== 'object' || Array.isArray(value)) return null
59
+ return value as Record<string, unknown>
60
+ }
61
+
62
+ /**
63
+ * Rough character-to-token estimate used for context budget calculations.
64
+ */
65
+ export const CHARS_PER_TOKEN_ESTIMATE = 3
@@ -1,4 +1,4 @@
1
- import { configureLogger, serverLogger } from '../config/logger'
1
+ import { configureLotaLogger, serverLogger } from '../config/logger'
2
2
  import { LOTA_SDK_DATABASE_NAME } from '../db/sdk-database'
3
3
  import { SurrealDBService, databaseService, setDatabaseService } from '../db/service'
4
4
  import { connectWithStartupRetry, waitForDatabaseBootstrap } from '../db/startup'
@@ -27,7 +27,7 @@ export async function initializeSandboxedWorkerRuntime(): Promise<void> {
27
27
 
28
28
  sandboxedWorkerRuntimePromise = (async () => {
29
29
  const env = parseWorkerBootstrapEnv(process.env)
30
- await configureLogger()
30
+ await configureLotaLogger()
31
31
 
32
32
  ensureDatabaseServiceConfigured()
33
33
 
@@ -1,6 +1,7 @@
1
1
  import type { SandboxedJob } from 'bullmq'
2
2
  import { BoundQuery, eq, inside } from 'surrealdb'
3
3
 
4
+ import { MEMORY } from '../config/constants'
4
5
  import { serverLogger } from '../config/logger'
5
6
  import { ensureRecordId, recordIdToString } from '../db/record-id'
6
7
  import type { RecordIdInput } from '../db/record-id'
@@ -16,6 +17,7 @@ await initializeSandboxedWorkerRuntime()
16
17
  const MEMORY_TABLE = TABLES.MEMORY
17
18
  const MEMORY_RELATION_TABLE = TABLES.MEMORY_RELATION
18
19
  const MEMORY_HISTORY_TABLE = TABLES.MEMORY_HISTORY
20
+ const RELATION_SUPERSEDES = 'supersedes' as const
19
21
  const HARD_SIMILARITY_THRESHOLD = 0.95
20
22
  const SOFT_SIMILARITY_THRESHOLD = 0.9
21
23
  const MAX_MEMORIES_PER_SCOPE = 500
@@ -130,7 +132,7 @@ async function deduplicateScope(scopeId: string): Promise<number> {
130
132
  ensureRecordId(winner.id, TABLES.MEMORY),
131
133
  MEMORY_RELATION_TABLE,
132
134
  ensureRecordId(loser.id, TABLES.MEMORY),
133
- { relationType: 'supersedes', confidence: 1.0 },
135
+ { relationType: RELATION_SUPERSEDES, confidence: 1.0 },
134
136
  )
135
137
 
136
138
  await databaseService.query(
@@ -187,13 +189,13 @@ async function collapseSupersedeCh(): Promise<number> {
187
189
  new BoundQuery(
188
190
  `SELECT
189
191
  id AS middleId,
190
- <-${MEMORY_RELATION_TABLE}[WHERE relationType = 'supersedes']<-${MEMORY_TABLE}.id AS predecessors,
191
- ->${MEMORY_RELATION_TABLE}[WHERE relationType = 'supersedes']->${MEMORY_TABLE}.id AS successors
192
+ <-${MEMORY_RELATION_TABLE}[WHERE relationType = '${RELATION_SUPERSEDES}']<-${MEMORY_TABLE}.id AS predecessors,
193
+ ->${MEMORY_RELATION_TABLE}[WHERE relationType = '${RELATION_SUPERSEDES}']->${MEMORY_TABLE}.id AS successors
192
194
  FROM ${MEMORY_TABLE}
193
195
  WHERE archivedAt IS NONE
194
- AND count(->${MEMORY_RELATION_TABLE}[WHERE relationType = 'supersedes']) > 0
195
- AND count(<-${MEMORY_RELATION_TABLE}[WHERE relationType = 'supersedes']) > 0
196
- LIMIT 100`,
196
+ AND count(->${MEMORY_RELATION_TABLE}[WHERE relationType = '${RELATION_SUPERSEDES}']) > 0
197
+ AND count(<-${MEMORY_RELATION_TABLE}[WHERE relationType = '${RELATION_SUPERSEDES}']) > 0
198
+ LIMIT ${MEMORY.MAX_KNN_LIMIT}`,
197
199
  ),
198
200
  )
199
201
 
@@ -210,7 +212,7 @@ async function collapseSupersedeCh(): Promise<number> {
210
212
  const existing = await databaseService.query<{ id: RecordIdInput }>(
211
213
  new BoundQuery(
212
214
  `SELECT id FROM ${MEMORY_RELATION_TABLE}
213
- WHERE in = $predId AND out = $succId AND relationType = 'supersedes'
215
+ WHERE in = $predId AND out = $succId AND relationType = '${RELATION_SUPERSEDES}'
214
216
  LIMIT 1`,
215
217
  { predId: predRef, succId: succRef },
216
218
  ),
@@ -218,7 +220,7 @@ async function collapseSupersedeCh(): Promise<number> {
218
220
 
219
221
  if (existing.length === 0) {
220
222
  await databaseService.relate(predRef, MEMORY_RELATION_TABLE, succRef, {
221
- relationType: 'supersedes',
223
+ relationType: RELATION_SUPERSEDES,
222
224
  confidence: 1.0,
223
225
  })
224
226
  }
@@ -288,10 +290,8 @@ const handler = async (job: SandboxedJob<MemoryConsolidationJob>) => {
288
290
  new BoundQuery(`SELECT VALUE scopeId FROM ${MEMORY_TABLE} WHERE archivedAt IS NONE GROUP BY scopeId`),
289
291
  )
290
292
 
291
- for (const scopeId of scopeIds) {
292
- const merged = await deduplicateScope(scopeId)
293
- totalMerged += merged
294
- }
293
+ const results = await Promise.all(scopeIds.map(deduplicateScope))
294
+ totalMerged = results.reduce((a, b) => a + b, 0)
295
295
  }
296
296
 
297
297
  const pruned = await pruneStaleMemories()
@@ -1,4 +1,5 @@
1
1
  import { isAgentName } from '../config/agent-defaults'
2
+ import { compactWhitespace } from '../utils/string'
2
3
 
3
4
  interface DigestMessageForTranscript {
4
5
  source: 'workstream'
@@ -8,10 +9,6 @@ interface DigestMessageForTranscript {
8
9
  metadata?: Record<string, unknown>
9
10
  }
10
11
 
11
- function normalizeWhitespace(value: string): string {
12
- return value.replace(/\s+/g, ' ').trim()
13
- }
14
-
15
12
  function normalizeFilePartMetadata(part: Record<string, unknown>): string | null {
16
13
  if (part.type !== 'file') return null
17
14
 
@@ -57,9 +54,7 @@ export function buildDigestTranscript(params: { messages: DigestMessageForTransc
57
54
 
58
55
  const sourcePrefix = `[${message.source}:${message.sourceId}]`
59
56
  const textParts = message.parts
60
- .flatMap((part) =>
61
- part.type === 'text' && typeof part.text === 'string' ? [normalizeWhitespace(part.text)] : [],
62
- )
57
+ .flatMap((part) => (part.type === 'text' && typeof part.text === 'string' ? [compactWhitespace(part.text)] : []))
63
58
  .filter((value) => value.length > 0)
64
59
  const fileParts = message.parts
65
60
  .map((part) => normalizeFilePartMetadata(part))
@@ -1,4 +1,3 @@
1
- import { toTimestamp } from '@lota-sdk/shared'
2
1
  import { BoundQuery } from 'surrealdb'
3
2
  import { z } from 'zod'
4
3
 
@@ -16,26 +15,20 @@ import { createHelperModelRuntime } from '../runtime/helper-model'
16
15
  import { getRuntimeAdapters, withConfiguredWorkspaceMemoryLock } from '../runtime/runtime-extensions'
17
16
  import { memoryService } from '../services/memory.service'
18
17
  import { createRegularChatMemoryDigestAgent } from '../system-agents/regular-chat-memory-digest.agent'
18
+ import { compactWhitespace } from '../utils/string'
19
19
  import { buildDigestTranscript, resolveWorkspaceBootstrapCutoff } from './regular-chat-memory-digest.helpers'
20
+ import {
21
+ compareDigestMessageOrder,
22
+ listEligibleWorkstreamMessages,
23
+ listWorkstreamIdsForOrg,
24
+ normalizeBlock,
25
+ } from './utils/workstream-message-query'
26
+ import type { DigestCursor, DigestMessage } from './utils/workstream-message-query'
20
27
 
21
28
  const StructuredProfilePatchSchema = z.record(z.string(), z.unknown()).default({})
22
29
 
23
30
  const REGULAR_CHAT_MEMORY_DIGEST_TIMEOUT_MS = 10 * 60 * 1000
24
31
  const WorkspaceMemoryRowSchema = z.object({ content: z.string() })
25
- const EntityIdRowSchema = z.string().trim().min(1)
26
- const RecordTimestampSchema = z.union([z.date(), z.string(), z.number()])
27
- const MessageRoleSchema = z.enum(['system', 'user', 'assistant'])
28
- const MessagePartSchema = z.record(z.string(), z.unknown())
29
- const MessageMetadataSchema = z.record(z.string(), z.unknown()).nullish()
30
-
31
- const WorkstreamDigestMessageRowSchema = z.object({
32
- id: z.string(),
33
- workstreamId: z.string(),
34
- role: MessageRoleSchema,
35
- parts: z.array(MessagePartSchema).optional(),
36
- metadata: MessageMetadataSchema,
37
- createdAt: RecordTimestampSchema,
38
- })
39
32
 
40
33
  const ExtractedFactSchema = z.object({
41
34
  content: z.string().trim().min(1),
@@ -52,40 +45,18 @@ const RegularChatMemoryDigestOutputSchema = z.object({
52
45
 
53
46
  const helperModelRuntime = createHelperModelRuntime()
54
47
 
55
- interface DigestCursor {
56
- createdAt: Date
57
- id: string
58
- }
59
-
60
- interface DigestMessage {
61
- source: 'workstream'
62
- sourceId: string
63
- role: 'system' | 'user' | 'assistant'
64
- parts: Array<Record<string, unknown>>
65
- metadata?: Record<string, unknown>
66
- cursor: DigestCursor
67
- }
68
-
69
48
  interface RegularChatDigestRunResult {
70
49
  skipped: boolean
71
50
  processedWorkstreamMessages: number
72
51
  followUpScheduled: boolean
73
52
  }
74
53
 
75
- function normalizeWhitespace(value: string): string {
76
- return value.replace(/\s+/g, ' ').trim()
77
- }
78
-
79
- function normalizeBlock(value: string): string {
80
- return value.replaceAll(String.fromCharCode(0), '').replace(/\r/g, '').trim()
81
- }
82
-
83
54
  function buildMemoryContext(memories: Array<{ content: string }>): string {
84
55
  if (memories.length === 0) return 'No existing memories.'
85
56
 
86
57
  return memories
87
58
  .map((memory, index) => {
88
- const content = normalizeWhitespace(memory.content)
59
+ const content = compactWhitespace(memory.content)
89
60
  if (!content) return ''
90
61
  return `${index + 1}. ${content}`
91
62
  })
@@ -117,75 +88,10 @@ function buildPrompt(params: {
117
88
  ].join('\n')
118
89
  }
119
90
 
120
- function mapWorkstreamDigestRow(row: z.infer<typeof WorkstreamDigestMessageRowSchema>): DigestMessage {
121
- return {
122
- source: 'workstream',
123
- sourceId: row.workstreamId,
124
- role: row.role,
125
- parts: row.parts ?? [],
126
- metadata: row.metadata ?? undefined,
127
- cursor: { createdAt: new Date(toTimestamp(row.createdAt)), id: row.id },
128
- }
129
- }
130
-
131
- function compareDigestMessageOrder(left: DigestMessage, right: DigestMessage): number {
132
- const timeDiff = left.cursor.createdAt.getTime() - right.cursor.createdAt.getTime()
133
- if (timeDiff !== 0) return timeDiff
134
- return left.cursor.id.localeCompare(right.cursor.id)
135
- }
136
-
137
91
  function getLastCursor(messages: DigestMessage[]): DigestCursor | null {
138
92
  return messages.length > 0 ? messages[messages.length - 1].cursor : null
139
93
  }
140
94
 
141
- async function listWorkstreamIdsForOrg(orgRef: RecordIdRef): Promise<RecordIdRef[]> {
142
- const ids = await databaseService.query<unknown>(
143
- new BoundQuery(
144
- `SELECT VALUE type::string(id) FROM ${TABLES.WORKSTREAM}
145
- WHERE organizationId = $organizationId`,
146
- { organizationId: orgRef },
147
- ),
148
- )
149
-
150
- return ids.map((value) => ensureRecordId(EntityIdRowSchema.parse(value), TABLES.WORKSTREAM))
151
- }
152
-
153
- async function listEligibleWorkstreamMessages(params: {
154
- workstreamIds: RecordIdRef[]
155
- cursor: DigestCursor | null
156
- onboardingCutoff: Date | null
157
- }): Promise<DigestMessage[]> {
158
- if (params.workstreamIds.length === 0) return []
159
-
160
- let query: BoundQuery | null = null
161
- if (params.cursor) {
162
- const cursorRowId = ensureRecordId(params.cursor.id, TABLES.WORKSTREAM_MESSAGE)
163
- query = new BoundQuery(
164
- `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
165
- WHERE workstreamId IN $workstreamIds
166
- AND (
167
- createdAt > $cursorCreatedAt
168
- OR (createdAt = $cursorCreatedAt AND id > $cursorRowId)
169
- )
170
- ORDER BY createdAt ASC, id ASC`,
171
- { workstreamIds: params.workstreamIds, cursorCreatedAt: params.cursor.createdAt, cursorRowId },
172
- )
173
- } else if (params.onboardingCutoff) {
174
- query = new BoundQuery(
175
- `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
176
- WHERE workstreamId IN $workstreamIds
177
- AND createdAt > $onboardingCutoff
178
- ORDER BY createdAt ASC, id ASC`,
179
- { workstreamIds: params.workstreamIds, onboardingCutoff: params.onboardingCutoff },
180
- )
181
- }
182
-
183
- if (!query) return []
184
-
185
- const rows = await databaseService.query<unknown>(query)
186
- return rows.map((row) => mapWorkstreamDigestRow(WorkstreamDigestMessageRowSchema.parse(row)))
187
- }
188
-
189
95
  async function hasNewEligibleWorkstreamMessages(params: {
190
96
  workstreamIds: RecordIdRef[]
191
97
  cursor: DigestCursor | null
@@ -1,11 +1,5 @@
1
- import { toTimestamp } from '@lota-sdk/shared'
2
- import { BoundQuery } from 'surrealdb'
3
- import { z } from 'zod'
4
-
5
1
  import { serverLogger } from '../config/logger'
6
2
  import { ensureRecordId, recordIdToString } from '../db/record-id'
7
- import type { RecordIdRef } from '../db/record-id'
8
- import { databaseService } from '../db/service'
9
3
  import { TABLES } from '../db/tables'
10
4
  import { getDefaultEmbeddings } from '../embeddings/provider'
11
5
  import type { SkillExtractionJob } from '../queues/skill-extraction.queue'
@@ -16,38 +10,15 @@ import { createSkillExtractorAgent, SkillExtractionOutputSchema } from '../syste
16
10
  import type { SkillCandidate } from '../system-agents/skill-extractor.agent'
17
11
  import { createSkillManagerAgent, SkillManagerOutputSchema } from '../system-agents/skill-manager.agent'
18
12
  import { buildDigestTranscript, resolveWorkspaceBootstrapCutoff } from './regular-chat-memory-digest.helpers'
13
+ import {
14
+ compareDigestMessageOrder,
15
+ listEligibleWorkstreamMessages,
16
+ listWorkstreamIdsForOrg,
17
+ } from './utils/workstream-message-query'
19
18
 
20
19
  const SKILL_EXTRACTION_TIMEOUT_MS = 10 * 60 * 1000
21
20
  const MIN_MESSAGE_THRESHOLD = 10
22
21
 
23
- const RecordTimestampSchema = z.union([z.date(), z.string(), z.number()])
24
- const MessageRoleSchema = z.enum(['system', 'user', 'assistant'])
25
- const MessagePartSchema = z.record(z.string(), z.unknown())
26
- const MessageMetadataSchema = z.record(z.string(), z.unknown()).nullish()
27
-
28
- const WorkstreamMessageRowSchema = z.object({
29
- id: z.string(),
30
- workstreamId: z.string(),
31
- role: MessageRoleSchema,
32
- parts: z.array(MessagePartSchema).optional(),
33
- metadata: MessageMetadataSchema,
34
- createdAt: RecordTimestampSchema,
35
- })
36
-
37
- interface DigestCursor {
38
- createdAt: Date
39
- id: string
40
- }
41
-
42
- interface DigestMessage {
43
- source: 'workstream'
44
- sourceId: string
45
- role: 'system' | 'user' | 'assistant'
46
- parts: Array<Record<string, unknown>>
47
- metadata?: Record<string, unknown>
48
- cursor: DigestCursor
49
- }
50
-
51
22
  interface SkillExtractionRunResult {
52
23
  skipped: boolean
53
24
  processedMessages: number
@@ -58,71 +29,6 @@ const embeddings = getDefaultEmbeddings()
58
29
 
59
30
  const helperModelRuntime = createHelperModelRuntime()
60
31
 
61
- function mapWorkstreamRow(row: z.infer<typeof WorkstreamMessageRowSchema>): DigestMessage {
62
- return {
63
- source: 'workstream',
64
- sourceId: row.workstreamId,
65
- role: row.role,
66
- parts: row.parts ?? [],
67
- metadata: row.metadata ?? undefined,
68
- cursor: { createdAt: new Date(toTimestamp(row.createdAt)), id: row.id },
69
- }
70
- }
71
-
72
- function compareMessageOrder(left: DigestMessage, right: DigestMessage): number {
73
- const timeDiff = left.cursor.createdAt.getTime() - right.cursor.createdAt.getTime()
74
- if (timeDiff !== 0) return timeDiff
75
- return left.cursor.id.localeCompare(right.cursor.id)
76
- }
77
-
78
- async function listWorkstreamIdsForOrg(orgRef: RecordIdRef): Promise<RecordIdRef[]> {
79
- const EntityIdRowSchema = z.string().trim().min(1)
80
- const ids = await databaseService.query<unknown>(
81
- new BoundQuery(
82
- `SELECT VALUE type::string(id) FROM ${TABLES.WORKSTREAM}
83
- WHERE organizationId = $organizationId`,
84
- { organizationId: orgRef },
85
- ),
86
- )
87
- return ids.map((value) => ensureRecordId(EntityIdRowSchema.parse(value), TABLES.WORKSTREAM))
88
- }
89
-
90
- async function listEligibleMessages(params: {
91
- workstreamIds: RecordIdRef[]
92
- cursor: DigestCursor | null
93
- onboardingCutoff: Date | null
94
- }): Promise<DigestMessage[]> {
95
- if (params.workstreamIds.length === 0) return []
96
-
97
- let query: BoundQuery | null = null
98
- if (params.cursor) {
99
- const cursorRowId = ensureRecordId(params.cursor.id, TABLES.WORKSTREAM_MESSAGE)
100
- query = new BoundQuery(
101
- `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
102
- WHERE workstreamId IN $workstreamIds
103
- AND (
104
- createdAt > $cursorCreatedAt
105
- OR (createdAt = $cursorCreatedAt AND id > $cursorRowId)
106
- )
107
- ORDER BY createdAt ASC, id ASC`,
108
- { workstreamIds: params.workstreamIds, cursorCreatedAt: params.cursor.createdAt, cursorRowId },
109
- )
110
- } else if (params.onboardingCutoff) {
111
- query = new BoundQuery(
112
- `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
113
- WHERE workstreamId IN $workstreamIds
114
- AND createdAt > $onboardingCutoff
115
- ORDER BY createdAt ASC, id ASC`,
116
- { workstreamIds: params.workstreamIds, onboardingCutoff: params.onboardingCutoff },
117
- )
118
- }
119
-
120
- if (!query) return []
121
-
122
- const rows = await databaseService.query<unknown>(query)
123
- return rows.map((row) => mapWorkstreamRow(WorkstreamMessageRowSchema.parse(row)))
124
- }
125
-
126
32
  function buildExtractionPrompt(params: { workspaceName: string; transcript: string; existingSkills: string }): string {
127
33
  return [
128
34
  `Workspace name: ${params.workspaceName}`,
@@ -196,14 +102,14 @@ export async function runSkillExtraction(data: SkillExtractionJob): Promise<Skil
196
102
  })
197
103
 
198
104
  const workstreamIds = await listWorkstreamIdsForOrg(orgRef)
199
- const messages = await listEligibleMessages({ workstreamIds, cursor: existingCursor, onboardingCutoff })
105
+ const messages = await listEligibleWorkstreamMessages({ workstreamIds, cursor: existingCursor, onboardingCutoff })
200
106
 
201
107
  if (messages.length < MIN_MESSAGE_THRESHOLD) {
202
108
  serverLogger.info`Skipping skill extraction for ${orgId}: only ${messages.length} messages (threshold: ${MIN_MESSAGE_THRESHOLD})`
203
109
  return { skipped: true, processedMessages: messages.length, extractedSkills: 0 }
204
110
  }
205
111
 
206
- const sortedMessages = [...messages].sort(compareMessageOrder)
112
+ const sortedMessages = [...messages].sort(compareDigestMessageOrder)
207
113
  const { transcript } = buildDigestTranscript({ messages: sortedMessages })
208
114
 
209
115
  const existingSkills = await learnedSkillService.listForOrg(orgId)
@@ -1,7 +1,9 @@
1
1
  export const DEFAULT_FILE_SECTION_CHUNK_MAX_CHARS = 250_000
2
- const MIN_FILE_SECTION_CHUNK_MAX_CHARS = 4_000
2
+ export const MIN_FILE_SECTION_CHUNK_MAX_CHARS = 4_000
3
3
  export const DEFAULT_FILE_SECTION_CHUNK_MIN_CHARS = 10_000
4
4
  const SECTION_SEPARATOR_LENGTH = 2
5
+ const CHARS_PER_TOKEN_ESTIMATE = 3
6
+ const MIN_CHUNK_CHARS_FLOOR = 512
5
7
 
6
8
  export interface FileSection {
7
9
  kind: 'preamble' | 'file'
@@ -29,7 +31,7 @@ export interface FileSectionChunkOptions {
29
31
 
30
32
  function estimateTokenCountFromChars(text: string): number {
31
33
  if (!text) return 0
32
- return Math.ceil(text.length / 3)
34
+ return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE)
33
35
  }
34
36
 
35
37
  function normalizeMaxChars(value?: number): number {
@@ -43,7 +45,7 @@ function normalizeMinChunkChars(value: number | undefined, maxChars: number): nu
43
45
  if (typeof value !== 'number' || !Number.isFinite(value)) {
44
46
  return Math.min(DEFAULT_FILE_SECTION_CHUNK_MIN_CHARS, Math.floor(maxChars * 0.35))
45
47
  }
46
- const normalized = Math.max(512, Math.floor(value))
48
+ const normalized = Math.max(MIN_CHUNK_CHARS_FLOOR, Math.floor(value))
47
49
  return Math.min(normalized, Math.floor(maxChars * 0.6))
48
50
  }
49
51
 
@@ -0,0 +1,106 @@
1
+ import { toTimestamp } from '@lota-sdk/shared'
2
+ import { BoundQuery } from 'surrealdb'
3
+ import { z } from 'zod'
4
+
5
+ import { ensureRecordId } from '../../db/record-id'
6
+ import type { RecordIdRef } from '../../db/record-id'
7
+ import { databaseService } from '../../db/service'
8
+ import { TABLES } from '../../db/tables'
9
+ import { normalizeTextBody } from '../../document/parsing'
10
+
11
+ const RecordTimestampSchema = z.union([z.date(), z.string(), z.number()])
12
+ const MessageRoleSchema = z.enum(['system', 'user', 'assistant'])
13
+ const MessagePartSchema = z.record(z.string(), z.unknown())
14
+ const MessageMetadataSchema = z.record(z.string(), z.unknown()).nullish()
15
+
16
+ const WorkstreamMessageRowSchema = z.object({
17
+ id: z.string(),
18
+ workstreamId: z.string(),
19
+ role: MessageRoleSchema,
20
+ parts: z.array(MessagePartSchema).optional(),
21
+ metadata: MessageMetadataSchema,
22
+ createdAt: RecordTimestampSchema,
23
+ })
24
+
25
+ export interface DigestCursor {
26
+ createdAt: Date
27
+ id: string
28
+ }
29
+
30
+ export interface DigestMessage {
31
+ source: 'workstream'
32
+ sourceId: string
33
+ role: 'system' | 'user' | 'assistant'
34
+ parts: Array<Record<string, unknown>>
35
+ metadata?: Record<string, unknown>
36
+ cursor: DigestCursor
37
+ }
38
+
39
+ function mapWorkstreamRow(row: z.infer<typeof WorkstreamMessageRowSchema>): DigestMessage {
40
+ return {
41
+ source: 'workstream',
42
+ sourceId: row.workstreamId,
43
+ role: row.role,
44
+ parts: row.parts ?? [],
45
+ metadata: row.metadata ?? undefined,
46
+ cursor: { createdAt: new Date(toTimestamp(row.createdAt) ?? Date.now()), id: row.id },
47
+ }
48
+ }
49
+
50
+ export function compareDigestMessageOrder(left: DigestMessage, right: DigestMessage): number {
51
+ const timeDiff = left.cursor.createdAt.getTime() - right.cursor.createdAt.getTime()
52
+ if (timeDiff !== 0) return timeDiff
53
+ return left.cursor.id.localeCompare(right.cursor.id)
54
+ }
55
+
56
+ export async function listWorkstreamIdsForOrg(orgRef: RecordIdRef): Promise<RecordIdRef[]> {
57
+ const EntityIdRowSchema = z.string().trim().min(1)
58
+ const ids = await databaseService.query<unknown>(
59
+ new BoundQuery(
60
+ `SELECT VALUE type::string(id) FROM ${TABLES.WORKSTREAM}
61
+ WHERE organizationId = $organizationId`,
62
+ { organizationId: orgRef },
63
+ ),
64
+ )
65
+ return ids.map((value) => ensureRecordId(EntityIdRowSchema.parse(value), TABLES.WORKSTREAM))
66
+ }
67
+
68
+ export async function listEligibleWorkstreamMessages(params: {
69
+ workstreamIds: RecordIdRef[]
70
+ cursor: DigestCursor | null
71
+ onboardingCutoff: Date | null
72
+ }): Promise<DigestMessage[]> {
73
+ if (params.workstreamIds.length === 0) return []
74
+
75
+ let query: BoundQuery | null = null
76
+ if (params.cursor) {
77
+ const cursorRowId = ensureRecordId(params.cursor.id, TABLES.WORKSTREAM_MESSAGE)
78
+ query = new BoundQuery(
79
+ `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
80
+ WHERE workstreamId IN $workstreamIds
81
+ AND (
82
+ createdAt > $cursorCreatedAt
83
+ OR (createdAt = $cursorCreatedAt AND id > $cursorRowId)
84
+ )
85
+ ORDER BY createdAt ASC, id ASC`,
86
+ { workstreamIds: params.workstreamIds, cursorCreatedAt: params.cursor.createdAt, cursorRowId },
87
+ )
88
+ } else if (params.onboardingCutoff) {
89
+ query = new BoundQuery(
90
+ `SELECT type::string(id) AS id, type::string(workstreamId) AS workstreamId, role, parts, metadata, createdAt FROM ${TABLES.WORKSTREAM_MESSAGE}
91
+ WHERE workstreamId IN $workstreamIds
92
+ AND createdAt > $onboardingCutoff
93
+ ORDER BY createdAt ASC, id ASC`,
94
+ { workstreamIds: params.workstreamIds, onboardingCutoff: params.onboardingCutoff },
95
+ )
96
+ }
97
+
98
+ if (!query) return []
99
+
100
+ const rows = await databaseService.query<unknown>(query)
101
+ return rows.map((row) => mapWorkstreamRow(WorkstreamMessageRowSchema.parse(row)))
102
+ }
103
+
104
+ export function normalizeBlock(value: string): string {
105
+ return normalizeTextBody(value)
106
+ }
@@ -5,6 +5,10 @@ import type { Job, Worker } from 'bullmq'
5
5
 
6
6
  import { chatLogger } from '../config/logger'
7
7
 
8
+ export const DEFAULT_JOB_RETENTION = { removeOnComplete: 200, removeOnFail: 200 }
9
+ export const LOW_JOB_RETENTION = { removeOnComplete: 50, removeOnFail: 50 }
10
+ export const LONG_JOB_LOCK_DURATION_MS = 600_000
11
+
8
12
  const DEFAULT_SHUTDOWN_TIMEOUT_MS = 10_000
9
13
  const MAX_TRACE_STRING_CHARS = 2_000
10
14
  const MAX_TRACE_ARRAY_ITEMS = 12
@@ -1,3 +0,0 @@
1
- export function resolveCandidateLimit(params: { limit: number; multiplier: number; minimum: number }): number {
2
- return Math.max(params.limit * params.multiplier, params.minimum)
3
- }
@@ -1,10 +0,0 @@
1
- export function getErrorMessage(error: unknown): string {
2
- if (error instanceof Error) return error.message
3
- if (typeof error === 'string') return error
4
-
5
- try {
6
- return JSON.stringify(error)
7
- } catch {
8
- return String(error)
9
- }
10
- }