@yolk-sdk/knowledge 0.0.1-canary.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/LICENSE +21 -0
  2. package/README.md +104 -0
  3. package/dist/agent.d.mts +26 -0
  4. package/dist/agent.d.mts.map +1 -0
  5. package/dist/agent.mjs +45 -0
  6. package/dist/agent.mjs.map +1 -0
  7. package/dist/artifacts.d.mts +37 -0
  8. package/dist/artifacts.d.mts.map +1 -0
  9. package/dist/artifacts.mjs +28 -0
  10. package/dist/artifacts.mjs.map +1 -0
  11. package/dist/chunking.d.mts +38 -0
  12. package/dist/chunking.d.mts.map +1 -0
  13. package/dist/chunking.mjs +93 -0
  14. package/dist/chunking.mjs.map +1 -0
  15. package/dist/context.d.mts +16 -0
  16. package/dist/context.d.mts.map +1 -0
  17. package/dist/context.mjs +25 -0
  18. package/dist/context.mjs.map +1 -0
  19. package/dist/documents.d.mts +105 -0
  20. package/dist/documents.d.mts.map +1 -0
  21. package/dist/documents.mjs +93 -0
  22. package/dist/documents.mjs.map +1 -0
  23. package/dist/embeddings.d.mts +14 -0
  24. package/dist/embeddings.d.mts.map +1 -0
  25. package/dist/embeddings.mjs +8 -0
  26. package/dist/embeddings.mjs.map +1 -0
  27. package/dist/errors.d.mts +72 -0
  28. package/dist/errors.d.mts.map +1 -0
  29. package/dist/errors.mjs +26 -0
  30. package/dist/errors.mjs.map +1 -0
  31. package/dist/extraction.d.mts +19 -0
  32. package/dist/extraction.d.mts.map +1 -0
  33. package/dist/extraction.mjs +7 -0
  34. package/dist/extraction.mjs.map +1 -0
  35. package/dist/index.d.mts +5 -0
  36. package/dist/index.mjs +5 -0
  37. package/dist/ingestion.d.mts +48 -0
  38. package/dist/ingestion.d.mts.map +1 -0
  39. package/dist/ingestion.mjs +111 -0
  40. package/dist/ingestion.mjs.map +1 -0
  41. package/dist/links.d.mts +17 -0
  42. package/dist/links.d.mts.map +1 -0
  43. package/dist/links.mjs +24 -0
  44. package/dist/links.mjs.map +1 -0
  45. package/dist/provenance.d.mts +20 -0
  46. package/dist/provenance.d.mts.map +1 -0
  47. package/dist/provenance.mjs +26 -0
  48. package/dist/provenance.mjs.map +1 -0
  49. package/dist/records.d.mts +59 -0
  50. package/dist/records.d.mts.map +1 -0
  51. package/dist/records.mjs +64 -0
  52. package/dist/records.mjs.map +1 -0
  53. package/dist/representations.d.mts +36 -0
  54. package/dist/representations.d.mts.map +1 -0
  55. package/dist/representations.mjs +44 -0
  56. package/dist/representations.mjs.map +1 -0
  57. package/dist/search-store.d.mts +77 -0
  58. package/dist/search-store.d.mts.map +1 -0
  59. package/dist/search-store.mjs +7 -0
  60. package/dist/search-store.mjs.map +1 -0
  61. package/dist/search.d.mts +49 -0
  62. package/dist/search.d.mts.map +1 -0
  63. package/dist/search.mjs +179 -0
  64. package/dist/search.mjs.map +1 -0
  65. package/dist/store.d.mts +44 -0
  66. package/dist/store.d.mts.map +1 -0
  67. package/dist/store.mjs +7 -0
  68. package/dist/store.mjs.map +1 -0
  69. package/dist/summarization.d.mts +23 -0
  70. package/dist/summarization.d.mts.map +1 -0
  71. package/dist/summarization.mjs +8 -0
  72. package/dist/summarization.mjs.map +1 -0
  73. package/dist/vector-store.d.mts +3 -0
  74. package/dist/vector-store.mjs +2 -0
  75. package/package.json +147 -0
  76. package/src/agent.ts +96 -0
  77. package/src/artifacts.ts +48 -0
  78. package/src/chunking.ts +175 -0
  79. package/src/context.ts +42 -0
  80. package/src/documents.ts +109 -0
  81. package/src/embeddings.ts +18 -0
  82. package/src/errors.ts +63 -0
  83. package/src/extraction.ts +21 -0
  84. package/src/index.ts +4 -0
  85. package/src/ingestion.ts +157 -0
  86. package/src/links.ts +23 -0
  87. package/src/provenance.ts +25 -0
  88. package/src/records.ts +76 -0
  89. package/src/representations.ts +51 -0
  90. package/src/search-store.ts +98 -0
  91. package/src/search.ts +270 -0
  92. package/src/store.ts +53 -0
  93. package/src/summarization.ts +28 -0
  94. package/src/vector-store.ts +6 -0
package/src/search.ts ADDED
@@ -0,0 +1,270 @@
1
+ import { Effect } from 'effect'
2
+ import type { KnowledgeChunk, KnowledgeDocument, KnowledgeSearchScope } from './documents.ts'
3
+ import { KnowledgeEmbedder } from './embeddings.ts'
4
+ import { KnowledgeSearchError } from './errors.ts'
5
+ import { SearchIndexStore } from './search-store.ts'
6
+
7
/**
 * Parameters accepted by `searchKnowledge`.
 *
 * Optional fields are defaulted and checked by `validateSearchInput`.
 */
export type KnowledgeSearchInput = Readonly<{
  /** Collection scope to search; it must resolve to at least one collection id. */
  scope: KnowledgeSearchScope
  /** Query text; it is trimmed before use and must not be empty afterwards. */
  query: string
  /** Requested number of results; positive integer, defaults to 10. */
  limit?: number
  /** Optional finite score threshold forwarded to the vector search. */
  minScore?: number
  /** Value passed to the store's `getContextChunks`; non-negative integer, defaults to 0 (no context lookup). */
  contextChunks?: number
  /** Retrieval strategy; defaults to 'hybrid'. */
  mode?: KnowledgeSearchMode
  /** Limit for the vector search; defaults to `limit` in 'vector' mode and to max(limit * 5, 40) in 'hybrid' mode. */
  vectorLimit?: number
  /** Limit for the text search; defaults to max(limit * 5, 40). */
  textLimit?: number
}>
17
+
18
/** Retrieval strategy: 'vector' runs the embedding search only; 'hybrid' also runs the text search and fuses both rankings. */
export type KnowledgeSearchMode = 'hybrid' | 'vector'
19
+
20
/** Per-source score breakdown attached to fused search results. */
export type KnowledgeSearchScores = Readonly<{
  /** Score the chunk had in the vector result list, when it appeared there. */
  vector?: number
  /** Score the chunk had in the text result list, when it appeared there. */
  text?: number
  /** Reciprocal-rank-fusion score accumulated across the result lists. */
  fused?: number
}>
25
+
26
/** A single search hit. */
export type KnowledgeSearchResult = Readonly<{
  /** The chunk that matched. */
  chunk: KnowledgeChunk
  /** Ranking score; for fused results this is the fused score. */
  score: number
  /** Document carried alongside the chunk. */
  document: KnowledgeDocument
  /** Context chunks attached by `searchKnowledge` when `contextChunks` is greater than zero. */
  context?: readonly KnowledgeChunk[]
  /** Score breakdown; set by `fuseKnowledgeSearchResults`. */
  scores?: KnowledgeSearchScores
}>
33
+
34
/** Search results packed into a single text blob, as produced by `packKnowledgeSearchContext`. */
export type KnowledgeSearchContext = Readonly<{
  /** The query the results belong to. */
  query: string
  /** The results the text was built from, in their original order. */
  results: readonly KnowledgeSearchResult[]
  /** Per-result text joined with blank lines. */
  text: string
}>
39
+
40
/** Object-shaped search capability: one `search` function from input to results. */
export type KnowledgeSearcher = Readonly<{
  /** Runs a search; succeeds with the result list or fails with `KnowledgeSearchError`. */
  search: (input: KnowledgeSearchInput) => Effect.Effect<readonly KnowledgeSearchResult[], KnowledgeSearchError>
}>
45
+
46
/**
 * Text contributed by one search result to a packed context.
 *
 * When the result carries context chunks, their contents are joined with blank
 * lines. A missing OR empty `context` falls back to the matched chunk's own
 * content, so a hit is never reduced to an empty string.
 */
const packedResultText = (result: KnowledgeSearchResult) => {
  const context = result.context
  if (context === undefined || context.length === 0) {
    return result.chunk.content
  }
  return context.map(chunk => chunk.content).join('\n\n')
}

/**
 * Packs search results into a single text blob.
 *
 * @param query - The query the results were produced for; returned unchanged.
 * @param results - Results to pack; order is preserved.
 * @returns The query, the untouched results, and `text`: each result's text
 *   (see `packedResultText`) joined with blank lines. An empty result list
 *   yields an empty `text`.
 */
export const packKnowledgeSearchContext = (
  query: string,
  results: ReadonlyArray<KnowledgeSearchResult>
): KnowledgeSearchContext => ({
  query,
  results,
  text: results.map(packedResultText).join('\n\n')
})
55
+
56
/**
 * Collection ids addressed by a search scope.
 *
 * @param scope - A single-collection or multi-collection scope.
 * @returns `[scope.id]` for 'KnowledgeCollection', `scope.ids` for
 *   'KnowledgeCollections'. Any other tag (only reachable from untyped callers)
 *   yields an empty list, which `validateSearchInput` reports as an empty scope
 *   instead of crashing on `undefined.length`.
 */
const scopeIds = (scope: KnowledgeSearchScope): ReadonlyArray<string> => {
  switch (scope._tag) {
    case 'KnowledgeCollection':
      return [scope.id]
    case 'KnowledgeCollections':
      return scope.ids
    default:
      // Unreachable for well-typed input; guards runtime-malformed scopes.
      return []
  }
}
64
+
65
/** Default per-source limit for hybrid search: five times the requested limit, but never fewer than 40. */
const defaultHybridCandidateLimit = (limit: number) => {
  const minimumCandidates = 40
  const oversampled = 5 * limit
  return Math.max(minimumCandidates, oversampled)
}
66
+
67
/** Reciprocal-rank-fusion contribution of one hit: `1 / (rankConstant + rank)`, with `rank` starting at 1. */
const reciprocalRankFusionScore = (rank: number, rankConstant: number) => 1 / (rankConstant + rank)

/**
 * Merges vector and text hits into one ranking using reciprocal rank fusion.
 *
 * Each list contributes `1 / (rankConstant + rank)` per chunk, keyed by
 * `chunk.id`. A chunk present in both lists gets the sum of both
 * contributions. When a chunk appears in both lists, the result object from
 * the vector list is the one returned.
 *
 * @param input.vectorResults - Vector hits, best first.
 * @param input.textResults - Text hits, best first.
 * @param input.limit - Maximum number of fused results. Values below zero are
 *   treated as zero (a raw negative would make `slice` drop only the tail).
 * @param input.rankConstant - RRF constant; defaults to 60.
 * @returns Results sorted by fused score, descending. `score` is the fused
 *   score and `scores` carries the raw vector/text scores plus the fused one.
 */
export const fuseKnowledgeSearchResults = (input: {
  readonly vectorResults: ReadonlyArray<KnowledgeSearchResult>
  readonly textResults: ReadonlyArray<KnowledgeSearchResult>
  readonly limit: number
  readonly rankConstant?: number
}): ReadonlyArray<KnowledgeSearchResult> => {
  const rankConstant = input.rankConstant ?? 60
  const limit = Math.max(0, input.limit)
  const fused = new Map<
    string,
    {
      readonly result: KnowledgeSearchResult
      readonly vector?: number
      readonly text?: number
      readonly fused: number
    }
  >()

  const addResults = (results: ReadonlyArray<KnowledgeSearchResult>, kind: 'vector' | 'text') => {
    // A ranked list contributes at most once per chunk: a repeated id would
    // otherwise be counted twice and overwrite the better-ranked raw score.
    const seen = new Set<string>()
    let rank = 0
    for (const result of results) {
      const id = result.chunk.id
      if (seen.has(id)) {
        continue
      }
      seen.add(id)
      rank += 1
      const previous = fused.get(id)
      fused.set(id, {
        result: previous?.result ?? result,
        vector: kind === 'vector' ? result.score : previous?.vector,
        text: kind === 'text' ? result.score : previous?.text,
        fused: (previous?.fused ?? 0) + reciprocalRankFusionScore(rank, rankConstant)
      })
    }
  }

  addResults(input.vectorResults, 'vector')
  addResults(input.textResults, 'text')

  return Array.from(fused.values())
    .sort((left, right) => right.fused - left.fused)
    .slice(0, limit)
    .map(item => ({
      ...item.result,
      score: item.fused,
      scores: { vector: item.vector, text: item.text, fused: item.fused }
    }))
}
112
+
113
/**
 * Applies defaults to a search input and checks it.
 *
 * Defaults: `limit` 10, `contextChunks` 0, `mode` 'hybrid', `textLimit`
 * max(limit * 5, 40), `vectorLimit` the same in hybrid mode and `limit` in
 * vector mode. The query is trimmed.
 *
 * @returns An effect succeeding with the normalized values, or failing with a
 *   `KnowledgeSearchError` for the first rule that is violated (rules are
 *   checked in the order listed below).
 */
const validateSearchInput = (input: KnowledgeSearchInput) => {
  const query = input.query.trim()
  const ids = scopeIds(input.scope)
  const limit = input.limit ?? 10
  const contextChunks = input.contextChunks ?? 0
  const mode = input.mode ?? 'hybrid'
  const vectorLimit = input.vectorLimit ?? (mode === 'hybrid' ? defaultHybridCandidateLimit(limit) : limit)
  const textLimit = input.textLimit ?? defaultHybridCandidateLimit(limit)

  const notPositiveInteger = (value: number) => !Number.isInteger(value) || value < 1

  // Ordered rule table; predicates are lazy so only rules up to the first
  // violation are evaluated.
  const rules: ReadonlyArray<{
    readonly violated: () => boolean
    readonly message: string
    readonly stage: 'embed' | 'store'
  }> = [
    { violated: () => query.length === 0, message: 'Search query is empty', stage: 'embed' },
    { violated: () => ids.length === 0, message: 'Search scope is empty', stage: 'store' },
    { violated: () => notPositiveInteger(limit), message: 'Search limit must be positive', stage: 'store' },
    {
      violated: () => notPositiveInteger(vectorLimit),
      message: 'Vector search limit must be positive',
      stage: 'store'
    },
    { violated: () => notPositiveInteger(textLimit), message: 'Text search limit must be positive', stage: 'store' },
    { violated: () => mode !== 'vector' && mode !== 'hybrid', message: 'Search mode is invalid', stage: 'store' },
    {
      violated: () => !Number.isInteger(contextChunks) || contextChunks < 0,
      message: 'Context chunks must be zero or positive',
      stage: 'store'
    },
    {
      violated: () => input.minScore !== undefined && !Number.isFinite(input.minScore),
      message: 'Search minScore must be finite',
      stage: 'store'
    }
  ]

  for (const rule of rules) {
    if (rule.violated()) {
      return Effect.fail(new KnowledgeSearchError({ message: rule.message, stage: rule.stage }))
    }
  }

  return Effect.succeed({ query, limit, contextChunks, mode, vectorLimit, textLimit })
}
160
+
161
/**
 * Embeds the query and runs the store's vector search.
 *
 * Embedder failures are re-raised as `KnowledgeSearchError` with stage
 * 'embed'; store failures with stage 'store'. The original error is kept as
 * `cause` in both cases.
 */
const searchVectorChunks = (input: {
  readonly scope: KnowledgeSearchScope
  readonly query: string
  readonly limit: number
  readonly minScore?: number
}) =>
  Effect.gen(function* () {
    const store = yield* SearchIndexStore
    const embedder = yield* KnowledgeEmbedder

    const embedding = yield* Effect.mapError(
      embedder.embedQuery(input.query),
      failure => new KnowledgeSearchError({ message: failure.message, stage: 'embed', cause: failure })
    )

    const { scope, limit, minScore } = input
    const lookup = store.searchChunks({ scope, embedding, limit, minScore })

    return yield* Effect.mapError(
      lookup,
      failure => new KnowledgeSearchError({ message: failure.message, stage: 'store', cause: failure })
    )
  })
191
+
192
/**
 * Runs the store's text search for the query.
 *
 * Store failures are re-raised as `KnowledgeSearchError` with stage 'store',
 * keeping the original error as `cause`.
 */
const searchTextChunks = (input: {
  readonly scope: KnowledgeSearchScope
  readonly query: string
  readonly limit: number
}) =>
  Effect.gen(function* () {
    const { scope, query, limit } = input
    const store = yield* SearchIndexStore
    const textSearch = store.searchChunksByText({ scope, query, limit })

    return yield* Effect.mapError(
      textSearch,
      failure => new KnowledgeSearchError({ message: failure.message, stage: 'store', cause: failure })
    )
  })
207
+
208
/**
 * Runs a knowledge search and optionally attaches context chunks to each hit.
 *
 * 1. The input is defaulted and checked by `validateSearchInput`.
 * 2. The current span is annotated with query length, limit, mode, context
 *    chunk count and the number of scope ids.
 * 3. In 'vector' mode only the vector search runs. In 'hybrid' mode the vector
 *    and text searches run concurrently and are merged with
 *    `fuseKnowledgeSearchResults`.
 * 4. When `contextChunks` is greater than zero, `getContextChunks` is called
 *    on the store for every result and its output is attached as `context`.
 *
 * Both modes return at most `limit` results. In 'vector' mode the store is
 * queried with `vectorLimit`, so the list is cut to `limit` afterwards; without
 * that cut an explicit `vectorLimit` larger than `limit` would leak through.
 *
 * The whole effect runs inside the span 'knowledge_search.search'. Failures
 * are `KnowledgeSearchError`s.
 */
export const searchKnowledge = (input: KnowledgeSearchInput) =>
  Effect.gen(function* () {
    const valid = yield* validateSearchInput(input)
    yield* Effect.annotateCurrentSpan({
      'knowledge_search.query_length': valid.query.length,
      'knowledge_search.limit': valid.limit,
      'knowledge_search.mode': valid.mode,
      'knowledge_search.context_chunks': valid.contextChunks,
      'knowledge_search.scope_count': scopeIds(input.scope).length
    })
    const store = yield* SearchIndexStore

    // Shared by both modes; an Effect is a description, so building it here runs nothing yet.
    const vectorSearch = searchVectorChunks({
      scope: input.scope,
      query: valid.query,
      limit: valid.vectorLimit,
      minScore: input.minScore
    })

    let results: ReadonlyArray<KnowledgeSearchResult>
    if (valid.mode === 'vector') {
      const vectorResults: ReadonlyArray<KnowledgeSearchResult> = yield* vectorSearch
      // Honour `limit` even when the caller asked the store for more candidates.
      results = vectorResults.slice(0, valid.limit)
    } else {
      const searches = yield* Effect.all(
        {
          vectorResults: vectorSearch,
          textResults: searchTextChunks({
            scope: input.scope,
            query: valid.query,
            limit: valid.textLimit
          })
        },
        { concurrency: 'unbounded' }
      )

      results = fuseKnowledgeSearchResults({
        vectorResults: searches.vectorResults,
        textResults: searches.textResults,
        limit: valid.limit
      })
    }

    if (valid.contextChunks === 0) {
      return results
    }

    return yield* Effect.forEach(results, result =>
      store
        .getContextChunks({
          collectionId: result.chunk.collectionId,
          documentId: result.chunk.documentId,
          position: result.chunk.position,
          contextChunks: valid.contextChunks
        })
        .pipe(
          Effect.map(context => ({ ...result, context })),
          Effect.mapError(
            error => new KnowledgeSearchError({ message: error.message, stage: 'store', cause: error })
          )
        )
    )
  }).pipe(Effect.withSpan('knowledge_search.search'))
package/src/store.ts ADDED
@@ -0,0 +1,53 @@
1
+ import { Context } from 'effect'
2
+ import type { Effect } from 'effect'
3
+ import type { KnowledgeArtifact } from './artifacts.ts'
4
+ import type { KnowledgeStoreError } from './errors.ts'
5
+ import type { KnowledgeLink } from './links.ts'
6
+ import type {
7
+ CreateKnowledgeRecordInput,
8
+ KnowledgeRecord,
9
+ KnowledgeScope,
10
+ UpdateKnowledgeRecordInput
11
+ } from './records.ts'
12
+ import type { KnowledgeProvenance } from './provenance.ts'
13
+ import type { KnowledgeRepresentation } from './representations.ts'
14
+
15
/**
 * Addresses a single record by scope and id.
 *
 * Used as the input of `getRecord`, `deleteRecord`, `listArtifacts`,
 * `listProvenance` and `listLinks` on `KnowledgeStoreApi`.
 */
export type GetKnowledgeRecordInput = Readonly<{
  scope: KnowledgeScope
  id: string
}>
19
+
20
/** Input of `KnowledgeStoreApi.listRecords`. */
export type ListKnowledgeRecordsInput = Readonly<{
  scope: KnowledgeScope
  /** Presumably the maximum number of records to return; confirm against store implementations. */
  limit: number
}>
24
+
25
/** Output of `KnowledgeStoreApi.listRecords`. */
export type ListKnowledgeRecordsResult = Readonly<{
  records: readonly KnowledgeRecord[]
}>
28
+
29
/** Input of `KnowledgeStoreApi.listPinned`. */
export type ListPinnedKnowledgeInput = Readonly<{
  scope: KnowledgeScope
  /** Presumably the maximum number of pinned records to return; confirm against store implementations. */
  limit: number
}>
33
+
34
/** Output of `KnowledgeStoreApi.listPinned`: records together with representations. */
export type ListPinnedKnowledgeResult = Readonly<{
  records: readonly KnowledgeRecord[]
  representations: readonly KnowledgeRepresentation[]
}>
38
+
39
/**
 * Operations a knowledge store implementation provides.
 *
 * Every operation returns an Effect that fails with `KnowledgeStoreError`.
 */
export type KnowledgeStoreApi = Readonly<{
  /** Takes a create input and yields a `KnowledgeRecord`. */
  createRecord: (input: CreateKnowledgeRecordInput) => Effect.Effect<KnowledgeRecord, KnowledgeStoreError>
  /** Takes an update input and yields a `KnowledgeRecord`. */
  updateRecord: (input: UpdateKnowledgeRecordInput) => Effect.Effect<KnowledgeRecord, KnowledgeStoreError>
  /** Yields the record addressed by scope and id. */
  getRecord: (input: GetKnowledgeRecordInput) => Effect.Effect<KnowledgeRecord, KnowledgeStoreError>
  /** Yields records for a scope. */
  listRecords: (
    input: ListKnowledgeRecordsInput
  ) => Effect.Effect<ListKnowledgeRecordsResult, KnowledgeStoreError>
  /** Yields pinned records and representations for a scope. */
  listPinned: (
    input: ListPinnedKnowledgeInput
  ) => Effect.Effect<ListPinnedKnowledgeResult, KnowledgeStoreError>
  /** Takes a scope and id; yields no value. */
  deleteRecord: (input: GetKnowledgeRecordInput) => Effect.Effect<void, KnowledgeStoreError>
  /** Yields artifacts for the addressed record. */
  listArtifacts: (
    input: GetKnowledgeRecordInput
  ) => Effect.Effect<readonly KnowledgeArtifact[], KnowledgeStoreError>
  /** Yields provenance entries for the addressed record. */
  listProvenance: (
    input: GetKnowledgeRecordInput
  ) => Effect.Effect<readonly KnowledgeProvenance[], KnowledgeStoreError>
  /** Yields links for the addressed record. */
  listLinks: (input: GetKnowledgeRecordInput) => Effect.Effect<readonly KnowledgeLink[], KnowledgeStoreError>
}>
50
+
51
/** Context service for `KnowledgeStoreApi`, registered under the key '@yolk-sdk/knowledge/KnowledgeStore'. */
export class KnowledgeStore extends Context.Service<KnowledgeStore, KnowledgeStoreApi>()('@yolk-sdk/knowledge/KnowledgeStore') {}
package/src/summarization.ts ADDED
@@ -0,0 +1,28 @@
1
+ import { Context, Effect, Layer } from 'effect'
2
+ import type { KnowledgeMetadata } from './documents.ts'
3
+ import type { KnowledgeSummarizationError } from './errors.ts'
4
+
5
/** Output of a summarizer; both fields are optional. */
export type KnowledgeDocumentSummary = Readonly<{
  title?: string
  summary?: string
}>
9
+
10
/** Input handed to `KnowledgeSummarizerApi.summarize`. */
export type SummarizeKnowledgeDocumentInput = Readonly<{
  /** Document text to summarize. */
  content: string
  /** Title supplied with the source, if any; the no-op summarizer returns it as `title`. */
  sourceTitle?: string
  metadata?: KnowledgeMetadata
}>
15
+
16
/** Summarizer capability: one `summarize` function. */
export type KnowledgeSummarizerApi = Readonly<{
  /** Yields a `KnowledgeDocumentSummary` or fails with `KnowledgeSummarizationError`. */
  summarize: (
    input: SummarizeKnowledgeDocumentInput
  ) => Effect.Effect<KnowledgeDocumentSummary, KnowledgeSummarizationError>
}>
21
+
22
/** Context service for `KnowledgeSummarizerApi`, registered under the key '@yolk-sdk/knowledge/KnowledgeSummarizer'. */
export class KnowledgeSummarizer extends Context.Service<KnowledgeSummarizer, KnowledgeSummarizerApi>()('@yolk-sdk/knowledge/KnowledgeSummarizer') {}
25
+
26
/**
 * Layer providing a summarizer that does no summarization: it always succeeds
 * and returns only the input's `sourceTitle` as `title` (no `summary`).
 */
export const NoopKnowledgeSummarizerLive = Layer.succeed(KnowledgeSummarizer, {
  summarize: ({ sourceTitle }) => {
    const passthrough = { title: sourceTitle }
    return Effect.succeed(passthrough)
  }
})
package/src/vector-store.ts ADDED
@@ -0,0 +1,6 @@
1
+ export { SearchIndexStoreError as VectorStoreError } from './errors.ts'
2
+ export type {
3
+ KnowledgeChunkSearchInput as VectorSearchQuery,
4
+ KnowledgeChunkSearchResult as VectorSearchResult,
5
+ ReplaceKnowledgeDocumentChunksInput as VectorRecord
6
+ } from './search-store.ts'