@yolk-sdk/knowledge 0.0.1-canary.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +104 -0
- package/dist/agent.d.mts +26 -0
- package/dist/agent.d.mts.map +1 -0
- package/dist/agent.mjs +45 -0
- package/dist/agent.mjs.map +1 -0
- package/dist/artifacts.d.mts +37 -0
- package/dist/artifacts.d.mts.map +1 -0
- package/dist/artifacts.mjs +28 -0
- package/dist/artifacts.mjs.map +1 -0
- package/dist/chunking.d.mts +38 -0
- package/dist/chunking.d.mts.map +1 -0
- package/dist/chunking.mjs +93 -0
- package/dist/chunking.mjs.map +1 -0
- package/dist/context.d.mts +16 -0
- package/dist/context.d.mts.map +1 -0
- package/dist/context.mjs +25 -0
- package/dist/context.mjs.map +1 -0
- package/dist/documents.d.mts +105 -0
- package/dist/documents.d.mts.map +1 -0
- package/dist/documents.mjs +93 -0
- package/dist/documents.mjs.map +1 -0
- package/dist/embeddings.d.mts +14 -0
- package/dist/embeddings.d.mts.map +1 -0
- package/dist/embeddings.mjs +8 -0
- package/dist/embeddings.mjs.map +1 -0
- package/dist/errors.d.mts +72 -0
- package/dist/errors.d.mts.map +1 -0
- package/dist/errors.mjs +26 -0
- package/dist/errors.mjs.map +1 -0
- package/dist/extraction.d.mts +19 -0
- package/dist/extraction.d.mts.map +1 -0
- package/dist/extraction.mjs +7 -0
- package/dist/extraction.mjs.map +1 -0
- package/dist/index.d.mts +5 -0
- package/dist/index.mjs +5 -0
- package/dist/ingestion.d.mts +48 -0
- package/dist/ingestion.d.mts.map +1 -0
- package/dist/ingestion.mjs +111 -0
- package/dist/ingestion.mjs.map +1 -0
- package/dist/links.d.mts +17 -0
- package/dist/links.d.mts.map +1 -0
- package/dist/links.mjs +24 -0
- package/dist/links.mjs.map +1 -0
- package/dist/provenance.d.mts +20 -0
- package/dist/provenance.d.mts.map +1 -0
- package/dist/provenance.mjs +26 -0
- package/dist/provenance.mjs.map +1 -0
- package/dist/records.d.mts +59 -0
- package/dist/records.d.mts.map +1 -0
- package/dist/records.mjs +64 -0
- package/dist/records.mjs.map +1 -0
- package/dist/representations.d.mts +36 -0
- package/dist/representations.d.mts.map +1 -0
- package/dist/representations.mjs +44 -0
- package/dist/representations.mjs.map +1 -0
- package/dist/search-store.d.mts +77 -0
- package/dist/search-store.d.mts.map +1 -0
- package/dist/search-store.mjs +7 -0
- package/dist/search-store.mjs.map +1 -0
- package/dist/search.d.mts +49 -0
- package/dist/search.d.mts.map +1 -0
- package/dist/search.mjs +179 -0
- package/dist/search.mjs.map +1 -0
- package/dist/store.d.mts +44 -0
- package/dist/store.d.mts.map +1 -0
- package/dist/store.mjs +7 -0
- package/dist/store.mjs.map +1 -0
- package/dist/summarization.d.mts +23 -0
- package/dist/summarization.d.mts.map +1 -0
- package/dist/summarization.mjs +8 -0
- package/dist/summarization.mjs.map +1 -0
- package/dist/vector-store.d.mts +3 -0
- package/dist/vector-store.mjs +2 -0
- package/package.json +147 -0
- package/src/agent.ts +96 -0
- package/src/artifacts.ts +48 -0
- package/src/chunking.ts +175 -0
- package/src/context.ts +42 -0
- package/src/documents.ts +109 -0
- package/src/embeddings.ts +18 -0
- package/src/errors.ts +63 -0
- package/src/extraction.ts +21 -0
- package/src/index.ts +4 -0
- package/src/ingestion.ts +157 -0
- package/src/links.ts +23 -0
- package/src/provenance.ts +25 -0
- package/src/records.ts +76 -0
- package/src/representations.ts +51 -0
- package/src/search-store.ts +98 -0
- package/src/search.ts +270 -0
- package/src/store.ts +53 -0
- package/src/summarization.ts +28 -0
- package/src/vector-store.ts +6 -0
package/src/documents.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { Schema } from 'effect'
|
|
2
|
+
|
|
3
|
+
/** `Schema.Trimmed` with an additional non-empty check. */
const NonEmptyTrimmedString = Schema.Trimmed.pipe(Schema.check(Schema.isNonEmpty()))

/** `Schema.Int` restricted to values strictly greater than 0. */
const PositiveInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThan(0)))

/** `Schema.Int` restricted to values greater than or equal to 0. */
const NonNegativeInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThanOrEqualTo(0)))
|
|
6
|
+
|
|
7
|
+
/** Free-form metadata: a record with string keys and values of unknown type. */
export const KnowledgeMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)

/** Decoded type of {@link KnowledgeMetadataSchema}. */
export type KnowledgeMetadata = Schema.Schema.Type<typeof KnowledgeMetadataSchema>
|
|
9
|
+
|
|
10
|
+
/** Allowed document status literals: `pending`, `processing`, `ready` or `error`. */
export const KnowledgeDocumentStatusSchema = Schema.Literals([
  'pending',
  'processing',
  'ready',
  'error'
])

/** Decoded type of {@link KnowledgeDocumentStatusSchema}. */
export type KnowledgeDocumentStatus = Schema.Schema.Type<typeof KnowledgeDocumentStatusSchema>
|
|
12
|
+
|
|
13
|
+
/**
 * Union of document sources, discriminated by `_tag`:
 * - `File`: required `ref`, optional `name` and `mediaType`
 * - `Url`: required `url`
 * - `Text`: optional `label`
 *
 * Every string field must be trimmed and non-empty.
 */
export const KnowledgeSourceSchema = Schema.Union([
  Schema.Struct({
    _tag: Schema.Literal('File'),
    ref: NonEmptyTrimmedString,
    name: Schema.optional(NonEmptyTrimmedString),
    mediaType: Schema.optional(NonEmptyTrimmedString)
  }),
  Schema.Struct({
    _tag: Schema.Literal('Url'),
    url: NonEmptyTrimmedString
  }),
  Schema.Struct({
    _tag: Schema.Literal('Text'),
    label: Schema.optional(NonEmptyTrimmedString)
  })
])

/** Decoded type of {@link KnowledgeSourceSchema}. */
export type KnowledgeSource = Schema.Schema.Type<typeof KnowledgeSourceSchema>
|
|
30
|
+
|
|
31
|
+
/** Embedding settings: a non-empty `model` name and a positive integer `dimensions`. */
export const KnowledgeEmbeddingConfigSchema = Schema.Struct({
  model: NonEmptyTrimmedString,
  dimensions: PositiveInteger
})

/** Decoded type of {@link KnowledgeEmbeddingConfigSchema}. */
export type KnowledgeEmbeddingConfig = Schema.Schema.Type<typeof KnowledgeEmbeddingConfigSchema>
|
|
36
|
+
|
|
37
|
+
/**
 * Chunking settings. `strategy` accepts only the literal `'sentence-token'`;
 * `maxTokens` must be a positive integer.
 */
export const KnowledgeChunkingConfigSchema = Schema.Struct({
  strategy: Schema.Literal('sentence-token'),
  maxTokens: PositiveInteger
})

/** Decoded type of {@link KnowledgeChunkingConfigSchema}. */
export type KnowledgeChunkingConfig = Schema.Schema.Type<typeof KnowledgeChunkingConfigSchema>
|
|
42
|
+
|
|
43
|
+
/**
 * A knowledge collection: an `id`, its embedding and chunking configuration,
 * plus an optional `label` and optional free-form `metadata`.
 */
export const KnowledgeCollectionSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  label: Schema.optional(NonEmptyTrimmedString),
  embeddingConfig: KnowledgeEmbeddingConfigSchema,
  chunkingConfig: KnowledgeChunkingConfigSchema,
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link KnowledgeCollectionSchema}. */
export type KnowledgeCollection = Schema.Schema.Type<typeof KnowledgeCollectionSchema>
|
|
51
|
+
|
|
52
|
+
/**
 * A document inside a collection.
 *
 * Required: `id`, `collectionId`, `source` and `status`. Everything else
 * (`title`, `summary`, `errorMessage`, `contentHash`, `tokenCount`,
 * `chunkCount`, `metadata`) is optional. The counts must be non-negative integers.
 */
export const KnowledgeDocumentSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  collectionId: NonEmptyTrimmedString,
  source: KnowledgeSourceSchema,
  status: KnowledgeDocumentStatusSchema,
  title: Schema.optional(NonEmptyTrimmedString),
  summary: Schema.optional(Schema.String),
  errorMessage: Schema.optional(Schema.String),
  contentHash: Schema.optional(NonEmptyTrimmedString),
  tokenCount: Schema.optional(NonNegativeInteger),
  chunkCount: Schema.optional(NonNegativeInteger),
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link KnowledgeDocumentSchema}. */
export type KnowledgeDocument = Schema.Schema.Type<typeof KnowledgeDocumentSchema>
|
|
66
|
+
|
|
67
|
+
/**
 * A chunk of a document: identifiers for the chunk, its collection and its
 * document, non-empty `content`, a non-negative integer `position`, a positive
 * integer `tokenCount`, and optional `metadata`.
 */
export const KnowledgeChunkSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  collectionId: NonEmptyTrimmedString,
  documentId: NonEmptyTrimmedString,
  content: NonEmptyTrimmedString,
  position: NonNegativeInteger,
  tokenCount: PositiveInteger,
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link KnowledgeChunkSchema}. */
export type KnowledgeChunk = Schema.Schema.Type<typeof KnowledgeChunkSchema>
|
|
77
|
+
|
|
78
|
+
/**
 * Result of extraction: required non-empty `content`, with optional `title`,
 * `summary` and `metadata`.
 */
export const ExtractedKnowledgeDocumentSchema = Schema.Struct({
  content: NonEmptyTrimmedString,
  title: Schema.optional(NonEmptyTrimmedString),
  summary: Schema.optional(Schema.String),
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link ExtractedKnowledgeDocumentSchema}. */
export type ExtractedKnowledgeDocument = Schema.Schema.Type<typeof ExtractedKnowledgeDocumentSchema>
|
|
85
|
+
|
|
86
|
+
/**
 * Search scope, discriminated by `_tag`: a single collection (`id`) or a list
 * of collections (`ids`).
 *
 * NOTE(review): `ids` is a plain array schema, so an empty list passes
 * validation — confirm that store implementations handle that case.
 */
export const KnowledgeSearchScopeSchema = Schema.Union([
  Schema.Struct({
    _tag: Schema.Literal('KnowledgeCollection'),
    id: NonEmptyTrimmedString
  }),
  Schema.Struct({
    _tag: Schema.Literal('KnowledgeCollections'),
    ids: Schema.Array(NonEmptyTrimmedString)
  })
])

/** Decoded type of {@link KnowledgeSearchScopeSchema}. */
export type KnowledgeSearchScope = Schema.Schema.Type<typeof KnowledgeSearchScopeSchema>
|
|
91
|
+
|
|
92
|
+
/**
 * Chunking configuration used by `makeKnowledgeCollection` when the caller
 * supplies none: the `sentence-token` strategy with at most 512 tokens per chunk.
 */
export const defaultKnowledgeChunkingConfig: KnowledgeChunkingConfig = {
  strategy: 'sentence-token',
  maxTokens: 512
}
|
|
96
|
+
|
|
97
|
+
/**
 * Builds a `KnowledgeCollection` value from plain input.
 *
 * `chunkingConfig` falls back to `defaultKnowledgeChunkingConfig` when it is
 * omitted. All other fields are copied through unchanged; no schema validation
 * is performed here.
 *
 * @param input - Collection fields; only `id` and `embeddingConfig` are required.
 * @returns The assembled collection object.
 */
export const makeKnowledgeCollection = (input: {
  readonly id: string
  readonly label?: string
  readonly embeddingConfig: KnowledgeEmbeddingConfig
  readonly chunkingConfig?: KnowledgeChunkingConfig
  readonly metadata?: KnowledgeMetadata
}): KnowledgeCollection => {
  const { id, label, embeddingConfig, metadata } = input
  const chunkingConfig = input.chunkingConfig ?? defaultKnowledgeChunkingConfig
  return { id, label, embeddingConfig, chunkingConfig, metadata }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Context } from 'effect'
|
|
2
|
+
import type { Effect } from 'effect'
|
|
3
|
+
import type { KnowledgeEmbeddingError } from './errors.ts'
|
|
4
|
+
|
|
5
|
+
/** An embedding vector, represented as a read-only array of numbers. */
export type KnowledgeEmbedding = readonly number[]
|
|
6
|
+
|
|
7
|
+
/**
 * Contract of the embedding service. Both operations fail with
 * `KnowledgeEmbeddingError`.
 */
export type KnowledgeEmbedderApi = {
  /** Embeds a batch of texts, producing an array of embeddings. */
  readonly embedTexts: (
    texts: readonly string[]
  ) => Effect.Effect<readonly KnowledgeEmbedding[], KnowledgeEmbeddingError>
  /** Embeds a single query string. */
  readonly embedQuery: (
    query: string
  ) => Effect.Effect<KnowledgeEmbedding, KnowledgeEmbeddingError>
}
|
|
13
|
+
|
|
14
|
+
/**
 * Context service tag for the embedder, registered under the key
 * `@yolk-sdk/knowledge/KnowledgeEmbedder`. Implementations provide a
 * `KnowledgeEmbedderApi`.
 */
export class KnowledgeEmbedder extends Context.Service<KnowledgeEmbedder, KnowledgeEmbedderApi>()(
  '@yolk-sdk/knowledge/KnowledgeEmbedder'
) {}
|
|
17
|
+
|
|
18
|
+
export { KnowledgeEmbeddingError as EmbedderError } from './errors.ts'
|
package/src/errors.ts
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { Data } from 'effect'
|
|
2
|
+
import * as Schema from 'effect/Schema'
|
|
3
|
+
|
|
4
|
+
/**
 * Tagged error (`_tag: 'SearchIndexStoreError'`) carrying a `message` and an
 * optional underlying `cause`. It is the error type of every
 * `SearchIndexStoreApi` operation.
 */
export class SearchIndexStoreError extends Data.TaggedError('SearchIndexStoreError')<{
  readonly message: string
  readonly cause?: unknown
}> {}
|
|
8
|
+
|
|
9
|
+
/**
 * Tagged error (`_tag: 'KnowledgeExtractionError'`) carrying a `message` and an
 * optional underlying `cause`.
 */
export class KnowledgeExtractionError extends Data.TaggedError('KnowledgeExtractionError')<{
  readonly message: string
  readonly cause?: unknown
}> {}
|
|
13
|
+
|
|
14
|
+
/**
 * Tagged error (`_tag: 'KnowledgeChunkingError'`) carrying a `message` and an
 * optional underlying `cause`.
 */
export class KnowledgeChunkingError extends Data.TaggedError('KnowledgeChunkingError')<{
  readonly message: string
  readonly cause?: unknown
}> {}
|
|
18
|
+
|
|
19
|
+
/**
 * Tagged error (`_tag: 'KnowledgeEmbeddingError'`) carrying a `message` and an
 * optional underlying `cause`.
 */
export class KnowledgeEmbeddingError extends Data.TaggedError('KnowledgeEmbeddingError')<{
  readonly message: string
  readonly cause?: unknown
}> {}
|
|
23
|
+
|
|
24
|
+
/**
 * Tagged error (`_tag: 'KnowledgeSummarizationError'`) carrying a `message` and
 * an optional underlying `cause`.
 */
export class KnowledgeSummarizationError extends Data.TaggedError('KnowledgeSummarizationError')<{
  readonly message: string
  readonly cause?: unknown
}> {}
|
|
28
|
+
|
|
29
|
+
/**
 * Tagged error (`_tag: 'KnowledgeIngestionError'`) for ingestion failures.
 *
 * `stage` names the pipeline step that failed (`store`, `extract`, `chunk`,
 * `embed` or `summarize`); `cause` optionally holds the underlying error.
 */
export class KnowledgeIngestionError extends Data.TaggedError('KnowledgeIngestionError')<{
  readonly message: string
  readonly stage: 'store' | 'extract' | 'chunk' | 'embed' | 'summarize'
  readonly cause?: unknown
}> {}
|
|
34
|
+
|
|
35
|
+
/**
 * Tagged error (`_tag: 'KnowledgeSearchError'`) for search failures.
 *
 * `stage` is either `store` or `embed`; `cause` optionally holds the
 * underlying error.
 */
export class KnowledgeSearchError extends Data.TaggedError('KnowledgeSearchError')<{
  readonly message: string
  readonly stage: 'store' | 'embed'
  readonly cause?: unknown
}> {}
|
|
40
|
+
|
|
41
|
+
/**
 * Schema-backed tagged error (`'KnowledgeStoreError'`) with a string `message`
 * and an optional `cause` of unknown type.
 */
export class KnowledgeStoreError extends Schema.TaggedErrorClass<KnowledgeStoreError>()(
  'KnowledgeStoreError',
  {
    message: Schema.String,
    cause: Schema.optional(Schema.Unknown)
  }
) {}
|
|
48
|
+
|
|
49
|
+
/**
 * Schema-backed tagged error (`'KnowledgeArtifactError'`) with a string
 * `message` and an optional `cause` of unknown type.
 */
export class KnowledgeArtifactError extends Schema.TaggedErrorClass<KnowledgeArtifactError>()(
  'KnowledgeArtifactError',
  {
    message: Schema.String,
    cause: Schema.optional(Schema.Unknown)
  }
) {}
|
|
56
|
+
|
|
57
|
+
/**
 * Schema-backed tagged error (`'KnowledgeContextError'`) with a string
 * `message` and an optional `cause` of unknown type.
 */
export class KnowledgeContextError extends Schema.TaggedErrorClass<KnowledgeContextError>()(
  'KnowledgeContextError',
  {
    message: Schema.String,
    cause: Schema.optional(Schema.Unknown)
  }
) {}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Context } from 'effect'
|
|
2
|
+
import type { Effect } from 'effect'
|
|
3
|
+
import type { ExtractedKnowledgeDocument, KnowledgeMetadata, KnowledgeSource } from './documents.ts'
|
|
4
|
+
import type { KnowledgeExtractionError } from './errors.ts'
|
|
5
|
+
|
|
6
|
+
/**
 * A source together with its loaded content, as handed to the extractor.
 *
 * `content` is either text or raw bytes. `mediaType` and `metadata` are optional.
 */
export type LoadedKnowledgeSource = {
  readonly source: KnowledgeSource
  readonly content: string | Uint8Array
  readonly mediaType?: string
  readonly metadata?: KnowledgeMetadata
}
|
|
12
|
+
|
|
13
|
+
/** Contract of the extraction service. */
export type KnowledgeExtractorApi = {
  /**
   * Turns a loaded source into an `ExtractedKnowledgeDocument`, failing with
   * `KnowledgeExtractionError`.
   */
  readonly extract: (
    source: LoadedKnowledgeSource
  ) => Effect.Effect<ExtractedKnowledgeDocument, KnowledgeExtractionError>
}
|
|
18
|
+
|
|
19
|
+
/**
 * Context service tag for the extractor, registered under the key
 * `@yolk-sdk/knowledge/KnowledgeExtractor`. Implementations provide a
 * `KnowledgeExtractorApi`.
 */
export class KnowledgeExtractor extends Context.Service<KnowledgeExtractor, KnowledgeExtractorApi>()(
  '@yolk-sdk/knowledge/KnowledgeExtractor'
) {}
|
package/src/index.ts
ADDED
package/src/ingestion.ts
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { Array as Arr, Effect } from 'effect'
|
|
2
|
+
import { KnowledgeChunker } from './chunking.ts'
|
|
3
|
+
import { KnowledgeEmbedder } from './embeddings.ts'
|
|
4
|
+
import { KnowledgeExtractor } from './extraction.ts'
|
|
5
|
+
import { KnowledgeSummarizer } from './summarization.ts'
|
|
6
|
+
import type { LoadedKnowledgeSource } from './extraction.ts'
|
|
7
|
+
import { KnowledgeIngestionError } from './errors.ts'
|
|
8
|
+
import { SearchIndexStore } from './search-store.ts'
|
|
9
|
+
|
|
10
|
+
/**
 * Input to `ingestKnowledgeDocument`: the target collection and document ids,
 * the loaded source to ingest, and an optional `contentHash` that is passed
 * through to the store when the document is marked ready.
 */
export type IngestKnowledgeDocumentInput = {
  readonly collectionId: string
  readonly documentId: string
  readonly source: LoadedKnowledgeSource
  readonly contentHash?: string
}
|
|
16
|
+
|
|
17
|
+
/**
 * Records an ingestion failure on the document via
 * `SearchIndexStore.markDocumentError`, on a best-effort basis.
 *
 * If the store call itself fails, that failure is logged as a warning and then
 * dropped, so the returned effect never fails and cannot mask the original
 * ingestion error.
 *
 * @param input - Identifies the collection and document to mark.
 * @param error - The ingestion error whose `message` is stored.
 */
const markErrorBestEffort = (input: IngestKnowledgeDocumentInput, error: KnowledgeIngestionError) =>
  Effect.gen(function* () {
    const store = yield* SearchIndexStore
    yield* store.markDocumentError({
      collectionId: input.collectionId,
      documentId: input.documentId,
      message: error.message
    })
  }).pipe(
    // Still best-effort, but leave a trace instead of swallowing the failure silently.
    Effect.catch(storeError =>
      Effect.logWarning('Failed to mark knowledge document as errored', storeError)
    )
  )
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a mapper that wraps a lower-level failure in a `KnowledgeIngestionError`
 * tagged with the given pipeline stage. The original error is kept as `cause`.
 */
const toIngestionError =
  (stage: KnowledgeIngestionError['stage']) =>
  (error: { readonly message: string }) =>
    new KnowledgeIngestionError({ message: error.message, stage, cause: error })

/**
 * Ingests one document into a collection.
 *
 * Steps, in order:
 * 1. Load the collection with `store.getSet`.
 * 2. Upsert the document with status `processing`.
 * 3. Extract content from the loaded source.
 * 4. Chunk the content using the collection's `chunkingConfig.maxTokens`.
 * 5. Embed the chunk contents and summarize the document concurrently.
 * 6. Replace the document's stored chunks and mark the document ready.
 *
 * Every failure is converted to a `KnowledgeIngestionError` carrying the stage
 * that failed. On failure the document is marked as errored on a best-effort
 * basis, and the original error is then re-raised.
 *
 * NOTE(review): `Effect.tapError` reacts to typed failures only. Confirm
 * whether defects or interruption can leave a document in `processing`.
 *
 * @param input - Collection id, document id, loaded source and optional content hash.
 * @returns An effect yielding the result of `store.markDocumentReady`.
 */
export const ingestKnowledgeDocument = (input: IngestKnowledgeDocumentInput) =>
  Effect.gen(function* () {
    yield* Effect.annotateCurrentSpan({
      'knowledge_search.set_id': input.collectionId,
      'knowledge_search.document_id': input.documentId,
      'knowledge_search.source_type': input.source.source._tag
    })
    const store = yield* SearchIndexStore
    const extractor = yield* KnowledgeExtractor
    const chunker = yield* KnowledgeChunker
    const embedder = yield* KnowledgeEmbedder
    const summarizer = yield* KnowledgeSummarizer

    const collection = yield* store
      .getSet(input.collectionId)
      .pipe(Effect.mapError(toIngestionError('store')))

    yield* store
      .upsertDocument({
        document: {
          id: input.documentId,
          collectionId: input.collectionId,
          source: input.source.source,
          status: 'processing',
          metadata: input.source.metadata
        }
      })
      .pipe(Effect.mapError(toIngestionError('store')))

    const extracted = yield* extractor
      .extract(input.source)
      .pipe(Effect.mapError(toIngestionError('extract')))

    const chunks = yield* chunker
      .chunk({
        collectionId: input.collectionId,
        documentId: input.documentId,
        content: extracted.content,
        maxTokens: collection.chunkingConfig.maxTokens,
        metadata: extracted.metadata
      })
      .pipe(Effect.mapError(toIngestionError('chunk')))

    // Embedding and summarization do not depend on each other, so run them together.
    const indexed = yield* Effect.all(
      {
        embeddings: embedder
          .embedTexts(chunks.map(chunk => chunk.content))
          .pipe(Effect.mapError(toIngestionError('embed'))),
        summary: summarizer
          .summarize({
            content: extracted.content,
            sourceTitle: extracted.title,
            metadata: extracted.metadata
          })
          .pipe(Effect.mapError(toIngestionError('summarize')))
      },
      { concurrency: 'unbounded' }
    )

    // Guard before zipping: a length mismatch would otherwise silently drop chunks.
    if (indexed.embeddings.length !== chunks.length) {
      return yield* Effect.fail(
        new KnowledgeIngestionError({ message: 'Embedding count did not match chunk count', stage: 'embed' })
      )
    }

    const indexedChunks = Arr.zip(chunks, indexed.embeddings).map(([chunk, embedding]) => ({
      chunk,
      embedding
    }))
    yield* store
      .replaceDocumentChunks({
        collectionId: input.collectionId,
        documentId: input.documentId,
        chunks: indexedChunks
      })
      .pipe(Effect.mapError(toIngestionError('store')))

    const tokenCount = chunks.reduce((total, chunk) => total + chunk.tokenCount, 0)
    return yield* store
      .markDocumentReady({
        collectionId: input.collectionId,
        documentId: input.documentId,
        // Summarizer output takes precedence over values found during extraction.
        title: indexed.summary.title ?? extracted.title,
        summary: indexed.summary.summary ?? extracted.summary,
        contentHash: input.contentHash,
        tokenCount,
        chunkCount: chunks.length
      })
      .pipe(Effect.mapError(toIngestionError('store')))
  }).pipe(
    Effect.withSpan('knowledge_search.ingestDocument'),
    // Run the best-effort error marking, then fail with the original error.
    Effect.tapError(error => markErrorBestEffort(input, error))
  )
|
|
150
|
+
|
|
151
|
+
/**
 * Object form of the ingestion entry point. `ingest` has the same input and
 * return type as `ingestKnowledgeDocument`.
 */
export type KnowledgeIngestionPipeline = {
  readonly ingest: (
    input: IngestKnowledgeDocumentInput
  ) => ReturnType<typeof ingestKnowledgeDocument>
}
|
|
154
|
+
|
|
155
|
+
/**
 * Creates a `KnowledgeIngestionPipeline` whose `ingest` member is
 * `ingestKnowledgeDocument` itself.
 */
export const makeIngestionPipeline = (): KnowledgeIngestionPipeline => {
  const pipeline: KnowledgeIngestionPipeline = { ingest: ingestKnowledgeDocument }
  return pipeline
}
|
package/src/links.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import * as Schema from 'effect/Schema'
|
|
2
|
+
import { KnowledgeMetadataSchema, NonEmptyTrimmedString } from './records.ts'
|
|
3
|
+
|
|
4
|
+
/** Allowed values for the `type` field of a knowledge link. */
export const KnowledgeLinkTypeSchema = Schema.Literals([
  'cites',
  'supports',
  'contradicts',
  'supersedes',
  'mentions',
  'derived_from',
  'related_to'
])

/** Decoded type of {@link KnowledgeLinkTypeSchema}. */
export type KnowledgeLinkType = Schema.Schema.Type<typeof KnowledgeLinkTypeSchema>
|
|
14
|
+
|
|
15
|
+
/**
 * A typed link from one record (`fromRecordId`) to another (`toRecordId`),
 * with its own `id`, optional `metadata` and a UTC `createdAt` timestamp.
 */
export const KnowledgeLinkSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  fromRecordId: NonEmptyTrimmedString,
  toRecordId: NonEmptyTrimmedString,
  type: KnowledgeLinkTypeSchema,
  metadata: Schema.optional(KnowledgeMetadataSchema),
  createdAt: Schema.DateTimeUtc
})

/** Decoded type of {@link KnowledgeLinkSchema}. */
export type KnowledgeLink = Schema.Schema.Type<typeof KnowledgeLinkSchema>
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import * as Schema from 'effect/Schema'
|
|
2
|
+
import { KnowledgeMetadataSchema, NonEmptyTrimmedString } from './records.ts'
|
|
3
|
+
|
|
4
|
+
/** Allowed values for the `sourceKind` field of a provenance entry. */
export const KnowledgeProvenanceSourceKindSchema = Schema.Literals([
  'upload',
  'user_statement',
  'url',
  'generated',
  'imported',
  'external_api'
])

/** Decoded type of {@link KnowledgeProvenanceSourceKindSchema}. */
export type KnowledgeProvenanceSourceKind = Schema.Schema.Type<typeof KnowledgeProvenanceSourceKindSchema>
|
|
13
|
+
|
|
14
|
+
/**
 * Provenance entry attached to a record.
 *
 * Required: `id`, `recordId`, `sourceKind`, `sourceLabel` and `createdAt`.
 * Optional: `artifactId`, `sourceUrl`, `observedAt` and `metadata`.
 */
export const KnowledgeProvenanceSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  recordId: NonEmptyTrimmedString,
  artifactId: Schema.optional(NonEmptyTrimmedString),
  sourceKind: KnowledgeProvenanceSourceKindSchema,
  sourceLabel: NonEmptyTrimmedString,
  sourceUrl: Schema.optional(NonEmptyTrimmedString),
  observedAt: Schema.optional(Schema.DateTimeUtc),
  metadata: Schema.optional(KnowledgeMetadataSchema),
  createdAt: Schema.DateTimeUtc
})

/** Decoded type of {@link KnowledgeProvenanceSchema}. */
export type KnowledgeProvenance = Schema.Schema.Type<typeof KnowledgeProvenanceSchema>
|
package/src/records.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import * as Schema from 'effect/Schema'
|
|
2
|
+
|
|
3
|
+
/** `Schema.Trimmed` with an additional non-empty check. */
export const NonEmptyTrimmedString = Schema.Trimmed.pipe(Schema.check(Schema.isNonEmpty()))

/** `Schema.Int` restricted to values greater than or equal to 0. */
export const NonNegativeInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThanOrEqualTo(0)))

/** `Schema.Int` restricted to values strictly greater than 0. */
export const PositiveInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThan(0)))
|
|
6
|
+
|
|
7
|
+
/** Free-form metadata: a record with string keys and values of unknown type. */
export const KnowledgeMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)

/** Decoded type of {@link KnowledgeMetadataSchema}. */
export type KnowledgeMetadata = Schema.Schema.Type<typeof KnowledgeMetadataSchema>
|
|
9
|
+
|
|
10
|
+
/** Allowed values for the `role` field of a knowledge record. */
export const KnowledgeRecordRoleSchema = Schema.Literals([
  'source',
  'note',
  'operating_protocol',
  'knowledge_map',
  'compiled_truth',
  'decision'
])

/** Decoded type of {@link KnowledgeRecordRoleSchema}. */
export type KnowledgeRecordRole = Schema.Schema.Type<typeof KnowledgeRecordRoleSchema>
|
|
19
|
+
|
|
20
|
+
/** Allowed values for the `contextPolicy` field of a knowledge record. */
export const KnowledgeContextPolicySchema = Schema.Literals([
  'pinned',
  'routable',
  'searchable',
  'archival'
])

/** Decoded type of {@link KnowledgeContextPolicySchema}. */
export type KnowledgeContextPolicy = Schema.Schema.Type<typeof KnowledgeContextPolicySchema>
|
|
27
|
+
|
|
28
|
+
/** Allowed values for the `status` field of a knowledge record. */
export const KnowledgeLifecycleStatusSchema = Schema.Literals([
  'draft',
  'processing',
  'ready',
  'error',
  'archived',
  'deleted'
])

/** Decoded type of {@link KnowledgeLifecycleStatusSchema}. */
export type KnowledgeLifecycleStatus = Schema.Schema.Type<typeof KnowledgeLifecycleStatusSchema>
|
|
37
|
+
|
|
38
|
+
/** A scope reference: a required `id` and an optional `kind`, both trimmed and non-empty. */
export const KnowledgeScopeSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  kind: Schema.optional(NonEmptyTrimmedString)
})

/** Decoded type of {@link KnowledgeScopeSchema}. */
export type KnowledgeScope = Schema.Schema.Type<typeof KnowledgeScopeSchema>
|
|
43
|
+
|
|
44
|
+
/**
 * A knowledge record.
 *
 * Required: `id`, `role`, `title`, `status`, `contextPolicy`, and the UTC
 * timestamps `createdAt` and `updatedAt`. Optional: `summary` and `metadata`.
 */
export const KnowledgeRecordSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  role: KnowledgeRecordRoleSchema,
  title: NonEmptyTrimmedString,
  status: KnowledgeLifecycleStatusSchema,
  contextPolicy: KnowledgeContextPolicySchema,
  summary: Schema.optional(Schema.String),
  metadata: Schema.optional(KnowledgeMetadataSchema),
  createdAt: Schema.DateTimeUtc,
  updatedAt: Schema.DateTimeUtc
})

/** Decoded type of {@link KnowledgeRecordSchema}. */
export type KnowledgeRecord = Schema.Schema.Type<typeof KnowledgeRecordSchema>
|
|
56
|
+
|
|
57
|
+
/**
 * Input for creating a record: `scope`, `role`, `title` and `contextPolicy`
 * are required; `summary` and `metadata` are optional.
 */
export const CreateKnowledgeRecordInputSchema = Schema.Struct({
  scope: KnowledgeScopeSchema,
  role: KnowledgeRecordRoleSchema,
  title: NonEmptyTrimmedString,
  contextPolicy: KnowledgeContextPolicySchema,
  summary: Schema.optional(Schema.String),
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link CreateKnowledgeRecordInputSchema}. */
export type CreateKnowledgeRecordInput = Schema.Schema.Type<typeof CreateKnowledgeRecordInputSchema>
|
|
66
|
+
|
|
67
|
+
/**
 * Input for updating a record: `scope` and `id` are required. The fields
 * `title`, `status`, `contextPolicy`, `summary` and `metadata` are all optional.
 */
export const UpdateKnowledgeRecordInputSchema = Schema.Struct({
  scope: KnowledgeScopeSchema,
  id: NonEmptyTrimmedString,
  title: Schema.optional(NonEmptyTrimmedString),
  status: Schema.optional(KnowledgeLifecycleStatusSchema),
  contextPolicy: Schema.optional(KnowledgeContextPolicySchema),
  summary: Schema.optional(Schema.String),
  metadata: Schema.optional(KnowledgeMetadataSchema)
})

/** Decoded type of {@link UpdateKnowledgeRecordInputSchema}. */
export type UpdateKnowledgeRecordInput = Schema.Schema.Type<typeof UpdateKnowledgeRecordInputSchema>
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import * as Schema from 'effect/Schema'
|
|
2
|
+
import {
|
|
3
|
+
KnowledgeMetadataSchema,
|
|
4
|
+
NonEmptyTrimmedString,
|
|
5
|
+
NonNegativeInteger
|
|
6
|
+
} from './records.ts'
|
|
7
|
+
|
|
8
|
+
/** Allowed values for the `modality` field of a representation. */
export const KnowledgeRepresentationModalitySchema = Schema.Literals([
  'text',
  'image',
  'audio',
  'video',
  'table'
])

/** Decoded type of {@link KnowledgeRepresentationModalitySchema}. */
export type KnowledgeRepresentationModality = Schema.Schema.Type<typeof KnowledgeRepresentationModalitySchema>
|
|
16
|
+
|
|
17
|
+
/** Allowed values for the `status` field of a representation. */
export const KnowledgeRepresentationStatusSchema = Schema.Literals([
  'pending',
  'processing',
  'ready',
  'error'
])

/** Decoded type of {@link KnowledgeRepresentationStatusSchema}. */
export type KnowledgeRepresentationStatus = Schema.Schema.Type<typeof KnowledgeRepresentationStatusSchema>
|
|
24
|
+
|
|
25
|
+
/**
 * A representation of a record.
 *
 * Required: `id`, `recordId`, `modality`, `status`, `createdAt` and
 * `updatedAt`. Optional: `artifactId`, `contentText`, `summary`, `model`,
 * `errorMessage` and `metadata`.
 */
export const KnowledgeRepresentationSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  recordId: NonEmptyTrimmedString,
  artifactId: Schema.optional(NonEmptyTrimmedString),
  modality: KnowledgeRepresentationModalitySchema,
  status: KnowledgeRepresentationStatusSchema,
  contentText: Schema.optional(Schema.String),
  summary: Schema.optional(Schema.String),
  model: Schema.optional(NonEmptyTrimmedString),
  errorMessage: Schema.optional(Schema.String),
  metadata: Schema.optional(KnowledgeMetadataSchema),
  createdAt: Schema.DateTimeUtc,
  updatedAt: Schema.DateTimeUtc
})

/** Decoded type of {@link KnowledgeRepresentationSchema}. */
export type KnowledgeRepresentation = Schema.Schema.Type<typeof KnowledgeRepresentationSchema>
|
|
40
|
+
|
|
41
|
+
/**
 * A chunk of a representation: ids for the chunk, its record and its
 * representation, non-empty `content`, non-negative integer `position` and
 * `tokenCount`, optional `metadata`, and a UTC `createdAt` timestamp.
 */
export const KnowledgeRepresentationChunkSchema = Schema.Struct({
  id: NonEmptyTrimmedString,
  recordId: NonEmptyTrimmedString,
  representationId: NonEmptyTrimmedString,
  content: NonEmptyTrimmedString,
  position: NonNegativeInteger,
  tokenCount: NonNegativeInteger,
  metadata: Schema.optional(KnowledgeMetadataSchema),
  createdAt: Schema.DateTimeUtc
})

/** Decoded type of {@link KnowledgeRepresentationChunkSchema}. */
export type KnowledgeRepresentationChunk = Schema.Schema.Type<typeof KnowledgeRepresentationChunkSchema>
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { Context } from 'effect'
|
|
2
|
+
import type { Effect } from 'effect'
|
|
3
|
+
import type { KnowledgeChunk, KnowledgeDocument, KnowledgeSearchScope, KnowledgeCollection } from './documents.ts'
|
|
4
|
+
import type { KnowledgeEmbedding } from './embeddings.ts'
|
|
5
|
+
import type { SearchIndexStoreError } from './errors.ts'
|
|
6
|
+
|
|
7
|
+
/** Input of `SearchIndexStoreApi.upsertDocument`: the document to upsert. */
export type UpsertKnowledgeDocumentInput = {
  readonly document: KnowledgeDocument
}
|
|
10
|
+
|
|
11
|
+
/**
 * Input of `SearchIndexStoreApi.replaceDocumentChunks`: the target document
 * and the chunks to store, each paired with its embedding.
 */
export type ReplaceKnowledgeDocumentChunksInput = {
  readonly collectionId: string
  readonly documentId: string
  readonly chunks: ReadonlyArray<{
    readonly chunk: KnowledgeChunk
    readonly embedding: KnowledgeEmbedding
  }>
}
|
|
19
|
+
|
|
20
|
+
/**
 * Input of `SearchIndexStoreApi.markDocumentReady`: the target document, its
 * token and chunk counts, and optional `title`, `summary` and `contentHash`.
 */
export type MarkKnowledgeDocumentReadyInput = {
  readonly collectionId: string
  readonly documentId: string
  readonly title?: string
  readonly summary?: string
  readonly contentHash?: string
  readonly tokenCount: number
  readonly chunkCount: number
}
|
|
29
|
+
|
|
30
|
+
/** Input of `SearchIndexStoreApi.markDocumentError`: the target document and an error message. */
export type MarkKnowledgeDocumentErrorInput = {
  readonly collectionId: string
  readonly documentId: string
  readonly message: string
}
|
|
35
|
+
|
|
36
|
+
/**
 * Input of `SearchIndexStoreApi.searchChunks`: a scope, a query embedding,
 * a result `limit` and an optional `minScore`.
 */
export type KnowledgeChunkSearchInput = {
  readonly scope: KnowledgeSearchScope
  readonly embedding: KnowledgeEmbedding
  readonly limit: number
  readonly minScore?: number
}
|
|
42
|
+
|
|
43
|
+
/**
 * Input of `SearchIndexStoreApi.searchChunksByText`: a scope, a text `query`
 * and a result `limit`.
 */
export type KnowledgeChunkTextSearchInput = {
  readonly scope: KnowledgeSearchScope
  readonly query: string
  readonly limit: number
}
|
|
48
|
+
|
|
49
|
+
/** One search hit: the matched chunk, its numeric score and the document it belongs to. */
export type KnowledgeChunkSearchResult = {
  readonly chunk: KnowledgeChunk
  readonly score: number
  readonly document: KnowledgeDocument
}
|
|
54
|
+
|
|
55
|
+
/**
 * Input of `SearchIndexStoreApi.getContextChunks`: the target document, a
 * chunk `position`, and a `contextChunks` count.
 *
 * NOTE(review): presumably `contextChunks` is the number of neighbouring
 * chunks to fetch around `position` — confirm against a store implementation.
 */
export type KnowledgeSearchContextChunksInput = {
  readonly collectionId: string
  readonly documentId: string
  readonly position: number
  readonly contextChunks: number
}
|
|
61
|
+
|
|
62
|
+
/**
 * Contract of the search index store. Every operation fails with
 * `SearchIndexStoreError`.
 */
export type SearchIndexStoreApi = {
  /** Upserts a collection and yields a `KnowledgeCollection`. */
  readonly upsertSet: (
    set: KnowledgeCollection
  ) => Effect.Effect<KnowledgeCollection, SearchIndexStoreError>

  /** Looks up a collection by id. */
  readonly getSet: (id: string) => Effect.Effect<KnowledgeCollection, SearchIndexStoreError>

  /** Upserts a document and yields a `KnowledgeDocument`. */
  readonly upsertDocument: (
    input: UpsertKnowledgeDocumentInput
  ) => Effect.Effect<KnowledgeDocument, SearchIndexStoreError>

  /** Marks a document as processing and yields a `KnowledgeDocument`. */
  readonly markDocumentProcessing: (input: {
    readonly collectionId: string
    readonly documentId: string
  }) => Effect.Effect<KnowledgeDocument, SearchIndexStoreError>

  /** Replaces the stored chunks (with embeddings) of a document. */
  readonly replaceDocumentChunks: (
    input: ReplaceKnowledgeDocumentChunksInput
  ) => Effect.Effect<void, SearchIndexStoreError>

  /** Marks a document as ready and yields a `KnowledgeDocument`. */
  readonly markDocumentReady: (
    input: MarkKnowledgeDocumentReadyInput
  ) => Effect.Effect<KnowledgeDocument, SearchIndexStoreError>

  /** Marks a document as errored with the given message. */
  readonly markDocumentError: (
    input: MarkKnowledgeDocumentErrorInput
  ) => Effect.Effect<void, SearchIndexStoreError>

  /** Deletes a document. */
  readonly deleteDocument: (input: {
    readonly collectionId: string
    readonly documentId: string
  }) => Effect.Effect<void, SearchIndexStoreError>

  /** Searches chunks using a query embedding. */
  readonly searchChunks: (
    input: KnowledgeChunkSearchInput
  ) => Effect.Effect<ReadonlyArray<KnowledgeChunkSearchResult>, SearchIndexStoreError>

  /** Searches chunks using a text query. */
  readonly searchChunksByText: (
    input: KnowledgeChunkTextSearchInput
  ) => Effect.Effect<ReadonlyArray<KnowledgeChunkSearchResult>, SearchIndexStoreError>

  /** Fetches context chunks for a position within a document. */
  readonly getContextChunks: (
    input: KnowledgeSearchContextChunksInput
  ) => Effect.Effect<ReadonlyArray<KnowledgeChunk>, SearchIndexStoreError>
}
|
|
95
|
+
|
|
96
|
+
/**
 * Context service tag for the search index store, registered under the key
 * `@yolk-sdk/knowledge/SearchIndexStore`. Implementations provide a
 * `SearchIndexStoreApi`.
 */
export class SearchIndexStore extends Context.Service<SearchIndexStore, SearchIndexStoreApi>()(
  '@yolk-sdk/knowledge/SearchIndexStore'
) {}
|