@yolk-sdk/knowledge 0.0.1-canary.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +104 -0
  3. package/dist/agent.d.mts +26 -0
  4. package/dist/agent.d.mts.map +1 -0
  5. package/dist/agent.mjs +45 -0
  6. package/dist/agent.mjs.map +1 -0
  7. package/dist/artifacts.d.mts +37 -0
  8. package/dist/artifacts.d.mts.map +1 -0
  9. package/dist/artifacts.mjs +28 -0
  10. package/dist/artifacts.mjs.map +1 -0
  11. package/dist/chunking.d.mts +38 -0
  12. package/dist/chunking.d.mts.map +1 -0
  13. package/dist/chunking.mjs +93 -0
  14. package/dist/chunking.mjs.map +1 -0
  15. package/dist/context.d.mts +16 -0
  16. package/dist/context.d.mts.map +1 -0
  17. package/dist/context.mjs +25 -0
  18. package/dist/context.mjs.map +1 -0
  19. package/dist/documents.d.mts +105 -0
  20. package/dist/documents.d.mts.map +1 -0
  21. package/dist/documents.mjs +93 -0
  22. package/dist/documents.mjs.map +1 -0
  23. package/dist/embeddings.d.mts +14 -0
  24. package/dist/embeddings.d.mts.map +1 -0
  25. package/dist/embeddings.mjs +8 -0
  26. package/dist/embeddings.mjs.map +1 -0
  27. package/dist/errors.d.mts +72 -0
  28. package/dist/errors.d.mts.map +1 -0
  29. package/dist/errors.mjs +26 -0
  30. package/dist/errors.mjs.map +1 -0
  31. package/dist/extraction.d.mts +19 -0
  32. package/dist/extraction.d.mts.map +1 -0
  33. package/dist/extraction.mjs +7 -0
  34. package/dist/extraction.mjs.map +1 -0
  35. package/dist/index.d.mts +5 -0
  36. package/dist/index.mjs +5 -0
  37. package/dist/ingestion.d.mts +48 -0
  38. package/dist/ingestion.d.mts.map +1 -0
  39. package/dist/ingestion.mjs +111 -0
  40. package/dist/ingestion.mjs.map +1 -0
  41. package/dist/links.d.mts +17 -0
  42. package/dist/links.d.mts.map +1 -0
  43. package/dist/links.mjs +24 -0
  44. package/dist/links.mjs.map +1 -0
  45. package/dist/provenance.d.mts +20 -0
  46. package/dist/provenance.d.mts.map +1 -0
  47. package/dist/provenance.mjs +26 -0
  48. package/dist/provenance.mjs.map +1 -0
  49. package/dist/records.d.mts +59 -0
  50. package/dist/records.d.mts.map +1 -0
  51. package/dist/records.mjs +64 -0
  52. package/dist/records.mjs.map +1 -0
  53. package/dist/representations.d.mts +36 -0
  54. package/dist/representations.d.mts.map +1 -0
  55. package/dist/representations.mjs +44 -0
  56. package/dist/representations.mjs.map +1 -0
  57. package/dist/search-store.d.mts +77 -0
  58. package/dist/search-store.d.mts.map +1 -0
  59. package/dist/search-store.mjs +7 -0
  60. package/dist/search-store.mjs.map +1 -0
  61. package/dist/search.d.mts +49 -0
  62. package/dist/search.d.mts.map +1 -0
  63. package/dist/search.mjs +179 -0
  64. package/dist/search.mjs.map +1 -0
  65. package/dist/store.d.mts +44 -0
  66. package/dist/store.d.mts.map +1 -0
  67. package/dist/store.mjs +7 -0
  68. package/dist/store.mjs.map +1 -0
  69. package/dist/summarization.d.mts +23 -0
  70. package/dist/summarization.d.mts.map +1 -0
  71. package/dist/summarization.mjs +8 -0
  72. package/dist/summarization.mjs.map +1 -0
  73. package/dist/vector-store.d.mts +3 -0
  74. package/dist/vector-store.mjs +2 -0
  75. package/package.json +147 -0
  76. package/src/agent.ts +96 -0
  77. package/src/artifacts.ts +48 -0
  78. package/src/chunking.ts +175 -0
  79. package/src/context.ts +42 -0
  80. package/src/documents.ts +109 -0
  81. package/src/embeddings.ts +18 -0
  82. package/src/errors.ts +63 -0
  83. package/src/extraction.ts +21 -0
  84. package/src/index.ts +4 -0
  85. package/src/ingestion.ts +157 -0
  86. package/src/links.ts +23 -0
  87. package/src/provenance.ts +25 -0
  88. package/src/records.ts +76 -0
  89. package/src/representations.ts +51 -0
  90. package/src/search-store.ts +98 -0
  91. package/src/search.ts +270 -0
  92. package/src/store.ts +53 -0
  93. package/src/summarization.ts +28 -0
  94. package/src/vector-store.ts +6 -0
@@ -0,0 +1,93 @@
1
+ import { Schema } from "effect";
2
+ //#region src/documents.ts
3
// Module-private primitive schemas reused by the struct schemas in this file.
// NonEmptyTrimmedString: Schema.Trimmed plus a non-empty check.
// NOTE(review): Schema.Trimmed presumably only accepts strings without surrounding
// whitespace (validation, not trimming) - confirm against the effect version in use.
+ const NonEmptyTrimmedString = Schema.Trimmed.pipe(Schema.check(Schema.isNonEmpty()));
4
// PositiveInteger: Schema.Int restricted to values strictly greater than 0.
+ const PositiveInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThan(0)));
5
// NonNegativeInteger: Schema.Int restricted to values greater than or equal to 0.
+ const NonNegativeInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThanOrEqualTo(0)));
6
// Free-form metadata bag: a record with string keys and values of unknown type.
+ const KnowledgeMetadataSchema = Schema.Record(Schema.String, Schema.Unknown);
7
// Closed set of document status values: "pending", "processing", "ready", "error".
// NOTE(review): the names suggest an ingestion lifecycle; the transitions between
// them are not defined in this file.
+ const KnowledgeDocumentStatusSchema = Schema.Literals([
8
+ "pending",
9
+ "processing",
10
+ "ready",
11
+ "error"
12
+ ]);
13
// Describes where a document came from, as a union of three structs told apart by `_tag`:
//   "File" - required `ref`, optional `name` and `mediaType`
//   "Url"  - required `url`
//   "Text" - optional `label`
// All string fields use NonEmptyTrimmedString. `url` is not checked for URL syntax here.
+ const KnowledgeSourceSchema = Schema.Union([
14
+ Schema.Struct({
15
+ _tag: Schema.Literal("File"),
16
+ ref: NonEmptyTrimmedString,
17
+ name: Schema.optional(NonEmptyTrimmedString),
18
+ mediaType: Schema.optional(NonEmptyTrimmedString)
19
+ }),
20
+ Schema.Struct({
21
+ _tag: Schema.Literal("Url"),
22
+ url: NonEmptyTrimmedString
23
+ }),
24
+ Schema.Struct({
25
+ _tag: Schema.Literal("Text"),
26
+ label: Schema.optional(NonEmptyTrimmedString)
27
+ })
28
+ ]);
29
// Embedding settings for a collection: a non-empty `model` identifier and a
// strictly positive integer `dimensions`.
+ const KnowledgeEmbeddingConfigSchema = Schema.Struct({
30
+ model: NonEmptyTrimmedString,
31
+ dimensions: PositiveInteger
32
+ });
33
// Chunking settings for a collection. The only accepted `strategy` is the literal
// "sentence-token"; `maxTokens` must be a strictly positive integer.
+ const KnowledgeChunkingConfigSchema = Schema.Struct({
34
+ strategy: Schema.Literal("sentence-token"),
35
+ maxTokens: PositiveInteger
36
+ });
37
// A knowledge collection: required `id`, `embeddingConfig` and `chunkingConfig`,
// plus an optional `label` and optional free-form `metadata`.
+ const KnowledgeCollectionSchema = Schema.Struct({
38
+ id: NonEmptyTrimmedString,
39
+ label: Schema.optional(NonEmptyTrimmedString),
40
+ embeddingConfig: KnowledgeEmbeddingConfigSchema,
41
+ chunkingConfig: KnowledgeChunkingConfigSchema,
42
+ metadata: Schema.optional(KnowledgeMetadataSchema)
43
+ });
44
// A document record inside a collection.
// Required: `id`, `collectionId`, `source` (KnowledgeSourceSchema), `status`.
// Optional: `title`, `summary`, `errorMessage`, `contentHash`, `tokenCount`,
// `chunkCount`, `metadata`.
// `summary` and `errorMessage` are plain strings (may be empty or untrimmed), while
// `title` and `contentHash` must be non-empty trimmed strings. Both counts allow 0.
+ const KnowledgeDocumentSchema = Schema.Struct({
45
+ id: NonEmptyTrimmedString,
46
+ collectionId: NonEmptyTrimmedString,
47
+ source: KnowledgeSourceSchema,
48
+ status: KnowledgeDocumentStatusSchema,
49
+ title: Schema.optional(NonEmptyTrimmedString),
50
+ summary: Schema.optional(Schema.String),
51
+ errorMessage: Schema.optional(Schema.String),
52
+ contentHash: Schema.optional(NonEmptyTrimmedString),
53
+ tokenCount: Schema.optional(NonNegativeInteger),
54
+ chunkCount: Schema.optional(NonNegativeInteger),
55
+ metadata: Schema.optional(KnowledgeMetadataSchema)
56
+ });
57
// One chunk of a document. `content` must be a non-empty trimmed string, `position`
// is a non-negative integer and `tokenCount` a strictly positive integer.
// NOTE(review): `position` is presumably the chunk's index within its document -
// confirm against the chunker implementation.
+ const KnowledgeChunkSchema = Schema.Struct({
58
+ id: NonEmptyTrimmedString,
59
+ collectionId: NonEmptyTrimmedString,
60
+ documentId: NonEmptyTrimmedString,
61
+ content: NonEmptyTrimmedString,
62
+ position: NonNegativeInteger,
63
+ tokenCount: PositiveInteger,
64
+ metadata: Schema.optional(KnowledgeMetadataSchema)
65
+ });
66
// Shape of an extracted document: required non-empty `content`, with optional
// `title`, `summary` and `metadata`.
+ const ExtractedKnowledgeDocumentSchema = Schema.Struct({
67
+ content: NonEmptyTrimmedString,
68
+ title: Schema.optional(NonEmptyTrimmedString),
69
+ summary: Schema.optional(Schema.String),
70
+ metadata: Schema.optional(KnowledgeMetadataSchema)
71
+ });
72
// Search scope, as a union told apart by `_tag`:
//   "KnowledgeCollection"  - a single collection `id`
//   "KnowledgeCollections" - an array of collection `ids`
// No minimum length is checked on `ids` here, so an empty array passes this schema.
+ const KnowledgeSearchScopeSchema = Schema.Union([Schema.Struct({
73
+ _tag: Schema.Literal("KnowledgeCollection"),
74
+ id: NonEmptyTrimmedString
75
+ }), Schema.Struct({
76
+ _tag: Schema.Literal("KnowledgeCollections"),
77
+ ids: Schema.Array(NonEmptyTrimmedString)
78
+ })]);
79
// Default chunking configuration: "sentence-token" strategy with maxTokens 512.
// makeKnowledgeCollection falls back to this object when no chunkingConfig is given.
// NOTE(review): the object is exported and handed out by reference without being
// frozen, so a caller mutating it would change the default for every later user.
+ const defaultKnowledgeChunkingConfig = {
80
+ strategy: "sentence-token",
81
+ maxTokens: 512
82
+ };
83
// Builds a plain collection object from `input`.
// - `id`, `label`, `embeddingConfig` and `metadata` are copied through unchanged.
// - `chunkingConfig` falls back to defaultKnowledgeChunkingConfig only when the
//   input value is null or undefined (`??`); the default is shared by reference.
// - `label` and `metadata` keys are always present on the result, with the value
//   undefined when the input omits them.
// No schema validation is performed here; the values are not checked against
// KnowledgeCollectionSchema.
+ const makeKnowledgeCollection = (input) => ({
84
+ id: input.id,
85
+ label: input.label,
86
+ embeddingConfig: input.embeddingConfig,
87
+ chunkingConfig: input.chunkingConfig ?? defaultKnowledgeChunkingConfig,
88
+ metadata: input.metadata
89
+ });
90
+ //#endregion
91
+ export { ExtractedKnowledgeDocumentSchema, KnowledgeChunkSchema, KnowledgeChunkingConfigSchema, KnowledgeCollectionSchema, KnowledgeDocumentSchema, KnowledgeDocumentStatusSchema, KnowledgeEmbeddingConfigSchema, KnowledgeMetadataSchema, KnowledgeSearchScopeSchema, KnowledgeSourceSchema, defaultKnowledgeChunkingConfig, makeKnowledgeCollection };
92
+
93
+ //# sourceMappingURL=documents.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"documents.mjs","names":[],"sources":["../src/documents.ts"],"sourcesContent":["import { Schema } from 'effect'\n\nconst NonEmptyTrimmedString = Schema.Trimmed.pipe(Schema.check(Schema.isNonEmpty()))\nconst PositiveInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThan(0)))\nconst NonNegativeInteger = Schema.Int.pipe(Schema.check(Schema.isGreaterThanOrEqualTo(0)))\n\nexport const KnowledgeMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)\nexport type KnowledgeMetadata = Schema.Schema.Type<typeof KnowledgeMetadataSchema>\n\nexport const KnowledgeDocumentStatusSchema = Schema.Literals(['pending', 'processing', 'ready', 'error'])\nexport type KnowledgeDocumentStatus = Schema.Schema.Type<typeof KnowledgeDocumentStatusSchema>\n\nexport const KnowledgeSourceSchema = Schema.Union([\n Schema.Struct({\n _tag: Schema.Literal('File'),\n ref: NonEmptyTrimmedString,\n name: Schema.optional(NonEmptyTrimmedString),\n mediaType: Schema.optional(NonEmptyTrimmedString)\n }),\n Schema.Struct({\n _tag: Schema.Literal('Url'),\n url: NonEmptyTrimmedString\n }),\n Schema.Struct({\n _tag: Schema.Literal('Text'),\n label: Schema.optional(NonEmptyTrimmedString)\n })\n])\nexport type KnowledgeSource = Schema.Schema.Type<typeof KnowledgeSourceSchema>\n\nexport const KnowledgeEmbeddingConfigSchema = Schema.Struct({\n model: NonEmptyTrimmedString,\n dimensions: PositiveInteger\n})\nexport type KnowledgeEmbeddingConfig = Schema.Schema.Type<typeof KnowledgeEmbeddingConfigSchema>\n\nexport const KnowledgeChunkingConfigSchema = Schema.Struct({\n strategy: Schema.Literal('sentence-token'),\n maxTokens: PositiveInteger\n})\nexport type KnowledgeChunkingConfig = Schema.Schema.Type<typeof KnowledgeChunkingConfigSchema>\n\nexport const KnowledgeCollectionSchema = Schema.Struct({\n id: NonEmptyTrimmedString,\n label: Schema.optional(NonEmptyTrimmedString),\n embeddingConfig: KnowledgeEmbeddingConfigSchema,\n chunkingConfig: KnowledgeChunkingConfigSchema,\n metadata: 
Schema.optional(KnowledgeMetadataSchema)\n})\nexport type KnowledgeCollection = Schema.Schema.Type<typeof KnowledgeCollectionSchema>\n\nexport const KnowledgeDocumentSchema = Schema.Struct({\n id: NonEmptyTrimmedString,\n collectionId: NonEmptyTrimmedString,\n source: KnowledgeSourceSchema,\n status: KnowledgeDocumentStatusSchema,\n title: Schema.optional(NonEmptyTrimmedString),\n summary: Schema.optional(Schema.String),\n errorMessage: Schema.optional(Schema.String),\n contentHash: Schema.optional(NonEmptyTrimmedString),\n tokenCount: Schema.optional(NonNegativeInteger),\n chunkCount: Schema.optional(NonNegativeInteger),\n metadata: Schema.optional(KnowledgeMetadataSchema)\n})\nexport type KnowledgeDocument = Schema.Schema.Type<typeof KnowledgeDocumentSchema>\n\nexport const KnowledgeChunkSchema = Schema.Struct({\n id: NonEmptyTrimmedString,\n collectionId: NonEmptyTrimmedString,\n documentId: NonEmptyTrimmedString,\n content: NonEmptyTrimmedString,\n position: NonNegativeInteger,\n tokenCount: PositiveInteger,\n metadata: Schema.optional(KnowledgeMetadataSchema)\n})\nexport type KnowledgeChunk = Schema.Schema.Type<typeof KnowledgeChunkSchema>\n\nexport const ExtractedKnowledgeDocumentSchema = Schema.Struct({\n content: NonEmptyTrimmedString,\n title: Schema.optional(NonEmptyTrimmedString),\n summary: Schema.optional(Schema.String),\n metadata: Schema.optional(KnowledgeMetadataSchema)\n})\nexport type ExtractedKnowledgeDocument = Schema.Schema.Type<typeof ExtractedKnowledgeDocumentSchema>\n\nexport const KnowledgeSearchScopeSchema = Schema.Union([\n Schema.Struct({ _tag: Schema.Literal('KnowledgeCollection'), id: NonEmptyTrimmedString }),\n Schema.Struct({ _tag: Schema.Literal('KnowledgeCollections'), ids: Schema.Array(NonEmptyTrimmedString) })\n])\nexport type KnowledgeSearchScope = Schema.Schema.Type<typeof KnowledgeSearchScopeSchema>\n\nexport const defaultKnowledgeChunkingConfig: KnowledgeChunkingConfig = {\n strategy: 'sentence-token',\n maxTokens: 
512\n}\n\nexport const makeKnowledgeCollection = (input: {\n readonly id: string\n readonly label?: string\n readonly embeddingConfig: KnowledgeEmbeddingConfig\n readonly chunkingConfig?: KnowledgeChunkingConfig\n readonly metadata?: KnowledgeMetadata\n}): KnowledgeCollection => ({\n id: input.id,\n label: input.label,\n embeddingConfig: input.embeddingConfig,\n chunkingConfig: input.chunkingConfig ?? defaultKnowledgeChunkingConfig,\n metadata: input.metadata\n})\n"],"mappings":";;AAEA,MAAM,wBAAwB,OAAO,QAAQ,KAAK,OAAO,MAAM,OAAO,WAAW,CAAC,CAAC;AACnF,MAAM,kBAAkB,OAAO,IAAI,KAAK,OAAO,MAAM,OAAO,cAAc,CAAC,CAAC,CAAC;AAC7E,MAAM,qBAAqB,OAAO,IAAI,KAAK,OAAO,MAAM,OAAO,uBAAuB,CAAC,CAAC,CAAC;AAEzF,MAAa,0BAA0B,OAAO,OAAO,OAAO,QAAQ,OAAO,OAAO;AAGlF,MAAa,gCAAgC,OAAO,SAAS;CAAC;CAAW;CAAc;CAAS;AAAO,CAAC;AAGxG,MAAa,wBAAwB,OAAO,MAAM;CAChD,OAAO,OAAO;EACZ,MAAM,OAAO,QAAQ,MAAM;EAC3B,KAAK;EACL,MAAM,OAAO,SAAS,qBAAqB;EAC3C,WAAW,OAAO,SAAS,qBAAqB;CAClD,CAAC;CACD,OAAO,OAAO;EACZ,MAAM,OAAO,QAAQ,KAAK;EAC1B,KAAK;CACP,CAAC;CACD,OAAO,OAAO;EACZ,MAAM,OAAO,QAAQ,MAAM;EAC3B,OAAO,OAAO,SAAS,qBAAqB;CAC9C,CAAC;AACH,CAAC;AAGD,MAAa,iCAAiC,OAAO,OAAO;CAC1D,OAAO;CACP,YAAY;AACd,CAAC;AAGD,MAAa,gCAAgC,OAAO,OAAO;CACzD,UAAU,OAAO,QAAQ,gBAAgB;CACzC,WAAW;AACb,CAAC;AAGD,MAAa,4BAA4B,OAAO,OAAO;CACrD,IAAI;CACJ,OAAO,OAAO,SAAS,qBAAqB;CAC5C,iBAAiB;CACjB,gBAAgB;CAChB,UAAU,OAAO,SAAS,uBAAuB;AACnD,CAAC;AAGD,MAAa,0BAA0B,OAAO,OAAO;CACnD,IAAI;CACJ,cAAc;CACd,QAAQ;CACR,QAAQ;CACR,OAAO,OAAO,SAAS,qBAAqB;CAC5C,SAAS,OAAO,SAAS,OAAO,MAAM;CACtC,cAAc,OAAO,SAAS,OAAO,MAAM;CAC3C,aAAa,OAAO,SAAS,qBAAqB;CAClD,YAAY,OAAO,SAAS,kBAAkB;CAC9C,YAAY,OAAO,SAAS,kBAAkB;CAC9C,UAAU,OAAO,SAAS,uBAAuB;AACnD,CAAC;AAGD,MAAa,uBAAuB,OAAO,OAAO;CAChD,IAAI;CACJ,cAAc;CACd,YAAY;CACZ,SAAS;CACT,UAAU;CACV,YAAY;CACZ,UAAU,OAAO,SAAS,uBAAuB;AACnD,CAAC;AAGD,MAAa,mCAAmC,OAAO,OAAO;CAC5D,SAAS;CACT,OAAO,OAAO,SAAS,qBAAqB;CAC5C,SAAS,OAAO,SAAS,OAAO,MAAM;CACtC,UAAU,OAAO,SAAS,uBAAuB;AACnD,CAAC;AAGD,MAAa,6BAA6B,OAAO,MAAM,CACrD,OAAO,OAAO;CAAE,MAAM,OAAO,QAAQ,qBAAqB;CAAG,IAAI;AAAsB,CAAC,GACxF,OAAO,OAAO;
CAAE,MAAM,OAAO,QAAQ,sBAAsB;CAAG,KAAK,OAAO,MAAM,qBAAqB;AAAE,CAAC,CAC1G,CAAC;AAGD,MAAa,iCAA0D;CACrE,UAAU;CACV,WAAW;AACb;AAEA,MAAa,2BAA2B,WAMZ;CAC1B,IAAI,MAAM;CACV,OAAO,MAAM;CACb,iBAAiB,MAAM;CACvB,gBAAgB,MAAM,kBAAkB;CACxC,UAAU,MAAM;AAClB"}
@@ -0,0 +1,14 @@
1
+ import { KnowledgeEmbeddingError } from "./errors.mjs";
2
+ import { Context, Effect } from "effect";
3
+
4
+ //#region src/embeddings.d.ts
5
// A single embedding vector, represented as a read-only array of numbers.
+ type KnowledgeEmbedding = ReadonlyArray<number>;
6
// Contract an embedder implementation must satisfy.
// - embedTexts: takes a list of texts and yields a list of embeddings.
//   NOTE(review): presumably one embedding per input text, in input order - the
//   type does not enforce this; confirm with the implementations.
// - embedQuery: takes a single query string and yields a single embedding.
// Both effects fail with KnowledgeEmbeddingError.
+ type KnowledgeEmbedderApi = {
7
+ readonly embedTexts: (texts: ReadonlyArray<string>) => Effect.Effect<ReadonlyArray<KnowledgeEmbedding>, KnowledgeEmbeddingError>;
8
+ readonly embedQuery: (query: string) => Effect.Effect<KnowledgeEmbedding, KnowledgeEmbeddingError>;
9
+ };
10
// Declaration of the KnowledgeEmbedder service class. The helper `_base` constant
// carries the Context.ServiceClass type: service identifier
// "@yolk-sdk/knowledge/KnowledgeEmbedder" with KnowledgeEmbedderApi as its shape.
+ declare const KnowledgeEmbedder_base: Context.ServiceClass<KnowledgeEmbedder, "@yolk-sdk/knowledge/KnowledgeEmbedder", KnowledgeEmbedderApi>;
11
+ declare class KnowledgeEmbedder extends KnowledgeEmbedder_base {}
12
+ //#endregion
13
+ export { KnowledgeEmbeddingError as EmbedderError, KnowledgeEmbedder, KnowledgeEmbedderApi, KnowledgeEmbedding };
14
+ //# sourceMappingURL=embeddings.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.d.mts","names":[],"sources":["../src/embeddings.ts"],"mappings":";;;;KAIY,kBAAA,GAAqB,aAAa;AAAA,KAElC,oBAAA;EAAA,SACD,UAAA,GACP,KAAA,EAAO,aAAA,aACJ,MAAA,CAAO,MAAA,CAAO,aAAA,CAAc,kBAAA,GAAqB,uBAAA;EAAA,SAC7C,UAAA,GAAa,KAAA,aAAkB,MAAA,CAAO,MAAA,CAAO,kBAAA,EAAoB,uBAAA;AAAA;AAAA,cAC3E,sBAAA;cAEY,iBAAA,SAA0B,sBAEtC"}
@@ -0,0 +1,8 @@
1
+ import { KnowledgeEmbeddingError } from "./errors.mjs";
2
+ import { Context } from "effect";
3
+ //#region src/embeddings.ts
4
// Runtime service class for the embedder, created through Context.Service with the
// key "@yolk-sdk/knowledge/KnowledgeEmbedder". The class body is empty.
+ var KnowledgeEmbedder = class extends Context.Service()("@yolk-sdk/knowledge/KnowledgeEmbedder") {};
5
+ //#endregion
6
+ export { KnowledgeEmbeddingError as EmbedderError, KnowledgeEmbedder };
7
+
8
+ //# sourceMappingURL=embeddings.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.mjs","names":[],"sources":["../src/embeddings.ts"],"sourcesContent":["import { Context } from 'effect'\nimport type { Effect } from 'effect'\nimport type { KnowledgeEmbeddingError } from './errors.ts'\n\nexport type KnowledgeEmbedding = ReadonlyArray<number>\n\nexport type KnowledgeEmbedderApi = {\n readonly embedTexts: (\n texts: ReadonlyArray<string>\n ) => Effect.Effect<ReadonlyArray<KnowledgeEmbedding>, KnowledgeEmbeddingError>\n readonly embedQuery: (query: string) => Effect.Effect<KnowledgeEmbedding, KnowledgeEmbeddingError>\n}\n\nexport class KnowledgeEmbedder extends Context.Service<KnowledgeEmbedder, KnowledgeEmbedderApi>()(\n '@yolk-sdk/knowledge/KnowledgeEmbedder'\n) {}\n\nexport { KnowledgeEmbeddingError as EmbedderError } from './errors.ts'\n"],"mappings":";;;AAaA,IAAa,oBAAb,cAAuC,QAAQ,QAAiD,EAC9F,uCACF,EAAE,CAAC"}
@@ -0,0 +1,72 @@
1
+ import * as Schema from "effect/Schema";
2
+
3
+ //#region src/errors.d.ts
4
// Declaration of SearchIndexStoreError. The `_base` constant types the constructor:
// it accepts the payload fields (any `_tag` key is excluded from the argument type)
// and produces a YieldableError tagged with `_tag: "SearchIndexStoreError"`.
// Payload: required `message`, optional `cause`.
+ declare const SearchIndexStoreError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
5
+ readonly _tag: "SearchIndexStoreError";
6
+ } & Readonly<A>;
7
+ declare class SearchIndexStoreError extends SearchIndexStoreError_base<{
8
+ readonly message: string;
9
+ readonly cause?: unknown;
10
+ }> {}
11
// Declaration of KnowledgeExtractionError: a YieldableError tagged with
// `_tag: "KnowledgeExtractionError"`. Payload: required `message`, optional `cause`.
+ declare const KnowledgeExtractionError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
12
+ readonly _tag: "KnowledgeExtractionError";
13
+ } & Readonly<A>;
14
+ declare class KnowledgeExtractionError extends KnowledgeExtractionError_base<{
15
+ readonly message: string;
16
+ readonly cause?: unknown;
17
+ }> {}
18
// Declaration of KnowledgeChunkingError: a YieldableError tagged with
// `_tag: "KnowledgeChunkingError"`. Payload: required `message`, optional `cause`.
+ declare const KnowledgeChunkingError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
19
+ readonly _tag: "KnowledgeChunkingError";
20
+ } & Readonly<A>;
21
+ declare class KnowledgeChunkingError extends KnowledgeChunkingError_base<{
22
+ readonly message: string;
23
+ readonly cause?: unknown;
24
+ }> {}
25
// Declaration of KnowledgeEmbeddingError: a YieldableError tagged with
// `_tag: "KnowledgeEmbeddingError"`. Payload: required `message`, optional `cause`.
+ declare const KnowledgeEmbeddingError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
26
+ readonly _tag: "KnowledgeEmbeddingError";
27
+ } & Readonly<A>;
28
+ declare class KnowledgeEmbeddingError extends KnowledgeEmbeddingError_base<{
29
+ readonly message: string;
30
+ readonly cause?: unknown;
31
+ }> {}
32
// Declaration of KnowledgeSummarizationError: a YieldableError tagged with
// `_tag: "KnowledgeSummarizationError"`. Payload: required `message`, optional `cause`.
+ declare const KnowledgeSummarizationError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
33
+ readonly _tag: "KnowledgeSummarizationError";
34
+ } & Readonly<A>;
35
+ declare class KnowledgeSummarizationError extends KnowledgeSummarizationError_base<{
36
+ readonly message: string;
37
+ readonly cause?: unknown;
38
+ }> {}
39
// Declaration of KnowledgeIngestionError: a YieldableError tagged with
// `_tag: "KnowledgeIngestionError"`. Besides `message` and an optional `cause`, it
// carries a required `stage` naming the step that failed:
// 'store' | 'extract' | 'chunk' | 'embed' | 'summarize'.
+ declare const KnowledgeIngestionError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
40
+ readonly _tag: "KnowledgeIngestionError";
41
+ } & Readonly<A>;
42
+ declare class KnowledgeIngestionError extends KnowledgeIngestionError_base<{
43
+ readonly message: string;
44
+ readonly stage: 'store' | 'extract' | 'chunk' | 'embed' | 'summarize';
45
+ readonly cause?: unknown;
46
+ }> {}
47
// Declaration of KnowledgeSearchError: a YieldableError tagged with
// `_tag: "KnowledgeSearchError"`. Besides `message` and an optional `cause`, it
// carries a required `stage` that is either 'store' or 'embed'.
+ declare const KnowledgeSearchError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").VoidIfEmpty<{ readonly [P in keyof A as P extends "_tag" ? never : P]: A[P] }>) => import("effect/Cause").YieldableError & {
48
+ readonly _tag: "KnowledgeSearchError";
49
+ } & Readonly<A>;
50
+ declare class KnowledgeSearchError extends KnowledgeSearchError_base<{
51
+ readonly message: string;
52
+ readonly stage: 'store' | 'embed';
53
+ readonly cause?: unknown;
54
+ }> {}
55
// Declaration of KnowledgeStoreError. Unlike the Data.TaggedError-based errors in
// this file, this one is typed as a Schema.Class over a tagged struct
// ("KnowledgeStoreError") with a string `message` and an optional unknown `cause`.
+ declare const KnowledgeStoreError_base: Schema.Class<KnowledgeStoreError, Schema.TaggedStruct<"KnowledgeStoreError", {
56
+ readonly message: Schema.String;
57
+ readonly cause: Schema.optional<Schema.Unknown>;
58
+ }>, import("effect/Cause").YieldableError>;
59
+ declare class KnowledgeStoreError extends KnowledgeStoreError_base {}
60
// Declaration of KnowledgeArtifactError: a Schema.Class over the tagged struct
// "KnowledgeArtifactError" with a string `message` and an optional unknown `cause`.
+ declare const KnowledgeArtifactError_base: Schema.Class<KnowledgeArtifactError, Schema.TaggedStruct<"KnowledgeArtifactError", {
61
+ readonly message: Schema.String;
62
+ readonly cause: Schema.optional<Schema.Unknown>;
63
+ }>, import("effect/Cause").YieldableError>;
64
+ declare class KnowledgeArtifactError extends KnowledgeArtifactError_base {}
65
// Declaration of KnowledgeContextError: a Schema.Class over the tagged struct
// "KnowledgeContextError" with a string `message` and an optional unknown `cause`.
+ declare const KnowledgeContextError_base: Schema.Class<KnowledgeContextError, Schema.TaggedStruct<"KnowledgeContextError", {
66
+ readonly message: Schema.String;
67
+ readonly cause: Schema.optional<Schema.Unknown>;
68
+ }>, import("effect/Cause").YieldableError>;
69
+ declare class KnowledgeContextError extends KnowledgeContextError_base {}
70
+ //#endregion
71
+ export { KnowledgeArtifactError, KnowledgeChunkingError, KnowledgeContextError, KnowledgeEmbeddingError, KnowledgeExtractionError, KnowledgeIngestionError, KnowledgeSearchError, KnowledgeStoreError, KnowledgeSummarizationError, SearchIndexStoreError };
72
+ //# sourceMappingURL=errors.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.d.mts","names":[],"sources":["../src/errors.ts"],"mappings":";;;cACuC,0BAAA;;;cAE1B,qBAAA,SAA8B,0BAAA;EAAA,SAChC,OAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,6BAAA;;;cAEQ,wBAAA,SAAiC,6BAAA;EAAA,SACnC,OAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,2BAAA;;;cAEQ,sBAAA,SAA+B,2BAAA;EAAA,SACjC,OAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,4BAAA;;;cAEQ,uBAAA,SAAgC,4BAAA;EAAA,SAClC,OAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,gCAAA;;;cAEQ,2BAAA,SAAoC,gCAAA;EAAA,SACtC,OAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,4BAAA;;;cAEQ,uBAAA,SAAgC,4BAAA;EAAA,SAClC,OAAA;EAAA,SACA,KAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,yBAAA;;;cAEQ,oBAAA,SAA6B,yBAAA;EAAA,SAC/B,OAAA;EAAA,SACA,KAAA;EAAA,SACA,KAAA;AAAA;AAAA,cACN,wBAAA;;;;cAEQ,mBAAA,SAA4B,wBAMxC;AAAA,cAAG,2BAAA;;;;cAES,sBAAA,SAA+B,2BAM3C;AAAA,cAAG,0BAAA;;;;cAES,qBAAA,SAA8B,0BAM1C"}
@@ -0,0 +1,26 @@
1
+ import { Data } from "effect";
2
+ import * as Schema$1 from "effect/Schema";
3
+ //#region src/errors.ts
4
// Runtime error classes built with Data.TaggedError. Each class has an empty body;
// the string passed to Data.TaggedError is the class's tag and matches its name.
// The payload field types (message, cause, stage) exist only in the type
// declarations, so nothing validates the constructor arguments at runtime here.
+ var SearchIndexStoreError = class extends Data.TaggedError("SearchIndexStoreError") {};
5
+ var KnowledgeExtractionError = class extends Data.TaggedError("KnowledgeExtractionError") {};
6
+ var KnowledgeChunkingError = class extends Data.TaggedError("KnowledgeChunkingError") {};
7
+ var KnowledgeEmbeddingError = class extends Data.TaggedError("KnowledgeEmbeddingError") {};
8
+ var KnowledgeSummarizationError = class extends Data.TaggedError("KnowledgeSummarizationError") {};
9
+ var KnowledgeIngestionError = class extends Data.TaggedError("KnowledgeIngestionError") {};
10
+ var KnowledgeSearchError = class extends Data.TaggedError("KnowledgeSearchError") {};
11
// Schema-backed error class tagged "KnowledgeStoreError" with two declared fields:
// a string `message` and an optional `cause` of unknown type.
+ var KnowledgeStoreError = class extends Schema$1.TaggedErrorClass()("KnowledgeStoreError", {
12
+ message: Schema$1.String,
13
+ cause: Schema$1.optional(Schema$1.Unknown)
14
+ }) {};
15
// Schema-backed error class tagged "KnowledgeArtifactError" with two declared fields:
// a string `message` and an optional `cause` of unknown type.
+ var KnowledgeArtifactError = class extends Schema$1.TaggedErrorClass()("KnowledgeArtifactError", {
16
+ message: Schema$1.String,
17
+ cause: Schema$1.optional(Schema$1.Unknown)
18
+ }) {};
19
// Schema-backed error class tagged "KnowledgeContextError" with two declared fields:
// a string `message` and an optional `cause` of unknown type.
+ var KnowledgeContextError = class extends Schema$1.TaggedErrorClass()("KnowledgeContextError", {
20
+ message: Schema$1.String,
21
+ cause: Schema$1.optional(Schema$1.Unknown)
22
+ }) {};
23
+ //#endregion
24
+ export { KnowledgeArtifactError, KnowledgeChunkingError, KnowledgeContextError, KnowledgeEmbeddingError, KnowledgeExtractionError, KnowledgeIngestionError, KnowledgeSearchError, KnowledgeStoreError, KnowledgeSummarizationError, SearchIndexStoreError };
25
+
26
+ //# sourceMappingURL=errors.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.mjs","names":["Schema"],"sources":["../src/errors.ts"],"sourcesContent":["import { Data } from 'effect'\nimport * as Schema from 'effect/Schema'\n\nexport class SearchIndexStoreError extends Data.TaggedError('SearchIndexStoreError')<{\n readonly message: string\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeExtractionError extends Data.TaggedError('KnowledgeExtractionError')<{\n readonly message: string\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeChunkingError extends Data.TaggedError('KnowledgeChunkingError')<{\n readonly message: string\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeEmbeddingError extends Data.TaggedError('KnowledgeEmbeddingError')<{\n readonly message: string\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeSummarizationError extends Data.TaggedError('KnowledgeSummarizationError')<{\n readonly message: string\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeIngestionError extends Data.TaggedError('KnowledgeIngestionError')<{\n readonly message: string\n readonly stage: 'store' | 'extract' | 'chunk' | 'embed' | 'summarize'\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeSearchError extends Data.TaggedError('KnowledgeSearchError')<{\n readonly message: string\n readonly stage: 'store' | 'embed'\n readonly cause?: unknown\n}> {}\n\nexport class KnowledgeStoreError extends Schema.TaggedErrorClass<KnowledgeStoreError>()(\n 'KnowledgeStoreError',\n {\n message: Schema.String,\n cause: Schema.optional(Schema.Unknown)\n }\n) {}\n\nexport class KnowledgeArtifactError extends Schema.TaggedErrorClass<KnowledgeArtifactError>()(\n 'KnowledgeArtifactError',\n {\n message: Schema.String,\n cause: Schema.optional(Schema.Unknown)\n }\n) {}\n\nexport class KnowledgeContextError extends Schema.TaggedErrorClass<KnowledgeContextError>()(\n 'KnowledgeContextError',\n {\n message: Schema.String,\n cause: Schema.optional(Schema.Unknown)\n }\n) 
{}\n"],"mappings":";;;AAGA,IAAa,wBAAb,cAA2C,KAAK,YAAY,uBAAuB,EAGhF,CAAC;AAEJ,IAAa,2BAAb,cAA8C,KAAK,YAAY,0BAA0B,EAGtF,CAAC;AAEJ,IAAa,yBAAb,cAA4C,KAAK,YAAY,wBAAwB,EAGlF,CAAC;AAEJ,IAAa,0BAAb,cAA6C,KAAK,YAAY,yBAAyB,EAGpF,CAAC;AAEJ,IAAa,8BAAb,cAAiD,KAAK,YAAY,6BAA6B,EAG5F,CAAC;AAEJ,IAAa,0BAAb,cAA6C,KAAK,YAAY,yBAAyB,EAIpF,CAAC;AAEJ,IAAa,uBAAb,cAA0C,KAAK,YAAY,sBAAsB,EAI9E,CAAC;AAEJ,IAAa,sBAAb,cAAyCA,SAAO,iBAAsC,EACpF,uBACA;CACE,SAASA,SAAO;CAChB,OAAOA,SAAO,SAASA,SAAO,OAAO;AACvC,CACF,EAAE,CAAC;AAEH,IAAa,yBAAb,cAA4CA,SAAO,iBAAyC,EAC1F,0BACA;CACE,SAASA,SAAO;CAChB,OAAOA,SAAO,SAASA,SAAO,OAAO;AACvC,CACF,EAAE,CAAC;AAEH,IAAa,wBAAb,cAA2CA,SAAO,iBAAwC,EACxF,yBACA;CACE,SAASA,SAAO;CAChB,OAAOA,SAAO,SAASA,SAAO,OAAO;AACvC,CACF,EAAE,CAAC"}
@@ -0,0 +1,19 @@
1
+ import { ExtractedKnowledgeDocument, KnowledgeMetadata, KnowledgeSource } from "./documents.mjs";
2
+ import { KnowledgeExtractionError } from "./errors.mjs";
3
+ import { Context, Effect } from "effect";
4
+
5
+ //#region src/extraction.d.ts
6
// Input handed to an extractor: the source descriptor together with its content.
// `content` is either a string or raw bytes (Uint8Array).
// `mediaType` and `metadata` are optional.
// NOTE(review): a "File" source also has its own optional `mediaType`; which one
// takes precedence is not defined in this declaration - confirm with the extractors.
+ type LoadedKnowledgeSource = {
7
+ readonly source: KnowledgeSource;
8
+ readonly content: string | Uint8Array;
9
+ readonly mediaType?: string;
10
+ readonly metadata?: KnowledgeMetadata;
11
+ };
12
// Contract an extractor implementation must satisfy: `extract` takes a
// LoadedKnowledgeSource and returns an Effect that succeeds with an
// ExtractedKnowledgeDocument or fails with KnowledgeExtractionError.
+ type KnowledgeExtractorApi = {
13
+ readonly extract: (source: LoadedKnowledgeSource) => Effect.Effect<ExtractedKnowledgeDocument, KnowledgeExtractionError>;
14
+ };
15
// Declaration of the KnowledgeExtractor service class. The helper `_base` constant
// carries the Context.ServiceClass type: service identifier
// "@yolk-sdk/knowledge/KnowledgeExtractor" with KnowledgeExtractorApi as its shape.
+ declare const KnowledgeExtractor_base: Context.ServiceClass<KnowledgeExtractor, "@yolk-sdk/knowledge/KnowledgeExtractor", KnowledgeExtractorApi>;
16
+ declare class KnowledgeExtractor extends KnowledgeExtractor_base {}
17
+ //#endregion
18
+ export { KnowledgeExtractor, KnowledgeExtractorApi, LoadedKnowledgeSource };
19
+ //# sourceMappingURL=extraction.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction.d.mts","names":[],"sources":["../src/extraction.ts"],"mappings":";;;;;KAKY,qBAAA;EAAA,SACD,MAAA,EAAQ,eAAA;EAAA,SACR,OAAA,WAAkB,UAAA;EAAA,SAClB,SAAA;EAAA,SACA,QAAA,GAAW,iBAAA;AAAA;AAAA,KAGV,qBAAA;EAAA,SACD,OAAA,GACP,MAAA,EAAQ,qBAAA,KACL,MAAA,CAAO,MAAA,CAAO,0BAAA,EAA4B,wBAAA;AAAA;AAAA,cAChD,uBAAA;cAEY,kBAAA,SAA2B,uBAEvC"}
@@ -0,0 +1,7 @@
1
+ import { Context } from "effect";
2
+ //#region src/extraction.ts
3
// Runtime service class for the extractor, created through Context.Service with the
// key "@yolk-sdk/knowledge/KnowledgeExtractor". The class body is empty.
+ var KnowledgeExtractor = class extends Context.Service()("@yolk-sdk/knowledge/KnowledgeExtractor") {};
4
+ //#endregion
5
+ export { KnowledgeExtractor };
6
+
7
+ //# sourceMappingURL=extraction.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction.mjs","names":[],"sources":["../src/extraction.ts"],"sourcesContent":["import { Context } from 'effect'\nimport type { Effect } from 'effect'\nimport type { ExtractedKnowledgeDocument, KnowledgeMetadata, KnowledgeSource } from './documents.ts'\nimport type { KnowledgeExtractionError } from './errors.ts'\n\nexport type LoadedKnowledgeSource = {\n readonly source: KnowledgeSource\n readonly content: string | Uint8Array\n readonly mediaType?: string\n readonly metadata?: KnowledgeMetadata\n}\n\nexport type KnowledgeExtractorApi = {\n readonly extract: (\n source: LoadedKnowledgeSource\n ) => Effect.Effect<ExtractedKnowledgeDocument, KnowledgeExtractionError>\n}\n\nexport class KnowledgeExtractor extends Context.Service<KnowledgeExtractor, KnowledgeExtractorApi>()(\n '@yolk-sdk/knowledge/KnowledgeExtractor'\n) {}\n"],"mappings":";;AAkBA,IAAa,qBAAb,cAAwC,QAAQ,QAAmD,EACjG,wCACF,EAAE,CAAC"}
@@ -0,0 +1,5 @@
1
+ import { KnowledgeArtifactError, KnowledgeContextError, KnowledgeStoreError } from "./errors.mjs";
2
+ import { KnowledgeArtifactStore } from "./artifacts.mjs";
3
+ import { buildKnowledgeContext } from "./context.mjs";
4
+ import { KnowledgeStore } from "./store.mjs";
5
+ export { KnowledgeArtifactError, KnowledgeArtifactStore, KnowledgeContextError, KnowledgeStore, KnowledgeStoreError, buildKnowledgeContext };
package/dist/index.mjs ADDED
@@ -0,0 +1,5 @@
1
+ import { KnowledgeArtifactError, KnowledgeContextError, KnowledgeStoreError } from "./errors.mjs";
2
+ import { KnowledgeArtifactStore } from "./artifacts.mjs";
3
+ import { buildKnowledgeContext } from "./context.mjs";
4
+ import { KnowledgeStore } from "./store.mjs";
5
+ export { KnowledgeArtifactError, KnowledgeArtifactStore, KnowledgeContextError, KnowledgeStore, KnowledgeStoreError, buildKnowledgeContext };
@@ -0,0 +1,48 @@
1
+ import { KnowledgeIngestionError } from "./errors.mjs";
2
+ import { KnowledgeEmbedder } from "./embeddings.mjs";
3
+ import { SearchIndexStore } from "./search-store.mjs";
4
+ import { KnowledgeChunker } from "./chunking.mjs";
5
+ import { KnowledgeExtractor, LoadedKnowledgeSource } from "./extraction.mjs";
6
+ import { KnowledgeSummarizer } from "./summarization.mjs";
7
+ import { Effect } from "effect";
8
+
9
+ //#region src/ingestion.d.ts
10
// Arguments for ingesting one document: the target `collectionId`, the `documentId`
// to use, the loaded `source` (descriptor plus content), and an optional
// `contentHash`.
+ type IngestKnowledgeDocumentInput = {
11
+ readonly collectionId: string;
12
+ readonly documentId: string;
13
+ readonly source: LoadedKnowledgeSource;
14
+ readonly contentHash?: string;
15
+ };
16
// Declaration of the ingestion entry point.
// Takes an IngestKnowledgeDocumentInput and returns an Effect that:
//   - succeeds with a document-shaped object (id, collectionId, source, status, plus
//     optional metadata, tokenCount, title, summary, errorMessage, contentHash,
//     chunkCount). The shape is written out inline and mirrors the fields of
//     KnowledgeDocumentSchema;
//   - fails with KnowledgeIngestionError;
//   - requires these services in its environment: SearchIndexStore,
//     KnowledgeEmbedder, KnowledgeSummarizer, KnowledgeChunker, KnowledgeExtractor.
+ declare const ingestKnowledgeDocument: (input: IngestKnowledgeDocumentInput) => Effect.Effect<{
17
+ readonly id: string;
18
+ readonly collectionId: string;
19
+ readonly source: {
20
+ readonly _tag: "File";
21
+ readonly ref: string;
22
+ readonly name?: string | undefined;
23
+ readonly mediaType?: string | undefined;
24
+ } | {
25
+ readonly _tag: "Url";
26
+ readonly url: string;
27
+ } | {
28
+ readonly _tag: "Text";
29
+ readonly label?: string | undefined;
30
+ };
31
+ readonly status: "pending" | "processing" | "ready" | "error";
32
+ readonly metadata?: {
33
+ readonly [x: string]: unknown;
34
+ } | undefined;
35
+ readonly tokenCount?: number | undefined;
36
+ readonly title?: string | undefined;
37
+ readonly summary?: string | undefined;
38
+ readonly errorMessage?: string | undefined;
39
+ readonly contentHash?: string | undefined;
40
+ readonly chunkCount?: number | undefined;
41
+ }, KnowledgeIngestionError, SearchIndexStore | KnowledgeEmbedder | KnowledgeSummarizer | KnowledgeChunker | KnowledgeExtractor>;
42
// Object form of the ingestion API: a single `ingest` method whose return type is
// exactly that of ingestKnowledgeDocument (same success, error and requirement types).
+ type KnowledgeIngestionPipeline = {
43
+ readonly ingest: (input: IngestKnowledgeDocumentInput) => ReturnType<typeof ingestKnowledgeDocument>;
44
+ };
45
// Zero-argument factory that returns a KnowledgeIngestionPipeline.
+ declare const makeIngestionPipeline: () => KnowledgeIngestionPipeline;
46
+ //#endregion
47
+ export { IngestKnowledgeDocumentInput, KnowledgeIngestionPipeline, ingestKnowledgeDocument, makeIngestionPipeline };
48
+ //# sourceMappingURL=ingestion.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingestion.d.mts","names":[],"sources":["../src/ingestion.ts"],"mappings":";;;;;;;;;KASY,4BAAA;EAAA,SACD,YAAA;EAAA,SACA,UAAA;EAAA,SACA,MAAA,EAAQ,qBAAqB;EAAA,SAC7B,WAAA;AAAA;AAAA,cAaE,uBAAA,GAA2B,KAAA,EAAO,4BAAA,KAA4B,MAAA,CAAA,MAAA;EAAA;;;;;;;;;;;;;;;;;;;;;;;;;KA4H/D,0BAAA;EAAA,SACD,MAAA,GAAS,KAAA,EAAO,4BAAA,KAAiC,UAAA,QAAkB,uBAAA;AAAA;AAAA,cAGjE,qBAAA,QAA4B,0BAEvC"}
@@ -0,0 +1,111 @@
1
+ import { KnowledgeIngestionError } from "./errors.mjs";
2
+ import { KnowledgeEmbedder } from "./embeddings.mjs";
3
+ import { SearchIndexStore } from "./search-store.mjs";
4
+ import { KnowledgeChunker } from "./chunking.mjs";
5
+ import { KnowledgeExtractor } from "./extraction.mjs";
6
+ import { KnowledgeSummarizer } from "./summarization.mjs";
7
+ import { Array, Effect } from "effect";
8
+ //#region src/ingestion.ts
9
+ const markErrorBestEffort = (input, error) => Effect.gen(function* () { // Best-effort: records `error.message` against the document via SearchIndexStore.markDocumentError.
10
+ yield* (yield* SearchIndexStore).markDocumentError({ // Resolves the SearchIndexStore service and calls it in one expression.
11
+ collectionId: input.collectionId,
12
+ documentId: input.documentId,
13
+ message: error.message
14
+ });
15
+ }).pipe(Effect.catch(() => Effect.void)); // A failure of the marking call itself is caught and replaced with Effect.void.
16
+ const ingestKnowledgeDocument = (input) => Effect.gen(function* () { // Ingests one document: load collection, mark "processing", extract, chunk, embed + summarize, replace chunks, mark ready.
17
+ yield* Effect.annotateCurrentSpan({ // Attach the collection id, document id and source tag to the current span.
18
+ "knowledge_search.set_id": input.collectionId,
19
+ "knowledge_search.document_id": input.documentId,
20
+ "knowledge_search.source_type": input.source.source._tag
21
+ });
22
+ const store = yield* SearchIndexStore; // Resolve the five services used below.
23
+ const extractor = yield* KnowledgeExtractor;
24
+ const chunker = yield* KnowledgeChunker;
25
+ const embedder = yield* KnowledgeEmbedder;
26
+ const summarizer = yield* KnowledgeSummarizer;
27
+ const collection = yield* store.getSet(input.collectionId).pipe(Effect.mapError((error) => new KnowledgeIngestionError({ // Load the collection; its chunkingConfig.maxTokens is read when chunking below.
28
+ message: error.message,
29
+ stage: "store", // Each step's failure is wrapped in KnowledgeIngestionError with a `stage` naming that step.
30
+ cause: error
31
+ })));
32
+ yield* store.upsertDocument({ document: { // Write the document with status "processing" before extraction starts.
33
+ id: input.documentId,
34
+ collectionId: input.collectionId,
35
+ source: input.source.source,
36
+ status: "processing",
37
+ metadata: input.source.metadata
38
+ } }).pipe(Effect.mapError((error) => new KnowledgeIngestionError({
39
+ message: error.message,
40
+ stage: "store",
41
+ cause: error
42
+ })));
43
+ const extracted = yield* extractor.extract(input.source).pipe(Effect.mapError((error) => new KnowledgeIngestionError({ // `extracted` supplies content, metadata, title and summary to the steps below.
44
+ message: error.message,
45
+ stage: "extract",
46
+ cause: error
47
+ })));
48
+ const chunks = yield* chunker.chunk({ // Split the extracted content using the collection's maxTokens setting.
49
+ collectionId: input.collectionId,
50
+ documentId: input.documentId,
51
+ content: extracted.content,
52
+ maxTokens: collection.chunkingConfig.maxTokens,
53
+ metadata: extracted.metadata
54
+ }).pipe(Effect.mapError((error) => new KnowledgeIngestionError({
55
+ message: error.message,
56
+ stage: "chunk",
57
+ cause: error
58
+ })));
59
+ const indexed = yield* Effect.all({ // Embedding and summarization are combined in one Effect.all call (see the concurrency option below).
60
+ embeddings: embedder.embedTexts(chunks.map((chunk) => chunk.content)).pipe(Effect.mapError((error) => new KnowledgeIngestionError({ // One text per chunk, in chunk order.
61
+ message: error.message,
62
+ stage: "embed",
63
+ cause: error
64
+ }))),
65
+ summary: summarizer.summarize({ // Summarizes the full extracted content, not the individual chunks.
66
+ content: extracted.content,
67
+ sourceTitle: extracted.title,
68
+ metadata: extracted.metadata
69
+ }).pipe(Effect.mapError((error) => new KnowledgeIngestionError({
70
+ message: error.message,
71
+ stage: "summarize",
72
+ cause: error
73
+ })))
74
+ }, { concurrency: "unbounded" });
75
+ if (indexed.embeddings.length !== chunks.length) return yield* Effect.fail(new KnowledgeIngestionError({ // Guard: require exactly one embedding per chunk before pairing them.
76
+ message: "Embedding count did not match chunk count",
77
+ stage: "embed"
78
+ }));
79
+ const indexedChunks = Array.zip(chunks, indexed.embeddings).map(([chunk, embedding]) => ({ // `Array` is the module imported from "effect" in this file, not the global Array.
80
+ chunk,
81
+ embedding
82
+ }));
83
+ yield* store.replaceDocumentChunks({ // Hand the chunk/embedding pairs to the store for this document.
84
+ collectionId: input.collectionId,
85
+ documentId: input.documentId,
86
+ chunks: indexedChunks
87
+ }).pipe(Effect.mapError((error) => new KnowledgeIngestionError({
88
+ message: error.message,
89
+ stage: "store",
90
+ cause: error
91
+ })));
92
+ const tokenCount = chunks.reduce((total, chunk) => total + chunk.tokenCount, 0); // Sum of the per-chunk token counts.
93
+ return yield* store.markDocumentReady({ // The value returned by markDocumentReady is this effect's success value.
94
+ collectionId: input.collectionId,
95
+ documentId: input.documentId,
96
+ title: indexed.summary.title ?? extracted.title, // Prefer the summarizer's title; fall back to the extracted one when it is null/undefined.
97
+ summary: indexed.summary.summary ?? extracted.summary, // Same fallback rule for the summary.
98
+ contentHash: input.contentHash,
99
+ tokenCount,
100
+ chunkCount: chunks.length
101
+ }).pipe(Effect.mapError((error) => new KnowledgeIngestionError({
102
+ message: error.message,
103
+ stage: "store",
104
+ cause: error
105
+ })));
106
+ }).pipe(Effect.withSpan("knowledge_search.ingestDocument"), Effect.catch((error) => markErrorBestEffort(input, error).pipe(Effect.flatMap(() => Effect.fail(error))))); // On failure: try to mark the document as errored, then fail again with the original error.
107
+ const makeIngestionPipeline = () => ({ ingest: ingestKnowledgeDocument }); // Returns a pipeline object whose `ingest` is ingestKnowledgeDocument itself.
108
+ //#endregion
109
+ export { ingestKnowledgeDocument, makeIngestionPipeline };
110
+
111
+ //# sourceMappingURL=ingestion.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingestion.mjs","names":["Arr"],"sources":["../src/ingestion.ts"],"sourcesContent":["import { Array as Arr, Effect } from 'effect'\nimport { KnowledgeChunker } from './chunking.ts'\nimport { KnowledgeEmbedder } from './embeddings.ts'\nimport { KnowledgeExtractor } from './extraction.ts'\nimport { KnowledgeSummarizer } from './summarization.ts'\nimport type { LoadedKnowledgeSource } from './extraction.ts'\nimport { KnowledgeIngestionError } from './errors.ts'\nimport { SearchIndexStore } from './search-store.ts'\n\nexport type IngestKnowledgeDocumentInput = {\n readonly collectionId: string\n readonly documentId: string\n readonly source: LoadedKnowledgeSource\n readonly contentHash?: string\n}\n\nconst markErrorBestEffort = (input: IngestKnowledgeDocumentInput, error: KnowledgeIngestionError) =>\n Effect.gen(function* () {\n const store = yield* SearchIndexStore\n yield* store.markDocumentError({\n collectionId: input.collectionId,\n documentId: input.documentId,\n message: error.message\n })\n }).pipe(Effect.catch(() => Effect.void))\n\nexport const ingestKnowledgeDocument = (input: IngestKnowledgeDocumentInput) =>\n Effect.gen(function* () {\n yield* Effect.annotateCurrentSpan({\n 'knowledge_search.set_id': input.collectionId,\n 'knowledge_search.document_id': input.documentId,\n 'knowledge_search.source_type': input.source.source._tag\n })\n const store = yield* SearchIndexStore\n const extractor = yield* KnowledgeExtractor\n const chunker = yield* KnowledgeChunker\n const embedder = yield* KnowledgeEmbedder\n const summarizer = yield* KnowledgeSummarizer\n const collection = yield* store\n .getSet(input.collectionId)\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'store', cause: error })\n )\n )\n\n yield* store\n .upsertDocument({\n document: {\n id: input.documentId,\n collectionId: input.collectionId,\n source: input.source.source,\n status: 'processing',\n metadata: 
input.source.metadata\n }\n })\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'store', cause: error })\n )\n )\n\n const extracted = yield* extractor\n .extract(input.source)\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'extract', cause: error })\n )\n )\n\n const chunks = yield* chunker\n .chunk({\n collectionId: input.collectionId,\n documentId: input.documentId,\n content: extracted.content,\n maxTokens: collection.chunkingConfig.maxTokens,\n metadata: extracted.metadata\n })\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'chunk', cause: error })\n )\n )\n\n const indexed = yield* Effect.all(\n {\n embeddings: embedder.embedTexts(chunks.map(chunk => chunk.content)).pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'embed', cause: error })\n )\n ),\n summary: summarizer\n .summarize({\n content: extracted.content,\n sourceTitle: extracted.title,\n metadata: extracted.metadata\n })\n .pipe(\n Effect.mapError(\n error =>\n new KnowledgeIngestionError({ message: error.message, stage: 'summarize', cause: error })\n )\n )\n },\n { concurrency: 'unbounded' }\n )\n\n if (indexed.embeddings.length !== chunks.length) {\n return yield* Effect.fail(\n new KnowledgeIngestionError({ message: 'Embedding count did not match chunk count', stage: 'embed' })\n )\n }\n\n const indexedChunks = Arr.zip(chunks, indexed.embeddings).map(([chunk, embedding]) => ({\n chunk,\n embedding\n }))\n yield* store\n .replaceDocumentChunks({\n collectionId: input.collectionId,\n documentId: input.documentId,\n chunks: indexedChunks\n })\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'store', cause: error })\n )\n )\n\n const tokenCount = chunks.reduce((total, chunk) => total + chunk.tokenCount, 0)\n return yield* store\n 
.markDocumentReady({\n collectionId: input.collectionId,\n documentId: input.documentId,\n title: indexed.summary.title ?? extracted.title,\n summary: indexed.summary.summary ?? extracted.summary,\n contentHash: input.contentHash,\n tokenCount,\n chunkCount: chunks.length\n })\n .pipe(\n Effect.mapError(\n error => new KnowledgeIngestionError({ message: error.message, stage: 'store', cause: error })\n )\n )\n }).pipe(\n Effect.withSpan('knowledge_search.ingestDocument'),\n Effect.catch(error => markErrorBestEffort(input, error).pipe(Effect.flatMap(() => Effect.fail(error))))\n )\n\nexport type KnowledgeIngestionPipeline = {\n readonly ingest: (input: IngestKnowledgeDocumentInput) => ReturnType<typeof ingestKnowledgeDocument>\n}\n\nexport const makeIngestionPipeline = (): KnowledgeIngestionPipeline => ({\n ingest: ingestKnowledgeDocument\n})\n"],"mappings":";;;;;;;;AAgBA,MAAM,uBAAuB,OAAqC,UAChE,OAAO,IAAI,aAAa;CAEtB,QAAO,OADc,kBACR,kBAAkB;EAC7B,cAAc,MAAM;EACpB,YAAY,MAAM;EAClB,SAAS,MAAM;CACjB,CAAC;AACH,CAAC,EAAE,KAAK,OAAO,YAAY,OAAO,IAAI,CAAC;AAEzC,MAAa,2BAA2B,UACtC,OAAO,IAAI,aAAa;CACtB,OAAO,OAAO,oBAAoB;EAChC,2BAA2B,MAAM;EACjC,gCAAgC,MAAM;EACtC,gCAAgC,MAAM,OAAO,OAAO;CACtD,CAAC;CACD,MAAM,QAAQ,OAAO;CACrB,MAAM,YAAY,OAAO;CACzB,MAAM,UAAU,OAAO;CACvB,MAAM,WAAW,OAAO;CACxB,MAAM,aAAa,OAAO;CAC1B,MAAM,aAAa,OAAO,MACvB,OAAO,MAAM,YAAY,EACzB,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAS,OAAO;CAAM,CAAC,CAC/F,CACF;CAEF,OAAO,MACJ,eAAe,EACd,UAAU;EACR,IAAI,MAAM;EACV,cAAc,MAAM;EACpB,QAAQ,MAAM,OAAO;EACrB,QAAQ;EACR,UAAU,MAAM,OAAO;CACzB,EACF,CAAC,EACA,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAS,OAAO;CAAM,CAAC,CAC/F,CACF;CAEF,MAAM,YAAY,OAAO,UACtB,QAAQ,MAAM,MAAM,EACpB,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAW,OAAO;CAAM,CAAC,CACjG,CACF;CAEF,MAAM,SAAS,OAAO,QACnB,MAAM;EACL,cAAc,MAAM;EACpB,YAAY,MAAM;EAClB,SAAS,UAAU;EACnB,WAAW,WAAW,eAAe;EACrC,UAAU,UAAU;CACtB,CAAC,EACA,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAS,OAAO;CAAM,CAAC,CAC/
F,CACF;CAEF,MAAM,UAAU,OAAO,OAAO,IAC5B;EACE,YAAY,SAAS,WAAW,OAAO,KAAI,UAAS,MAAM,OAAO,CAAC,EAAE,KAClE,OAAO,UACL,UAAS,IAAI,wBAAwB;GAAE,SAAS,MAAM;GAAS,OAAO;GAAS,OAAO;EAAM,CAAC,CAC/F,CACF;EACA,SAAS,WACN,UAAU;GACT,SAAS,UAAU;GACnB,aAAa,UAAU;GACvB,UAAU,UAAU;EACtB,CAAC,EACA,KACC,OAAO,UACL,UACE,IAAI,wBAAwB;GAAE,SAAS,MAAM;GAAS,OAAO;GAAa,OAAO;EAAM,CAAC,CAC5F,CACF;CACJ,GACA,EAAE,aAAa,YAAY,CAC7B;CAEA,IAAI,QAAQ,WAAW,WAAW,OAAO,QACvC,OAAO,OAAO,OAAO,KACnB,IAAI,wBAAwB;EAAE,SAAS;EAA6C,OAAO;CAAQ,CAAC,CACtG;CAGF,MAAM,gBAAgBA,MAAI,IAAI,QAAQ,QAAQ,UAAU,EAAE,KAAK,CAAC,OAAO,gBAAgB;EACrF;EACA;CACF,EAAE;CACF,OAAO,MACJ,sBAAsB;EACrB,cAAc,MAAM;EACpB,YAAY,MAAM;EAClB,QAAQ;CACV,CAAC,EACA,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAS,OAAO;CAAM,CAAC,CAC/F,CACF;CAEF,MAAM,aAAa,OAAO,QAAQ,OAAO,UAAU,QAAQ,MAAM,YAAY,CAAC;CAC9E,OAAO,OAAO,MACX,kBAAkB;EACjB,cAAc,MAAM;EACpB,YAAY,MAAM;EAClB,OAAO,QAAQ,QAAQ,SAAS,UAAU;EAC1C,SAAS,QAAQ,QAAQ,WAAW,UAAU;EAC9C,aAAa,MAAM;EACnB;EACA,YAAY,OAAO;CACrB,CAAC,EACA,KACC,OAAO,UACL,UAAS,IAAI,wBAAwB;EAAE,SAAS,MAAM;EAAS,OAAO;EAAS,OAAO;CAAM,CAAC,CAC/F,CACF;AACJ,CAAC,EAAE,KACD,OAAO,SAAS,iCAAiC,GACjD,OAAO,OAAM,UAAS,oBAAoB,OAAO,KAAK,EAAE,KAAK,OAAO,cAAc,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CACxG;AAMF,MAAa,+BAA2D,EACtE,QAAQ,wBACV"}
@@ -0,0 +1,17 @@
1
+ import * as Schema from "effect/Schema";
2
+
3
+ //#region src/links.d.ts
4
+ declare const KnowledgeLinkTypeSchema: Schema.Literals<readonly ["cites", "supports", "contradicts", "supersedes", "mentions", "derived_from", "related_to"]>; // Schema of the seven allowed link-type string literals.
5
+ type KnowledgeLinkType = Schema.Schema.Type<typeof KnowledgeLinkTypeSchema>; // Type derived from KnowledgeLinkTypeSchema.
6
+ declare const KnowledgeLinkSchema: Schema.Struct<{ // Struct schema for a typed link from one record id to another.
7
+ readonly id: Schema.Trimmed;
8
+ readonly fromRecordId: Schema.Trimmed; // Id of the record at the "from" end of the link.
9
+ readonly toRecordId: Schema.Trimmed; // Id of the record at the "to" end of the link.
10
+ readonly type: Schema.Literals<readonly ["cites", "supports", "contradicts", "supersedes", "mentions", "derived_from", "related_to"]>; // Same literal set as KnowledgeLinkTypeSchema.
11
+ readonly metadata: Schema.optional<Schema.$Record<Schema.String, Schema.Unknown>>; // Optional record: string keys, unknown values.
12
+ readonly createdAt: Schema.DateTimeUtc;
13
+ }>;
14
+ type KnowledgeLink = Schema.Schema.Type<typeof KnowledgeLinkSchema>; // Type derived from KnowledgeLinkSchema.
15
+ //#endregion
16
+ export { KnowledgeLink, KnowledgeLinkSchema, KnowledgeLinkType, KnowledgeLinkTypeSchema };
17
+ //# sourceMappingURL=links.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"links.d.mts","names":[],"sources":["../src/links.ts"],"mappings":";;;cAGa,uBAAA,EAAuB,MAAA,CAAA,QAAA;AAAA,KASxB,iBAAA,GAAoB,MAAA,CAAO,MAAA,CAAO,IAAI,QAAQ,uBAAA;AAAA,cAE7C,mBAAA,EAAmB,MAAA,CAAA,MAAA;EAAA;;;;;;;KAQpB,aAAA,GAAgB,MAAA,CAAO,MAAA,CAAO,IAAI,QAAQ,mBAAA"}
package/dist/links.mjs ADDED
@@ -0,0 +1,24 @@
1
+ import { KnowledgeMetadataSchema, NonEmptyTrimmedString } from "./records.mjs";
2
+ import * as Schema from "effect/Schema";
3
+ //#region src/links.ts
4
+ const KnowledgeLinkTypeSchema = Schema.Literals([ // The seven string literals accepted as a link type.
5
+ "cites",
6
+ "supports",
7
+ "contradicts",
8
+ "supersedes",
9
+ "mentions",
10
+ "derived_from",
11
+ "related_to"
12
+ ]);
13
+ const KnowledgeLinkSchema = Schema.Struct({ // Struct schema for a typed link from one record id to another.
14
+ id: NonEmptyTrimmedString, // NonEmptyTrimmedString and KnowledgeMetadataSchema are imported from ./records.mjs.
15
+ fromRecordId: NonEmptyTrimmedString,
16
+ toRecordId: NonEmptyTrimmedString,
17
+ type: KnowledgeLinkTypeSchema, // Restricted to the literals of KnowledgeLinkTypeSchema.
18
+ metadata: Schema.optional(KnowledgeMetadataSchema), // The only optional field of the struct.
19
+ createdAt: Schema.DateTimeUtc
20
+ });
21
+ //#endregion
22
+ export { KnowledgeLinkSchema, KnowledgeLinkTypeSchema };
23
+
24
+ //# sourceMappingURL=links.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"links.mjs","names":[],"sources":["../src/links.ts"],"sourcesContent":["import * as Schema from 'effect/Schema'\nimport { KnowledgeMetadataSchema, NonEmptyTrimmedString } from './records.ts'\n\nexport const KnowledgeLinkTypeSchema = Schema.Literals([\n 'cites',\n 'supports',\n 'contradicts',\n 'supersedes',\n 'mentions',\n 'derived_from',\n 'related_to'\n])\nexport type KnowledgeLinkType = Schema.Schema.Type<typeof KnowledgeLinkTypeSchema>\n\nexport const KnowledgeLinkSchema = Schema.Struct({\n id: NonEmptyTrimmedString,\n fromRecordId: NonEmptyTrimmedString,\n toRecordId: NonEmptyTrimmedString,\n type: KnowledgeLinkTypeSchema,\n metadata: Schema.optional(KnowledgeMetadataSchema),\n createdAt: Schema.DateTimeUtc\n})\nexport type KnowledgeLink = Schema.Schema.Type<typeof KnowledgeLinkSchema>\n"],"mappings":";;;AAGA,MAAa,0BAA0B,OAAO,SAAS;CACrD;CACA;CACA;CACA;CACA;CACA;CACA;AACF,CAAC;AAGD,MAAa,sBAAsB,OAAO,OAAO;CAC/C,IAAI;CACJ,cAAc;CACd,YAAY;CACZ,MAAM;CACN,UAAU,OAAO,SAAS,uBAAuB;CACjD,WAAW,OAAO;AACpB,CAAC"}
@@ -0,0 +1,20 @@
1
+ import * as Schema from "effect/Schema";
2
+
3
+ //#region src/provenance.d.ts
4
+ declare const KnowledgeProvenanceSourceKindSchema: Schema.Literals<readonly ["upload", "user_statement", "url", "generated", "imported", "external_api"]>; // Schema of the six allowed provenance source-kind string literals.
5
+ type KnowledgeProvenanceSourceKind = Schema.Schema.Type<typeof KnowledgeProvenanceSourceKindSchema>; // Type derived from KnowledgeProvenanceSourceKindSchema.
6
+ declare const KnowledgeProvenanceSchema: Schema.Struct<{ // Struct schema for a provenance entry attached to a record id.
7
+ readonly id: Schema.Trimmed;
8
+ readonly recordId: Schema.Trimmed; // Id of the record this provenance entry refers to.
9
+ readonly artifactId: Schema.optional<Schema.Trimmed>;
10
+ readonly sourceKind: Schema.Literals<readonly ["upload", "user_statement", "url", "generated", "imported", "external_api"]>; // Same literal set as KnowledgeProvenanceSourceKindSchema.
11
+ readonly sourceLabel: Schema.Trimmed; // Required, unlike the optional sourceUrl.
12
+ readonly sourceUrl: Schema.optional<Schema.Trimmed>;
13
+ readonly observedAt: Schema.optional<Schema.DateTimeUtc>;
14
+ readonly metadata: Schema.optional<Schema.$Record<Schema.String, Schema.Unknown>>; // Optional record: string keys, unknown values.
15
+ readonly createdAt: Schema.DateTimeUtc;
16
+ }>;
17
+ type KnowledgeProvenance = Schema.Schema.Type<typeof KnowledgeProvenanceSchema>; // Type derived from KnowledgeProvenanceSchema.
18
+ //#endregion
19
+ export { KnowledgeProvenance, KnowledgeProvenanceSchema, KnowledgeProvenanceSourceKind, KnowledgeProvenanceSourceKindSchema };
20
+ //# sourceMappingURL=provenance.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provenance.d.mts","names":[],"sources":["../src/provenance.ts"],"mappings":";;;cAGa,mCAAA,EAAmC,MAAA,CAAA,QAAA;AAAA,KAQpC,6BAAA,GAAgC,MAAA,CAAO,MAAA,CAAO,IAAI,QAAQ,mCAAA;AAAA,cAEzD,yBAAA,EAAyB,MAAA,CAAA,MAAA;EAAA;;;;;;;;;;KAW1B,mBAAA,GAAsB,MAAA,CAAO,MAAA,CAAO,IAAI,QAAQ,yBAAA"}
@@ -0,0 +1,26 @@
1
+ import { KnowledgeMetadataSchema, NonEmptyTrimmedString } from "./records.mjs";
2
+ import * as Schema from "effect/Schema";
3
+ //#region src/provenance.ts
4
+ const KnowledgeProvenanceSourceKindSchema = Schema.Literals([ // The six string literals accepted as a provenance source kind.
5
+ "upload",
6
+ "user_statement",
7
+ "url",
8
+ "generated",
9
+ "imported",
10
+ "external_api"
11
+ ]);
12
+ const KnowledgeProvenanceSchema = Schema.Struct({ // Struct schema for a provenance entry attached to a record id.
13
+ id: NonEmptyTrimmedString, // NonEmptyTrimmedString and KnowledgeMetadataSchema are imported from ./records.mjs.
14
+ recordId: NonEmptyTrimmedString,
15
+ artifactId: Schema.optional(NonEmptyTrimmedString), // Optional fields: artifactId, sourceUrl, observedAt, metadata.
16
+ sourceKind: KnowledgeProvenanceSourceKindSchema, // Restricted to the literals of KnowledgeProvenanceSourceKindSchema.
17
+ sourceLabel: NonEmptyTrimmedString,
18
+ sourceUrl: Schema.optional(NonEmptyTrimmedString),
19
+ observedAt: Schema.optional(Schema.DateTimeUtc),
20
+ metadata: Schema.optional(KnowledgeMetadataSchema),
21
+ createdAt: Schema.DateTimeUtc
22
+ });
23
+ //#endregion
24
+ export { KnowledgeProvenanceSchema, KnowledgeProvenanceSourceKindSchema };
25
+
26
+ //# sourceMappingURL=provenance.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provenance.mjs","names":[],"sources":["../src/provenance.ts"],"sourcesContent":["import * as Schema from 'effect/Schema'\nimport { KnowledgeMetadataSchema, NonEmptyTrimmedString } from './records.ts'\n\nexport const KnowledgeProvenanceSourceKindSchema = Schema.Literals([\n 'upload',\n 'user_statement',\n 'url',\n 'generated',\n 'imported',\n 'external_api'\n])\nexport type KnowledgeProvenanceSourceKind = Schema.Schema.Type<typeof KnowledgeProvenanceSourceKindSchema>\n\nexport const KnowledgeProvenanceSchema = Schema.Struct({\n id: NonEmptyTrimmedString,\n recordId: NonEmptyTrimmedString,\n artifactId: Schema.optional(NonEmptyTrimmedString),\n sourceKind: KnowledgeProvenanceSourceKindSchema,\n sourceLabel: NonEmptyTrimmedString,\n sourceUrl: Schema.optional(NonEmptyTrimmedString),\n observedAt: Schema.optional(Schema.DateTimeUtc),\n metadata: Schema.optional(KnowledgeMetadataSchema),\n createdAt: Schema.DateTimeUtc\n})\nexport type KnowledgeProvenance = Schema.Schema.Type<typeof KnowledgeProvenanceSchema>\n"],"mappings":";;;AAGA,MAAa,sCAAsC,OAAO,SAAS;CACjE;CACA;CACA;CACA;CACA;CACA;AACF,CAAC;AAGD,MAAa,4BAA4B,OAAO,OAAO;CACrD,IAAI;CACJ,UAAU;CACV,YAAY,OAAO,SAAS,qBAAqB;CACjD,YAAY;CACZ,aAAa;CACb,WAAW,OAAO,SAAS,qBAAqB;CAChD,YAAY,OAAO,SAAS,OAAO,WAAW;CAC9C,UAAU,OAAO,SAAS,uBAAuB;CACjD,WAAW,OAAO;AACpB,CAAC"}