memo-grafter 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/USER_GUIDE.md +42 -5
- package/dist/MemoGrafter.d.ts +4 -1
- package/dist/MemoGrafter.d.ts.map +1 -1
- package/dist/MemoGrafter.js +24 -0
- package/dist/MemoGrafter.js.map +1 -1
- package/dist/MemoGrafterAgent.d.ts +3 -1
- package/dist/MemoGrafterAgent.d.ts.map +1 -1
- package/dist/MemoGrafterAgent.js +24 -1
- package/dist/MemoGrafterAgent.js.map +1 -1
- package/dist/crawler/VersioningPass.d.ts.map +1 -1
- package/dist/crawler/VersioningPass.js +7 -10
- package/dist/crawler/VersioningPass.js.map +1 -1
- package/dist/crawler/memoryMaintenance.d.ts +7 -0
- package/dist/crawler/memoryMaintenance.d.ts.map +1 -1
- package/dist/crawler/memoryMaintenance.js +30 -0
- package/dist/crawler/memoryMaintenance.js.map +1 -1
- package/dist/crawler/types.d.ts +1 -0
- package/dist/crawler/types.d.ts.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/pipeline/IngestPipeline.d.ts +3 -2
- package/dist/pipeline/IngestPipeline.d.ts.map +1 -1
- package/dist/pipeline/IngestPipeline.js +29 -5
- package/dist/pipeline/IngestPipeline.js.map +1 -1
- package/dist/pipeline/SegmentProcessor.d.ts +2 -4
- package/dist/pipeline/SegmentProcessor.d.ts.map +1 -1
- package/dist/pipeline/SegmentProcessor.js +10 -8
- package/dist/pipeline/SegmentProcessor.js.map +1 -1
- package/dist/prompts/segmentExtractionPrompt.d.ts +1 -1
- package/dist/prompts/segmentExtractionPrompt.d.ts.map +1 -1
- package/dist/prompts/segmentExtractionPrompt.js +7 -1
- package/dist/prompts/segmentExtractionPrompt.js.map +1 -1
- package/dist/queue/IngestQueue.d.ts +3 -2
- package/dist/queue/IngestQueue.d.ts.map +1 -1
- package/dist/queue/IngestQueue.js +19 -0
- package/dist/queue/IngestQueue.js.map +1 -1
- package/dist/store/postgres-pgvector/GraphStore.d.ts.map +1 -1
- package/dist/store/postgres-pgvector/GraphStore.js +19 -1
- package/dist/store/postgres-pgvector/GraphStore.js.map +1 -1
- package/dist/types.d.ts +15 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/text/splitTextForIngestion.d.ts +2 -0
- package/dist/utils/text/splitTextForIngestion.d.ts.map +1 -0
- package/dist/utils/text/splitTextForIngestion.js +37 -0
- package/dist/utils/text/splitTextForIngestion.js.map +1 -0
- package/dist/utils/text/splitTextForIngestion.test.d.ts +2 -0
- package/dist/utils/text/splitTextForIngestion.test.d.ts.map +1 -0
- package/dist/utils/text/splitTextForIngestion.test.js +19 -0
- package/dist/utils/text/splitTextForIngestion.test.js.map +1 -0
- package/package.json +1 -1
package/dist/types.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ export interface TopicNode {
|
|
|
12
12
|
summary: string;
|
|
13
13
|
embedding: number[];
|
|
14
14
|
tags?: string[];
|
|
15
|
+
source?: string;
|
|
15
16
|
messageRange: [number, number];
|
|
16
17
|
topicOrder: number;
|
|
17
18
|
driftScore: number;
|
|
@@ -51,6 +52,7 @@ export interface MemoryNode {
|
|
|
51
52
|
confidence: number;
|
|
52
53
|
embedding: number[];
|
|
53
54
|
tags?: string[];
|
|
55
|
+
source?: string;
|
|
54
56
|
sourceUrl: string | null;
|
|
55
57
|
sourceTitle: string | null;
|
|
56
58
|
supersededBy: string | null;
|
|
@@ -146,6 +148,19 @@ export interface TagFilterOptions {
|
|
|
146
148
|
export interface IngestOptions {
|
|
147
149
|
tags?: string[];
|
|
148
150
|
}
|
|
151
|
+
export interface IngestTextOptions {
|
|
152
|
+
replace?: boolean;
|
|
153
|
+
label?: string;
|
|
154
|
+
source?: string;
|
|
155
|
+
}
|
|
156
|
+
/** @internal */
|
|
157
|
+
export interface IngestPipelineOptions extends IngestOptions {
|
|
158
|
+
replace?: boolean;
|
|
159
|
+
label?: string;
|
|
160
|
+
source?: string;
|
|
161
|
+
sourceType?: MemorySourceType;
|
|
162
|
+
minSegmentMessages?: number;
|
|
163
|
+
}
|
|
149
164
|
export interface RetrievalResult {
|
|
150
165
|
facts: (MemoryNode & {
|
|
151
166
|
similarity: number;
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAC5C,MAAM,MAAM,gBAAgB,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEzD,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,GAAG,WAAW,CAAC;AAChF,MAAM,MAAM,gBAAgB,GAAG,cAAc,GAAG,MAAM,GAAG,UAAU,GAAG,MAAM,CAAC;AAE7E,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,UAAU,CAAC;IACvB,UAAU,EAAE,gBAAgB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,gBAAiB,SAAQ,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC;CAAG;AAE1E,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3D,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,QAAQ,CAAC,EAAE,UAAU,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,SAAS,CAAC;IAChB,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,iBAAiB,EAAE,CAAC;IACpC,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,QAAQ,EAAE,UAAU,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB,EAAE,MAAM,CAAC;IACjC,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACxB,KAAK,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,QAAQ,CAAC;IAClD,OAAO,CAAC,EAAE;QACR,uFAAuF;QACvF,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,qFAAqF;QACrF,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,KAAK,CAAC,EAAE;QACN,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACxB,KAAK,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,QAAQ,CAAC;CACnD;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,CAAC,UAAU,GAAG;QAAE,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAAE,CAAC;IAC/C,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACjE;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,mBAAmB,CAAC,EAAE;QACpB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,qBAAqB,CAAC,EAAE;YACtB,GAAG,CAAC,EAAE,MAAM,CAAC;YACb,GAAG,CAAC,EAAE,MAAM,CAAC;SACd,CAAC;QACF,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,uBAAuB;IACtC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,uFAAuF;IACvF,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACpC,YAAY,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CACjC;AAED,MAAM,WAAW,sBAAsB;IACrC,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,EAAE,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAA;KAAE,CAAC;IACjC,GAAG,EAAE,UAAU,CAAC;IAChB,QAAQ,EAAE,YAAY,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,MAAM,CAAC,EAAE,uBAAuB,CAAC;IACjC,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,KAAK,CAAC,EAAE,sBAAsB,CAAC;CAChC"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAC5C,MAAM,MAAM,gBAAgB,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEzD,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,GAAG,WAAW,CAAC;AAChF,MAAM,MAAM,gBAAgB,GAAG,cAAc,GAAG,MAAM,GAAG,UAAU,GAAG,MAAM,CAAC;AAE7E,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,UAAU,CAAC;IACvB,UAAU,EAAE,gBAAgB,CAAC;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,gBAAiB,SAAQ,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC;CAAG;AAE1E,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3D,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,QAAQ,CAAC,EAAE,UAAU,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,SAAS,CAAC;IAChB,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,iBAAiB,EAAE,CAAC;IACpC,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,QAAQ,EAAE,UAAU,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,wBAAwB,EAAE,MAAM,CAAC;IACjC,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACxB,KAAK,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,QAAQ,CAAC;IAClD,OAAO,CAAC,EAAE;QACR,uFAAuF;QACvF,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,qFAAqF;QACrF,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,KAAK,CAAC,EAAE;QACN,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC;IACxB,KAAK,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,QAAQ,CAAC;CACnD;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,gBAAgB;AAChB,MAAM,WAAW,qBAAsB,SAAQ,aAAa;IAC1D,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,CAAC,UAAU,GAAG;QAAE,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,EAAE,CAAC;IAC/C,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACjE;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,mBAAmB,CAAC,EAAE;QACpB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,qBAAqB,CAAC,EAAE;YACtB,GAAG,CAAC,EAAE,MAAM,CAAC;YACb,GAAG,CAAC,EAAE,MAAM,CAAC;SACd,CAAC;QACF,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;KAC3B,CAAC;IACF,gDAAgD;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,uBAAuB;IACtC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,uFAAuF;IACvF,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACpC,YAAY,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CACjC;AAED,MAAM,WAAW,sBAAsB;IACrC,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,EAAE,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAA;KAAE,CAAC;IACjC,GAAG,EAAE,UAAU,CAAC;IAChB,QAAQ,EAAE,YAAY,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,MAAM,CAAC,EAAE,uBAAuB,CAAC;IACjC,KAAK,CAAC,EAAE,sBAAsB,CAAC;IAC/B,KAAK,CAAC,EAAE,sBAAsB,CAAC;CAChC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splitTextForIngestion.d.ts","sourceRoot":"","sources":["../../../src/utils/text/splitTextForIngestion.ts"],"names":[],"mappings":"AAIA,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAQ5D"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { normalizeText } from "./normalizeText.js";
|
|
2
|
+
const MAX_CHUNK_CHARACTERS = 800;
|
|
3
|
+
export function splitTextForIngestion(text) {
|
|
4
|
+
const lines = text
|
|
5
|
+
.replace(/\r\n?/g, "\n")
|
|
6
|
+
.split(/\n+/)
|
|
7
|
+
.map((line) => normalizeText(line))
|
|
8
|
+
.filter((line) => line !== null);
|
|
9
|
+
return lines.flatMap((line) => splitLine(line));
|
|
10
|
+
}
|
|
11
|
+
function splitLine(line) {
|
|
12
|
+
const sentences = line
|
|
13
|
+
.split(/(?<=[.!?])\s+/)
|
|
14
|
+
.map((sentence) => normalizeText(sentence))
|
|
15
|
+
.filter((sentence) => sentence !== null);
|
|
16
|
+
return sentences.flatMap((sentence) => splitLongSentence(sentence));
|
|
17
|
+
}
|
|
18
|
+
function splitLongSentence(sentence) {
|
|
19
|
+
if (sentence.length <= MAX_CHUNK_CHARACTERS)
|
|
20
|
+
return [sentence];
|
|
21
|
+
const words = sentence.split(/\s+/);
|
|
22
|
+
const chunks = [];
|
|
23
|
+
let current = "";
|
|
24
|
+
for (const word of words) {
|
|
25
|
+
const next = current ? `${current} ${word}` : word;
|
|
26
|
+
if (current && next.length > MAX_CHUNK_CHARACTERS) {
|
|
27
|
+
chunks.push(current);
|
|
28
|
+
current = word;
|
|
29
|
+
continue;
|
|
30
|
+
}
|
|
31
|
+
current = next;
|
|
32
|
+
}
|
|
33
|
+
if (current)
|
|
34
|
+
chunks.push(current);
|
|
35
|
+
return chunks;
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=splitTextForIngestion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splitTextForIngestion.js","sourceRoot":"","sources":["../../../src/utils/text/splitTextForIngestion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,MAAM,KAAK,GAAG,IAAI;SACf,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;SACvB,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;SAClC,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;IAEnD,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,SAAS,GAAG,IAAI;SACnB,KAAK,CAAC,eAAe,CAAC;SACtB,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;SAC1C,MAAM,CAAC,CAAC,QAAQ,EAAsB,EAAE,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC;IAE/D,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAgB;IACzC,IAAI,QAAQ,CAAC,MAAM,IAAI,oBAAoB;QAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE/D,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACpC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACnD,IAAI,OAAO,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;YAClD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;YACf,SAAS;QACX,CAAC;QAED,OAAO,GAAG,IAAI,CAAC;IACjB,CAAC;IAED,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splitTextForIngestion.test.d.ts","sourceRoot":"","sources":["../../../src/utils/text/splitTextForIngestion.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { splitTextForIngestion } from "./splitTextForIngestion.js";
|
|
3
|
+
describe("splitTextForIngestion", () => {
|
|
4
|
+
it("splits raw text by lines and sentence boundaries", () => {
|
|
5
|
+
expect(splitTextForIngestion([
|
|
6
|
+
"The roadmap prioritizes imports. The editor autosaves drafts.",
|
|
7
|
+
"",
|
|
8
|
+
"Hiring plans begin next quarter.",
|
|
9
|
+
].join("\n"))).toEqual([
|
|
10
|
+
"The roadmap prioritizes imports.",
|
|
11
|
+
"The editor autosaves drafts.",
|
|
12
|
+
"Hiring plans begin next quarter.",
|
|
13
|
+
]);
|
|
14
|
+
});
|
|
15
|
+
it("returns no chunks for whitespace-only text", () => {
|
|
16
|
+
expect(splitTextForIngestion(" \n \t ")).toEqual([]);
|
|
17
|
+
});
|
|
18
|
+
});
|
|
19
|
+
//# sourceMappingURL=splitTextForIngestion.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"splitTextForIngestion.test.js","sourceRoot":"","sources":["../../../src/utils/text/splitTextForIngestion.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAEnE,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,CAAC,qBAAqB,CAAC;YAC3B,+DAA+D;YAC/D,EAAE;YACF,kCAAkC;SACnC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACrB,kCAAkC;YAClC,8BAA8B;YAC9B,kCAAkC;SACnC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|