@signetai/core 0.140.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents.d.ts +45 -0
- package/dist/agents.d.ts.map +1 -0
- package/dist/connector-types.d.ts +90 -0
- package/dist/connector-types.d.ts.map +1 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/daemon-url.d.ts +7 -0
- package/dist/daemon-url.d.ts.map +1 -0
- package/dist/database.d.ts +41 -0
- package/dist/database.d.ts.map +1 -0
- package/dist/export.d.ts +84 -0
- package/dist/export.d.ts.map +1 -0
- package/dist/fts-schema.d.ts +13 -0
- package/dist/fts-schema.d.ts.map +1 -0
- package/dist/gitignore.d.ts +3 -0
- package/dist/gitignore.d.ts.map +1 -0
- package/dist/graphiq.d.ts +40 -0
- package/dist/graphiq.d.ts.map +1 -0
- package/dist/harness-config.d.ts +3 -0
- package/dist/harness-config.d.ts.map +1 -0
- package/dist/identity.d.ts +193 -0
- package/dist/identity.d.ts.map +1 -0
- package/dist/import.d.ts +86 -0
- package/dist/import.d.ts.map +1 -0
- package/dist/index.d.ts +67 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20057 -0
- package/dist/ingest/chat-extractor.d.ts +35 -0
- package/dist/ingest/chat-extractor.d.ts.map +1 -0
- package/dist/ingest/chat-utils.d.ts +18 -0
- package/dist/ingest/chat-utils.d.ts.map +1 -0
- package/dist/ingest/chunker.d.ts +27 -0
- package/dist/ingest/chunker.d.ts.map +1 -0
- package/dist/ingest/code-parser.d.ts +30 -0
- package/dist/ingest/code-parser.d.ts.map +1 -0
- package/dist/ingest/discord-parser.d.ts +30 -0
- package/dist/ingest/discord-parser.d.ts.map +1 -0
- package/dist/ingest/entire-extractor.d.ts +33 -0
- package/dist/ingest/entire-extractor.d.ts.map +1 -0
- package/dist/ingest/entire-parser.d.ts +51 -0
- package/dist/ingest/entire-parser.d.ts.map +1 -0
- package/dist/ingest/extractor.d.ts +27 -0
- package/dist/ingest/extractor.d.ts.map +1 -0
- package/dist/ingest/git-utils.d.ts +9 -0
- package/dist/ingest/git-utils.d.ts.map +1 -0
- package/dist/ingest/index.d.ts +52 -0
- package/dist/ingest/index.d.ts.map +1 -0
- package/dist/ingest/markdown-parser.d.ts +26 -0
- package/dist/ingest/markdown-parser.d.ts.map +1 -0
- package/dist/ingest/pdf-parser.d.ts +14 -0
- package/dist/ingest/pdf-parser.d.ts.map +1 -0
- package/dist/ingest/provenance.d.ts +55 -0
- package/dist/ingest/provenance.d.ts.map +1 -0
- package/dist/ingest/response-parser.d.ts +40 -0
- package/dist/ingest/response-parser.d.ts.map +1 -0
- package/dist/ingest/slack-parser.d.ts +34 -0
- package/dist/ingest/slack-parser.d.ts.map +1 -0
- package/dist/ingest/types.d.ts +163 -0
- package/dist/ingest/types.d.ts.map +1 -0
- package/dist/llm-model-catalog.d.ts +176 -0
- package/dist/llm-model-catalog.d.ts.map +1 -0
- package/dist/llm-model-catalog.js +110 -0
- package/dist/manifest.d.ts +4 -0
- package/dist/manifest.d.ts.map +1 -0
- package/dist/markdown.d.ts +40 -0
- package/dist/markdown.d.ts.map +1 -0
- package/dist/memory.d.ts +23 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/migrate.d.ts +8 -0
- package/dist/migrate.d.ts.map +1 -0
- package/dist/migration.d.ts +58 -0
- package/dist/migration.d.ts.map +1 -0
- package/dist/migrations/001-baseline.d.ts +10 -0
- package/dist/migrations/001-baseline.d.ts.map +1 -0
- package/dist/migrations/002-pipeline-v2.d.ts +10 -0
- package/dist/migrations/002-pipeline-v2.d.ts.map +1 -0
- package/dist/migrations/003-unique-content-hash.d.ts +13 -0
- package/dist/migrations/003-unique-content-hash.d.ts.map +1 -0
- package/dist/migrations/004-history-actor-and-retention.d.ts +10 -0
- package/dist/migrations/004-history-actor-and-retention.d.ts.map +1 -0
- package/dist/migrations/005-graph-extended.d.ts +3 -0
- package/dist/migrations/005-graph-extended.d.ts.map +1 -0
- package/dist/migrations/006-idempotency-key.d.ts +3 -0
- package/dist/migrations/006-idempotency-key.d.ts.map +1 -0
- package/dist/migrations/007-documents-and-connectors.d.ts +3 -0
- package/dist/migrations/007-documents-and-connectors.d.ts.map +1 -0
- package/dist/migrations/008-embeddings-unique-hash.d.ts +11 -0
- package/dist/migrations/008-embeddings-unique-hash.d.ts.map +1 -0
- package/dist/migrations/009-summary-jobs.d.ts +3 -0
- package/dist/migrations/009-summary-jobs.d.ts.map +1 -0
- package/dist/migrations/010-umap-cache.d.ts +3 -0
- package/dist/migrations/010-umap-cache.d.ts.map +1 -0
- package/dist/migrations/011-session-scores.d.ts +3 -0
- package/dist/migrations/011-session-scores.d.ts.map +1 -0
- package/dist/migrations/012-scheduled-tasks.d.ts +3 -0
- package/dist/migrations/012-scheduled-tasks.d.ts.map +1 -0
- package/dist/migrations/013-ingestion-tracking.d.ts +11 -0
- package/dist/migrations/013-ingestion-tracking.d.ts.map +1 -0
- package/dist/migrations/014-telemetry.d.ts +11 -0
- package/dist/migrations/014-telemetry.d.ts.map +1 -0
- package/dist/migrations/015-session-memories.d.ts +11 -0
- package/dist/migrations/015-session-memories.d.ts.map +1 -0
- package/dist/migrations/016-session-checkpoints.d.ts +10 -0
- package/dist/migrations/016-session-checkpoints.d.ts.map +1 -0
- package/dist/migrations/017-task-skills.d.ts +10 -0
- package/dist/migrations/017-task-skills.d.ts.map +1 -0
- package/dist/migrations/018-skill-meta.d.ts +10 -0
- package/dist/migrations/018-skill-meta.d.ts.map +1 -0
- package/dist/migrations/019-knowledge-structure.d.ts +12 -0
- package/dist/migrations/019-knowledge-structure.d.ts.map +1 -0
- package/dist/migrations/020-predictor-comparisons.d.ts +3 -0
- package/dist/migrations/020-predictor-comparisons.d.ts.map +1 -0
- package/dist/migrations/021-checkpoint-structural.d.ts +10 -0
- package/dist/migrations/021-checkpoint-structural.d.ts.map +1 -0
- package/dist/migrations/022-entity-pinning.d.ts +3 -0
- package/dist/migrations/022-entity-pinning.d.ts.map +1 -0
- package/dist/migrations/023-predictor-columns.d.ts +3 -0
- package/dist/migrations/023-predictor-columns.d.ts.map +1 -0
- package/dist/migrations/024-predictor-comparison-columns.d.ts +3 -0
- package/dist/migrations/024-predictor-comparison-columns.d.ts.map +1 -0
- package/dist/migrations/025-agent-feedback.d.ts +10 -0
- package/dist/migrations/025-agent-feedback.d.ts.map +1 -0
- package/dist/migrations/026-predictor-training-pairs.d.ts +3 -0
- package/dist/migrations/026-predictor-training-pairs.d.ts.map +1 -0
- package/dist/migrations/027-backfill-canonical-names.d.ts +12 -0
- package/dist/migrations/027-backfill-canonical-names.d.ts.map +1 -0
- package/dist/migrations/028-lossless-retention.d.ts +10 -0
- package/dist/migrations/028-lossless-retention.d.ts.map +1 -0
- package/dist/migrations/029-session-summary-dag.d.ts +3 -0
- package/dist/migrations/029-session-summary-dag.d.ts.map +1 -0
- package/dist/migrations/030-nullable-memory-job-memory-id.d.ts +18 -0
- package/dist/migrations/030-nullable-memory-job-memory-id.d.ts.map +1 -0
- package/dist/migrations/031-dependency-reason.d.ts +11 -0
- package/dist/migrations/031-dependency-reason.d.ts.map +1 -0
- package/dist/migrations/032-embeddings-vector-column.d.ts +11 -0
- package/dist/migrations/032-embeddings-vector-column.d.ts.map +1 -0
- package/dist/migrations/033-scope.d.ts +10 -0
- package/dist/migrations/033-scope.d.ts.map +1 -0
- package/dist/migrations/034-scope-aware-dedup.d.ts +12 -0
- package/dist/migrations/034-scope-aware-dedup.d.ts.map +1 -0
- package/dist/migrations/035-entity-fts.d.ts +10 -0
- package/dist/migrations/035-entity-fts.d.ts.map +1 -0
- package/dist/migrations/036-dependency-confidence.d.ts +15 -0
- package/dist/migrations/036-dependency-confidence.d.ts.map +1 -0
- package/dist/migrations/037-entity-communities.d.ts +10 -0
- package/dist/migrations/037-entity-communities.d.ts.map +1 -0
- package/dist/migrations/038-memory-hints.d.ts +11 -0
- package/dist/migrations/038-memory-hints.d.ts.map +1 -0
- package/dist/migrations/039-dedup-entity-dependencies.d.ts +11 -0
- package/dist/migrations/039-dedup-entity-dependencies.d.ts.map +1 -0
- package/dist/migrations/040-session-transcripts.d.ts +10 -0
- package/dist/migrations/040-session-transcripts.d.ts.map +1 -0
- package/dist/migrations/041-path-feedback.d.ts +3 -0
- package/dist/migrations/041-path-feedback.d.ts.map +1 -0
- package/dist/migrations/042-session-memories-agent-id.d.ts +10 -0
- package/dist/migrations/042-session-memories-agent-id.d.ts.map +1 -0
- package/dist/migrations/043-agents-table.d.ts +15 -0
- package/dist/migrations/043-agents-table.d.ts.map +1 -0
- package/dist/migrations/044-memory-md-temporal-head.d.ts +10 -0
- package/dist/migrations/044-memory-md-temporal-head.d.ts.map +1 -0
- package/dist/migrations/045-lossless-working-memory-hardening.d.ts +11 -0
- package/dist/migrations/045-lossless-working-memory-hardening.d.ts.map +1 -0
- package/dist/migrations/046-session-summary-uniqueness.d.ts +3 -0
- package/dist/migrations/046-session-summary-uniqueness.d.ts.map +1 -0
- package/dist/migrations/047-agent-scoped-temporal-uniqueness.d.ts +9 -0
- package/dist/migrations/047-agent-scoped-temporal-uniqueness.d.ts.map +1 -0
- package/dist/migrations/048-thread-heads.d.ts +9 -0
- package/dist/migrations/048-thread-heads.d.ts.map +1 -0
- package/dist/migrations/049-session-extract-cursors.d.ts +11 -0
- package/dist/migrations/049-session-extract-cursors.d.ts.map +1 -0
- package/dist/migrations/050-related-to-audit.d.ts +3 -0
- package/dist/migrations/050-related-to-audit.d.ts.map +1 -0
- package/dist/migrations/051-memory-md-rolling-window-lineage.d.ts +12 -0
- package/dist/migrations/051-memory-md-rolling-window-lineage.d.ts.map +1 -0
- package/dist/migrations/052-mcp-invocations.d.ts +10 -0
- package/dist/migrations/052-mcp-invocations.d.ts.map +1 -0
- package/dist/migrations/053-skill-invocations.d.ts +10 -0
- package/dist/migrations/053-skill-invocations.d.ts.map +1 -0
- package/dist/migrations/054-task-agent-scope.d.ts +3 -0
- package/dist/migrations/054-task-agent-scope.d.ts.map +1 -0
- package/dist/migrations/055-dreaming-state.d.ts +9 -0
- package/dist/migrations/055-dreaming-state.d.ts.map +1 -0
- package/dist/migrations/056-agent-scoped-content-hash.d.ts +11 -0
- package/dist/migrations/056-agent-scoped-content-hash.d.ts.map +1 -0
- package/dist/migrations/057-memories-fts-tokenizer-repair.d.ts +12 -0
- package/dist/migrations/057-memories-fts-tokenizer-repair.d.ts.map +1 -0
- package/dist/migrations/058-knowledge-graph-indices.d.ts +16 -0
- package/dist/migrations/058-knowledge-graph-indices.d.ts.map +1 -0
- package/dist/migrations/059-entity-attribute-claim-key.d.ts +11 -0
- package/dist/migrations/059-entity-attribute-claim-key.d.ts.map +1 -0
- package/dist/migrations/060-entity-attribute-group-key.d.ts +10 -0
- package/dist/migrations/060-entity-attribute-group-key.d.ts.map +1 -0
- package/dist/migrations/061-memory-artifact-source-mtime.d.ts +12 -0
- package/dist/migrations/061-memory-artifact-source-mtime.d.ts.map +1 -0
- package/dist/migrations/062-memory-artifact-soft-delete.d.ts +10 -0
- package/dist/migrations/062-memory-artifact-soft-delete.d.ts.map +1 -0
- package/dist/migrations/063-content-only-memories-fts-update.d.ts +13 -0
- package/dist/migrations/063-content-only-memories-fts-update.d.ts.map +1 -0
- package/dist/migrations/064-source-graph-provenance.d.ts +11 -0
- package/dist/migrations/064-source-graph-provenance.d.ts.map +1 -0
- package/dist/migrations/065-source-embedding-agent-scope.d.ts +10 -0
- package/dist/migrations/065-source-embedding-agent-scope.d.ts.map +1 -0
- package/dist/migrations/066-memory-search-telemetry.d.ts +10 -0
- package/dist/migrations/066-memory-search-telemetry.d.ts.map +1 -0
- package/dist/migrations/067-ontology-proposals.d.ts +10 -0
- package/dist/migrations/067-ontology-proposals.d.ts.map +1 -0
- package/dist/migrations/068-daily-reflections.d.ts +9 -0
- package/dist/migrations/068-daily-reflections.d.ts.map +1 -0
- package/dist/migrations/069-daily-reflections-multiple-insights.d.ts +11 -0
- package/dist/migrations/069-daily-reflections-multiple-insights.d.ts.map +1 -0
- package/dist/migrations/070-ontology-control-plane-state.d.ts +9 -0
- package/dist/migrations/070-ontology-control-plane-state.d.ts.map +1 -0
- package/dist/migrations/071-epistemic-assertions.d.ts +10 -0
- package/dist/migrations/071-epistemic-assertions.d.ts.map +1 -0
- package/dist/migrations/072-agent-scoped-idempotency-key.d.ts +9 -0
- package/dist/migrations/072-agent-scoped-idempotency-key.d.ts.map +1 -0
- package/dist/migrations/073-recall-context-dedupe.d.ts +10 -0
- package/dist/migrations/073-recall-context-dedupe.d.ts.map +1 -0
- package/dist/migrations/074-aggregate-memory-links.d.ts +7 -0
- package/dist/migrations/074-aggregate-memory-links.d.ts.map +1 -0
- package/dist/migrations/075-memory-artifact-source-provenance.d.ts +3 -0
- package/dist/migrations/075-memory-artifact-source-provenance.d.ts.map +1 -0
- package/dist/migrations/076-temporal-edges.d.ts +3 -0
- package/dist/migrations/076-temporal-edges.d.ts.map +1 -0
- package/dist/migrations/077-entity-aliases.d.ts +3 -0
- package/dist/migrations/077-entity-aliases.d.ts.map +1 -0
- package/dist/migrations/078-api-keys.d.ts +3 -0
- package/dist/migrations/078-api-keys.d.ts.map +1 -0
- package/dist/migrations/index.d.ts +54 -0
- package/dist/migrations/index.d.ts.map +1 -0
- package/dist/network.d.ts +10 -0
- package/dist/network.d.ts.map +1 -0
- package/dist/oh-my-pi.d.ts +15 -0
- package/dist/oh-my-pi.d.ts.map +1 -0
- package/dist/package-manager.d.ts +33 -0
- package/dist/package-manager.d.ts.map +1 -0
- package/dist/pi.d.ts +15 -0
- package/dist/pi.d.ts.map +1 -0
- package/dist/pipeline-pause.d.ts +18 -0
- package/dist/pipeline-pause.d.ts.map +1 -0
- package/dist/pipeline-providers.d.ts +11 -0
- package/dist/pipeline-providers.d.ts.map +1 -0
- package/dist/pipeline-providers.js +144 -0
- package/dist/plugins.d.ts +6 -0
- package/dist/plugins.d.ts.map +1 -0
- package/dist/recall.d.ts +188 -0
- package/dist/recall.d.ts.map +1 -0
- package/dist/routing.d.ts +221 -0
- package/dist/routing.d.ts.map +1 -0
- package/dist/search.d.ts +75 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/signet-os-types.d.ts +116 -0
- package/dist/signet-os-types.d.ts.map +1 -0
- package/dist/signet.d.ts +36 -0
- package/dist/signet.d.ts.map +1 -0
- package/dist/skills.d.ts +111 -0
- package/dist/skills.d.ts.map +1 -0
- package/dist/soul.d.ts +3 -0
- package/dist/soul.d.ts.map +1 -0
- package/dist/source-substrate.d.ts +63 -0
- package/dist/source-substrate.d.ts.map +1 -0
- package/dist/sources-config.d.ts +131 -0
- package/dist/sources-config.d.ts.map +1 -0
- package/dist/symlinks.d.ts +45 -0
- package/dist/symlinks.d.ts.map +1 -0
- package/dist/types.d.ts +731 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/workspace-source-repo.d.ts +19 -0
- package/dist/workspace-source-repo.d.ts.map +1 -0
- package/dist/yaml.d.ts +29 -0
- package/dist/yaml.d.ts.map +1 -0
- package/package.json +69 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation-Aware Extraction for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Specialized extraction logic for chat/conversation content.
|
|
5
|
+
* Different from document extraction:
|
|
6
|
+
* - Tracks speaker attribution
|
|
7
|
+
* - Extracts decisions, action items, preferences
|
|
8
|
+
* - Filters out greetings, casual banter, meta-conversation
|
|
9
|
+
* - Understands conversational context (agreements, disagreements)
|
|
10
|
+
*
|
|
11
|
+
* Uses an LlmProvider for extraction with a conversation-specific prompt.
|
|
12
|
+
*/
|
|
13
|
+
import type { LlmProvider } from "../types";
|
|
14
|
+
import type { ChunkResult, ExtractionResult } from "./types";
|
|
15
|
+
import type { ExtractionOptions } from "./extractor";
|
|
16
|
+
/** @deprecated Use ExtractionOptions instead */
|
|
17
|
+
export type ChatExtractorConfig = ExtractionOptions;
|
|
18
|
+
export declare const DEFAULT_CHAT_EXTRACTOR_CONFIG: ExtractionOptions;
|
|
19
|
+
/**
|
|
20
|
+
* Extract knowledge from a conversation chunk using an LLM.
|
|
21
|
+
*
|
|
22
|
+
* This is the conversation-specific equivalent of the document extractor.
|
|
23
|
+
* It uses a prompt tailored for conversational content with speaker attribution.
|
|
24
|
+
*/
|
|
25
|
+
export declare function extractFromConversation(chunk: ChunkResult, channelName: string | null, participants: string[], provider: LlmProvider, opts?: ExtractionOptions): Promise<ExtractionResult>;
|
|
26
|
+
/**
|
|
27
|
+
* Extract knowledge from all conversation chunks.
|
|
28
|
+
*/
|
|
29
|
+
export declare function extractFromConversations(chunks: readonly ChunkResult[], channelName: string | null, participants: string[], provider: LlmProvider, onChunkDone?: (chunkIndex: number, itemCount: number) => void, opts?: ExtractionOptions): Promise<ExtractionResult[]>;
|
|
30
|
+
/**
|
|
31
|
+
* Extract participants from a conversation chunk text.
|
|
32
|
+
* Looks for patterns like "[timestamp] Name: message"
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractParticipants(chunkText: string): string[];
|
|
35
|
+
//# sourceMappingURL=chat-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chat-extractor.d.ts","sourceRoot":"","sources":["../../src/ingest/chat-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAC7D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAOrD,gDAAgD;AAChD,MAAM,MAAM,mBAAmB,GAAG,iBAAiB,CAAC;AAEpD,eAAO,MAAM,6BAA6B,EAAE,iBAE3C,CAAC;AA0IF;;;;;GAKG;AACH,wBAAsB,uBAAuB,CAC5C,KAAK,EAAE,WAAW,EAClB,WAAW,EAAE,MAAM,GAAG,IAAI,EAC1B,YAAY,EAAE,MAAM,EAAE,EACtB,QAAQ,EAAE,WAAW,EACrB,IAAI,GAAE,iBAAiD,GACrD,OAAO,CAAC,gBAAgB,CAAC,CA2B3B;AAED;;GAEG;AACH,wBAAsB,wBAAwB,CAC7C,MAAM,EAAE,SAAS,WAAW,EAAE,EAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,EAC1B,YAAY,EAAE,MAAM,EAAE,EACtB,QAAQ,EAAE,WAAW,EACrB,WAAW,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,KAAK,IAAI,EAC7D,IAAI,GAAE,iBAAiD,GACrD,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAY7B;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAY/D"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for conversation-based parsers (Slack, Discord).
|
|
3
|
+
*/
|
|
4
|
+
/** Time gap threshold for splitting unthreaded messages (30 minutes) */
|
|
5
|
+
export declare const TIME_GAP_MS: number;
|
|
6
|
+
/**
|
|
7
|
+
* Batch a sorted array of items into groups separated by time gaps.
|
|
8
|
+
*
|
|
9
|
+
* Items within `TIME_GAP_MS` of each other end up in the same batch.
|
|
10
|
+
* The caller provides a `getTimestamp` function that returns a
|
|
11
|
+
* millisecond-epoch number for each item.
|
|
12
|
+
*
|
|
13
|
+
* @param items - Pre-sorted array of items (oldest first)
|
|
14
|
+
* @param getTimestamp - Extract ms-epoch timestamp from an item
|
|
15
|
+
* @returns Array of batches (each batch is a non-empty array of items)
|
|
16
|
+
*/
|
|
17
|
+
export declare function batchByTimeGap<T>(items: readonly T[], getTimestamp: (item: T) => number): T[][];
|
|
18
|
+
//# sourceMappingURL=chat-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chat-utils.d.ts","sourceRoot":"","sources":["../../src/ingest/chat-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,wEAAwE;AACxE,eAAO,MAAM,WAAW,QAAiB,CAAC;AAE1C;;;;;;;;;;GAUG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,EAAE,YAAY,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,GAAG,CAAC,EAAE,EAAE,CAwB/F"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structure-aware intelligent chunking for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Unlike naive fixed-size chunking, this respects document structure:
|
|
5
|
+
* - Headers define chunk boundaries
|
|
6
|
+
* - Code blocks stay together
|
|
7
|
+
* - Lists stay together
|
|
8
|
+
* - Tables stay together
|
|
9
|
+
* - Overlap is applied at natural boundaries (sentences/paragraphs)
|
|
10
|
+
*
|
|
11
|
+
* Config: max ~2000 tokens (~8000 chars), min ~100 tokens, overlap ~200 tokens
|
|
12
|
+
*/
|
|
13
|
+
import type { ParsedDocument, ChunkResult } from "./types";
|
|
14
|
+
export interface ChunkerConfig {
|
|
15
|
+
/** Maximum tokens per chunk (estimated as chars / 4) */
|
|
16
|
+
readonly maxTokens: number;
|
|
17
|
+
/** Minimum tokens per chunk — avoid tiny fragments */
|
|
18
|
+
readonly minTokens: number;
|
|
19
|
+
/** Overlap tokens between consecutive chunks */
|
|
20
|
+
readonly overlapTokens: number;
|
|
21
|
+
}
|
|
22
|
+
export declare const DEFAULT_CHUNKER_CONFIG: ChunkerConfig;
|
|
23
|
+
/**
|
|
24
|
+
* Chunk a parsed document into overlapping, structure-aware chunks.
|
|
25
|
+
*/
|
|
26
|
+
export declare function chunkDocument(doc: ParsedDocument, config?: ChunkerConfig): ChunkResult[];
|
|
27
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/ingest/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAiB,WAAW,EAAE,MAAM,SAAS,CAAC;AAM1E,MAAM,WAAW,aAAa;IAC7B,wDAAwD;IACxD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,gDAAgD;IAChD,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;CAC/B;AAED,eAAO,MAAM,sBAAsB,EAAE,aAIpC,CAAC;AAeF;;GAEG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,cAAc,EAAE,MAAM,GAAE,aAAsC,GAAG,WAAW,EAAE,CA+FhH"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Repository Parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Parses a git repository directory to extract:
|
|
5
|
+
* - README.md content (project overview, setup instructions)
|
|
6
|
+
* - package.json / pyproject.toml / Cargo.toml (dependencies, scripts)
|
|
7
|
+
* - Config files (.env.example, docker-compose.yml, etc.)
|
|
8
|
+
* - Git log (recent commit messages for architecture decisions, bug patterns)
|
|
9
|
+
* - Function/class extraction for major languages
|
|
10
|
+
* - Language detection from file extensions
|
|
11
|
+
*
|
|
12
|
+
* Produces a ParsedDocument with sections for architecture, dependencies,
|
|
13
|
+
* patterns, and recent development activity.
|
|
14
|
+
*/
|
|
15
|
+
import type { ParsedDocument } from "./types";
|
|
16
|
+
/**
|
|
17
|
+
* Parse a git repository directory into a ParsedDocument.
|
|
18
|
+
*
|
|
19
|
+
* @param repoPath - Path to the repository root (must have .git/)
|
|
20
|
+
* @param options - Optional configuration
|
|
21
|
+
*/
|
|
22
|
+
export declare function parseCodeRepository(repoPath: string, options?: {
|
|
23
|
+
/** Include git log analysis (default: true) */
|
|
24
|
+
readonly includeGitLog?: boolean;
|
|
25
|
+
/** Max commits to parse (default: 100) */
|
|
26
|
+
readonly gitLogDepth?: number;
|
|
27
|
+
/** Only include specific file patterns */
|
|
28
|
+
readonly includePatterns?: string[];
|
|
29
|
+
}): ParsedDocument;
|
|
30
|
+
//# sourceMappingURL=code-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/code-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAKH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AA0G7D;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAClC,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;IACT,+CAA+C;IAC/C,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,0CAA0C;IAC1C,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,0CAA0C;IAC1C,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CACpC,GACC,cAAc,CAmDhB"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Discord Export Parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Parses DiscordChatExporter JSON format, which can be:
|
|
5
|
+
* 1. A single JSON file with a channel's messages
|
|
6
|
+
* 2. A directory of JSON files (one per channel)
|
|
7
|
+
*
|
|
8
|
+
* DiscordChatExporter schema: { guild, channel, dateRange, messages[] }
|
|
9
|
+
* Each message: { id, type, timestamp, content, author, attachments, embeds, reference }
|
|
10
|
+
*
|
|
11
|
+
* Produces a ParsedDocument with sections grouped by conversation thread/time.
|
|
12
|
+
*/
|
|
13
|
+
import type { ParsedDocument } from "./types";
|
|
14
|
+
/**
|
|
15
|
+
* Parse a Discord export (DiscordChatExporter format) into a ParsedDocument.
|
|
16
|
+
*
|
|
17
|
+
* @param path - Path to a JSON file or directory of JSON files
|
|
18
|
+
* @param options - Optional filtering
|
|
19
|
+
*/
|
|
20
|
+
export declare function parseDiscordExport(path: string, options?: {
|
|
21
|
+
/** Only include these channel names */
|
|
22
|
+
readonly channels?: string[];
|
|
23
|
+
/** Only include messages after this date */
|
|
24
|
+
readonly since?: string;
|
|
25
|
+
/** Only include messages before this date */
|
|
26
|
+
readonly until?: string;
|
|
27
|
+
/** Filter to specific speakers */
|
|
28
|
+
readonly speakers?: string[];
|
|
29
|
+
}): ParsedDocument;
|
|
30
|
+
//# sourceMappingURL=discord-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"discord-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/discord-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AAsH7D;;;;;GAKG;AACH,wBAAgB,kBAAkB,CACjC,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE;IACT,uCAAuC;IACvC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,4CAA4C;IAC5C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,6CAA6C;IAC7C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7B,GACC,cAAc,CA8EhB"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entire.io Session Extraction for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Specialized extraction logic for Entire.io AI coding session data.
|
|
5
|
+
* DIFFERENT from regular chat extraction — this focuses on extracting
|
|
6
|
+
* SKILL SIGNALS from developer-AI interaction patterns:
|
|
7
|
+
*
|
|
8
|
+
* - Skills demonstrated: technologies, tools, patterns used
|
|
9
|
+
* - Problem-solving approach: decomposition strategy, first moves
|
|
10
|
+
* - Decision paths: choices made, alternatives considered
|
|
11
|
+
* - Communication style with AI: prompt craftsmanship
|
|
12
|
+
* - Domain knowledge signals: what they know vs. ask about
|
|
13
|
+
* - Workflow patterns: build->test->commit cycles
|
|
14
|
+
* - Tool mastery: IDE features, CLI commands, framework expertise
|
|
15
|
+
*
|
|
16
|
+
* Uses an LlmProvider for extraction with a prompt specifically designed
|
|
17
|
+
* for developer skill assessment.
|
|
18
|
+
*/
|
|
19
|
+
import type { LlmProvider } from "../types";
|
|
20
|
+
import type { ChunkResult, ExtractionResult } from "./types";
|
|
21
|
+
import type { ExtractionOptions } from "./extractor";
|
|
22
|
+
/** @deprecated Use ExtractionOptions instead */
|
|
23
|
+
export type EntireExtractorConfig = ExtractionOptions;
|
|
24
|
+
export declare const DEFAULT_ENTIRE_EXTRACTOR_CONFIG: ExtractionOptions;
|
|
25
|
+
/**
|
|
26
|
+
* Extract skill signals from an Entire.io session chunk.
|
|
27
|
+
*/
|
|
28
|
+
export declare function extractFromEntireSession(chunk: ChunkResult, sessionMetadata: string | null, provider: LlmProvider, opts?: ExtractionOptions): Promise<ExtractionResult>;
|
|
29
|
+
/**
|
|
30
|
+
* Extract skill signals from all Entire.io session chunks.
|
|
31
|
+
*/
|
|
32
|
+
export declare function extractFromEntireSessions(chunks: readonly ChunkResult[], sessionMetadata: string | null, provider: LlmProvider, onChunkDone?: (chunkIndex: number, itemCount: number) => void, opts?: ExtractionOptions): Promise<ExtractionResult[]>;
|
|
33
|
+
//# sourceMappingURL=entire-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"entire-extractor.d.ts","sourceRoot":"","sources":["../../src/ingest/entire-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAC7D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAOrD,gDAAgD;AAChD,MAAM,MAAM,qBAAqB,GAAG,iBAAiB,CAAC;AAEtD,eAAO,MAAM,+BAA+B,EAAE,iBAE7C,CAAC;AA8HF;;GAEG;AACH,wBAAsB,wBAAwB,CAC7C,KAAK,EAAE,WAAW,EAClB,eAAe,EAAE,MAAM,GAAG,IAAI,EAC9B,QAAQ,EAAE,WAAW,EACrB,IAAI,GAAE,iBAAmD,GACvD,OAAO,CAAC,gBAAgB,CAAC,CAsB3B;AAED;;GAEG;AACH,wBAAsB,yBAAyB,CAC9C,MAAM,EAAE,SAAS,WAAW,EAAE,EAC9B,eAAe,EAAE,MAAM,GAAG,IAAI,EAC9B,QAAQ,EAAE,WAAW,EACrB,WAAW,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,KAAK,IAAI,EAC7D,IAAI,GAAE,iBAAmD,GACvD,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAY7B"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entire.io Session Parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Parses Entire checkpoint data stored on the `entire/checkpoints/v1`
|
|
5
|
+
* shadow branch WITHOUT checking it out. Extracts:
|
|
6
|
+
* - Session transcripts (full prompt→response conversations)
|
|
7
|
+
* - Files touched per session
|
|
8
|
+
* - Checkpoint metadata (timestamps, commit links, strategy)
|
|
9
|
+
* - Agent info (Claude Code, Gemini CLI, etc.)
|
|
10
|
+
*
|
|
11
|
+
* Entire stores checkpoints in a sharded path structure on the branch:
|
|
12
|
+
* <checkpoint-id[:2]>/<checkpoint-id[2:]>/
|
|
13
|
+
* ├── metadata.json (CheckpointSummary: aggregated stats, sessions list)
|
|
14
|
+
* ├── 1/ (session subdirectory)
|
|
15
|
+
* │ ├── metadata.json (CommittedMetadata: session-specific details)
|
|
16
|
+
* │ ├── full.jsonl (transcript in JSONL format)
|
|
17
|
+
* │ ├── prompt.txt (user prompts)
|
|
18
|
+
* │ └── context.md (generated context)
|
|
19
|
+
* └── 2/ ...
|
|
20
|
+
*
|
|
21
|
+
* The JSONL transcript format has lines like:
|
|
22
|
+
* {"uuid":"...","type":"user","message":{"content":"..."},"timestamp":"..."}
|
|
23
|
+
* {"uuid":"...","type":"assistant","message":{"content":[{"type":"text","text":"..."}]},"timestamp":"..."}
|
|
24
|
+
*/
|
|
25
|
+
import type { ParsedDocument } from "./types";
|
|
26
|
+
/**
|
|
27
|
+
* Check if a repository has Entire.io checkpoint data.
|
|
28
|
+
*
|
|
29
|
+
* @param repoPath - Path to the git repository root
|
|
30
|
+
* @returns true if the `entire/checkpoints/v1` branch exists
|
|
31
|
+
*/
|
|
32
|
+
export declare function hasEntireBranch(repoPath: string): boolean;
|
|
33
|
+
/**
|
|
34
|
+
* Parse Entire.io sessions from a git repository.
|
|
35
|
+
*
|
|
36
|
+
* Reads the `entire/checkpoints/v1` branch without checking it out
|
|
37
|
+
* and extracts all session transcripts, metadata, and file change info.
|
|
38
|
+
*
|
|
39
|
+
* @param repoPath - Path to the repository root (must have .git/)
|
|
40
|
+
* @param options - Optional configuration
|
|
41
|
+
* @returns ParsedDocument with sections per session
|
|
42
|
+
*/
|
|
43
|
+
export declare function parseEntireRepo(repoPath: string, options?: {
|
|
44
|
+
/** Maximum number of sessions to parse (default: all) */
|
|
45
|
+
readonly maxSessions?: number;
|
|
46
|
+
/** Only include sessions after this date */
|
|
47
|
+
readonly since?: string;
|
|
48
|
+
/** Include raw transcript text in sections (default: true) */
|
|
49
|
+
readonly includeTranscripts?: boolean;
|
|
50
|
+
}): ParsedDocument;
|
|
51
|
+
//# sourceMappingURL=entire-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"entire-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/entire-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAKH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AAmG7D;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAmBzD;AAED;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC9B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;IACT,yDAAyD;IACzD,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,4CAA4C;IAC5C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,8DAA8D;IAC9D,QAAQ,CAAC,kBAAkB,CAAC,EAAE,OAAO,CAAC;CACtC,GACC,cAAc,CA+LhB"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-based knowledge extraction from document chunks.
|
|
3
|
+
*
|
|
4
|
+
* Uses an LlmProvider to extract structured knowledge:
|
|
5
|
+
* facts, decisions, preferences, procedures, relationships.
|
|
6
|
+
*
|
|
7
|
+
* The extraction prompt is the most critical part of the ingestion engine.
|
|
8
|
+
* It needs to produce genuinely useful, self-contained memories — not noise.
|
|
9
|
+
*/
|
|
10
|
+
import type { LlmProvider } from "../types";
|
|
11
|
+
import type { ChunkResult, ExtractionResult } from "./types";
|
|
12
|
+
export interface ExtractionOptions {
|
|
13
|
+
/** Minimum confidence to keep an extracted item */
|
|
14
|
+
readonly minConfidence: number;
|
|
15
|
+
}
|
|
16
|
+
/** @deprecated Use ExtractionOptions instead */
|
|
17
|
+
export type ExtractorConfig = ExtractionOptions;
|
|
18
|
+
export declare const DEFAULT_EXTRACTOR_CONFIG: ExtractionOptions;
|
|
19
|
+
/**
|
|
20
|
+
* Extract knowledge from a single chunk using an LLM.
|
|
21
|
+
*/
|
|
22
|
+
export declare function extractFromChunk(chunk: ChunkResult, sourceTitle: string | null, provider: LlmProvider, opts?: ExtractionOptions): Promise<ExtractionResult>;
|
|
23
|
+
/**
|
|
24
|
+
* Extract knowledge from all chunks in a document.
|
|
25
|
+
*/
|
|
26
|
+
export declare function extractFromChunks(chunks: readonly ChunkResult[], sourceTitle: string | null, provider: LlmProvider, onChunkDone?: (chunkIndex: number, itemCount: number) => void, opts?: ExtractionOptions): Promise<ExtractionResult[]>;
|
|
27
|
+
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../src/ingest/extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAO7D,MAAM,WAAW,iBAAiB;IACjC,mDAAmD;IACnD,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;CAC/B;AAED,gDAAgD;AAChD,MAAM,MAAM,eAAe,GAAG,iBAAiB,CAAC;AAEhD,eAAO,MAAM,wBAAwB,EAAE,iBAEtC,CAAC;AAmGF;;GAEG;AACH,wBAAsB,gBAAgB,CACrC,KAAK,EAAE,WAAW,EAClB,WAAW,EAAE,MAAM,GAAG,IAAI,EAC1B,QAAQ,EAAE,WAAW,EACrB,IAAI,GAAE,iBAA4C,GAChD,OAAO,CAAC,gBAAgB,CAAC,CAsB3B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACtC,MAAM,EAAE,SAAS,WAAW,EAAE,EAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,EAC1B,QAAQ,EAAE,WAAW,EACrB,WAAW,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,KAAK,IAAI,EAC7D,IAAI,GAAE,iBAA4C,GAChD,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAa7B"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared git binary resolution for parsers that shell out to git.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Find the git binary path. Checks common locations, then falls back to `which`.
|
|
6
|
+
* Returns null if git is not found.
|
|
7
|
+
*/
|
|
8
|
+
export declare function findGit(): string | null;
|
|
9
|
+
//# sourceMappingURL=git-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"git-utils.d.ts","sourceRoot":"","sources":["../../src/ingest/git-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH;;;GAGG;AACH,wBAAgB,OAAO,IAAI,MAAM,GAAG,IAAI,CAyBvC"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Ingestion Engine — "Pour your brain in"
|
|
3
|
+
*
|
|
4
|
+
* Main entry point: `ingestPath(path, options)` detects file type,
|
|
5
|
+
* parses documents, chunks intelligently, extracts knowledge via LLM,
|
|
6
|
+
* and stores as signed memories.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { ingestPath } from "@signetai/core/ingest";
|
|
10
|
+
* const result = await ingestPath("~/Documents/notes/", { db, verbose: true });
|
|
11
|
+
*/
|
|
12
|
+
import type { LlmProvider } from "../types";
|
|
13
|
+
import type { IngestOptions, IngestResult, ProgressCallback } from "./types";
|
|
14
|
+
export type { DatabaseLike, IngestOptions, IngestResult, FileIngestResult, ParsedDocument, ParsedSection, ChunkResult, ExtractionResult, ExtractedItem, ExtractedRelation, ProvenanceRecord, ProgressCallback, ProgressEvent, } from "./types";
|
|
15
|
+
export { chunkDocument, DEFAULT_CHUNKER_CONFIG } from "./chunker";
|
|
16
|
+
export type { ChunkerConfig } from "./chunker";
|
|
17
|
+
export { extractFromChunk, extractFromChunks, DEFAULT_EXTRACTOR_CONFIG } from "./extractor";
|
|
18
|
+
export type { ExtractionOptions, ExtractorConfig } from "./extractor";
|
|
19
|
+
export { parseMarkdown, parseMarkdownContent, parseTxt, parseCode } from "./markdown-parser";
|
|
20
|
+
export { parsePdf } from "./pdf-parser";
|
|
21
|
+
export { parseSlackExport } from "./slack-parser";
|
|
22
|
+
export { parseDiscordExport } from "./discord-parser";
|
|
23
|
+
export { parseCodeRepository } from "./code-parser";
|
|
24
|
+
export { parseEntireRepo, hasEntireBranch } from "./entire-parser";
|
|
25
|
+
export { extractFromEntireSession, extractFromEntireSessions } from "./entire-extractor";
|
|
26
|
+
export type { EntireExtractorConfig } from "./entire-extractor";
|
|
27
|
+
export { extractFromConversation, extractFromConversations, extractParticipants } from "./chat-extractor";
|
|
28
|
+
export type { ChatExtractorConfig } from "./chat-extractor";
|
|
29
|
+
export { computeFileHash, buildProvenance } from "./provenance";
|
|
30
|
+
export { parseExtractionResponse } from "./response-parser";
|
|
31
|
+
export type { ParseOptions } from "./response-parser";
|
|
32
|
+
export { findGit } from "./git-utils";
|
|
33
|
+
export { batchByTimeGap, TIME_GAP_MS } from "./chat-utils";
|
|
34
|
+
/**
|
|
35
|
+
* Ingest a file or directory of documents.
|
|
36
|
+
*
|
|
37
|
+
* This is the "pour your brain in" function. Point it at a path
|
|
38
|
+
* and it will:
|
|
39
|
+
* 1. Detect all supported files
|
|
40
|
+
* 2. Parse each file into sections
|
|
41
|
+
* 3. Chunk sections intelligently
|
|
42
|
+
* 4. Extract knowledge using an LLM
|
|
43
|
+
* 5. Store as memories with provenance tracking
|
|
44
|
+
*
|
|
45
|
+
* @param inputPath - File or directory to ingest
|
|
46
|
+
* @param options - Configuration options
|
|
47
|
+
* @param provider - LLM provider for extraction (required unless skipExtraction)
|
|
48
|
+
* @param onProgress - Optional progress callback
|
|
49
|
+
* @returns Ingestion results summary
|
|
50
|
+
*/
|
|
51
|
+
export declare function ingestPath(inputPath: string, options?: IngestOptions, provider?: LlmProvider, onProgress?: ProgressCallback): Promise<IngestResult>;
|
|
52
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/ingest/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,KAAK,EAEX,aAAa,EACb,YAAY,EAKZ,gBAAgB,EAChB,MAAM,SAAS,CAAC;AAoBjB,YAAY,EACX,YAAY,EACZ,aAAa,EACb,YAAY,EACZ,gBAAgB,EAChB,cAAc,EACd,aAAa,EACb,WAAW,EACX,gBAAgB,EAChB,aAAa,EACb,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,aAAa,GACb,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,aAAa,EAAE,sBAAsB,EAAE,MAAM,WAAW,CAAC;AAClE,YAAY,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAC5F,YAAY,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC7F,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AACnE,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC1G,YAAY,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAC5D,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AA2T3D;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,UAAU,CAC/B,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,aAAkB,EAC3B,QAAQ,CAAC,EAAE,WAAW,EACtB,UAAU,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,YAAY,CAAC,CAiGvB"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown / TXT parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Parses markdown into sections, respecting heading hierarchy.
|
|
5
|
+
* Handles code blocks, tables, lists, and blockquotes as distinct content types.
|
|
6
|
+
*/
|
|
7
|
+
import type { ParsedDocument } from "./types";
|
|
8
|
+
/**
|
|
9
|
+
* Parse a markdown or plain text file into structured sections.
|
|
10
|
+
*/
|
|
11
|
+
export declare function parseMarkdown(filePath: string): ParsedDocument;
|
|
12
|
+
/**
|
|
13
|
+
* Parse markdown content string directly (for testing / reuse).
|
|
14
|
+
*/
|
|
15
|
+
export declare function parseMarkdownContent(content: string, title?: string | null): ParsedDocument;
|
|
16
|
+
/**
|
|
17
|
+
* Parse a plain text file (no markdown structure).
|
|
18
|
+
* Splits on blank lines as paragraph boundaries.
|
|
19
|
+
*/
|
|
20
|
+
export declare function parseTxt(filePath: string): ParsedDocument;
|
|
21
|
+
/**
|
|
22
|
+
* Parse a code file — treat the whole file as one code section
|
|
23
|
+
* with the filename as the heading.
|
|
24
|
+
*/
|
|
25
|
+
export declare function parseCode(filePath: string): ParsedDocument;
|
|
26
|
+
//# sourceMappingURL=markdown-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/markdown-parser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AAM7D;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAG9D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,GAAE,MAAM,GAAG,IAAW,GAAG,cAAc,CA4JjG;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAoBzD;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAoD1D"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Uses pdf-parse (v2) to extract text, then applies heuristic section detection
|
|
5
|
+
* (large text on its own line → heading, page breaks → section boundaries).
|
|
6
|
+
*/
|
|
7
|
+
import type { ParsedDocument } from "./types";
|
|
8
|
+
/**
|
|
9
|
+
* Parse a PDF file into structured sections.
|
|
10
|
+
*
|
|
11
|
+
* Uses pdf-parse v2 API (PDFParse class with { data } constructor).
|
|
12
|
+
*/
|
|
13
|
+
export declare function parsePdf(filePath: string): Promise<ParsedDocument>;
|
|
14
|
+
//# sourceMappingURL=pdf-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/pdf-parser.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AAyB7D;;;;GAIG;AACH,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CA+ExE"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provenance tracking for ingested documents.
|
|
3
|
+
*
|
|
4
|
+
* Records where each piece of knowledge came from:
|
|
5
|
+
* source file, section, page, line range, and ingestion timestamp.
|
|
6
|
+
*/
|
|
7
|
+
import type { ChunkResult, DatabaseLike, ProvenanceRecord } from "./types";
|
|
8
|
+
/**
|
|
9
|
+
* Compute a SHA-256 hash of a file for deduplication.
|
|
10
|
+
*/
|
|
11
|
+
export declare function computeFileHash(filePath: string): string;
|
|
12
|
+
/**
|
|
13
|
+
* Check if a file has already been ingested (by hash).
|
|
14
|
+
* Returns the existing job ID if found, null otherwise.
|
|
15
|
+
*/
|
|
16
|
+
export declare function checkAlreadyIngested(db: DatabaseLike, fileHash: string): string | null;
|
|
17
|
+
/**
|
|
18
|
+
* Build a provenance record for a chunk.
|
|
19
|
+
*/
|
|
20
|
+
export declare function buildProvenance(chunk: ChunkResult, filePath: string, sourceType: string, fileHash: string): ProvenanceRecord;
|
|
21
|
+
/**
|
|
22
|
+
* Get file metadata for tracking.
|
|
23
|
+
*/
|
|
24
|
+
export declare function getFileMetadata(filePath: string): {
|
|
25
|
+
size: number;
|
|
26
|
+
modified: string;
|
|
27
|
+
};
|
|
28
|
+
export interface IngestionJobRow {
|
|
29
|
+
id: string;
|
|
30
|
+
source_path: string;
|
|
31
|
+
source_type: string;
|
|
32
|
+
file_hash: string;
|
|
33
|
+
status: string;
|
|
34
|
+
chunks_total: number;
|
|
35
|
+
chunks_processed: number;
|
|
36
|
+
memories_created: number;
|
|
37
|
+
started_at: string;
|
|
38
|
+
completed_at: string | null;
|
|
39
|
+
error: string | null;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Create an ingestion job record in the database.
|
|
43
|
+
*/
|
|
44
|
+
export declare function createIngestionJob(db: DatabaseLike, jobId: string, sourcePath: string, sourceType: string, fileHash: string): void;
|
|
45
|
+
/**
|
|
46
|
+
* Update an ingestion job's progress.
|
|
47
|
+
*/
|
|
48
|
+
export declare function updateIngestionJob(db: DatabaseLike, jobId: string, updates: {
|
|
49
|
+
status?: string;
|
|
50
|
+
chunksTotal?: number;
|
|
51
|
+
chunksProcessed?: number;
|
|
52
|
+
memoriesCreated?: number;
|
|
53
|
+
error?: string;
|
|
54
|
+
}): void;
|
|
55
|
+
//# sourceMappingURL=provenance.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/ingest/provenance.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAM3E;;GAEG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAQxD;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,EAAE,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAStF;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC9B,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,GACd,gBAAgB,CAYlB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG;IAClD,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACjB,CAUA;AAMD,MAAM,WAAW,eAAe;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACrB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CACjC,EAAE,EAAE,YAAY,EAChB,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,GACd,IAAI,CAUN;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CACjC,EAAE,EAAE,YAAY,EAChB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE;IACR,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CACf,GACC,IAAI,CAsCN"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared LLM response parser for the extraction pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Parses raw LLM text responses into structured extraction results.
|
|
5
|
+
* Handles markdown fences, <think> blocks, JSON repair, type normalization,
|
|
6
|
+
* and confidence filtering.
|
|
7
|
+
*
|
|
8
|
+
* Previously named ollama-client.ts — renamed to reflect actual responsibility
|
|
9
|
+
* now that LLM transport is handled by the LlmProvider interface.
|
|
10
|
+
*/
|
|
11
|
+
import type { ExtractedItem, ExtractedRelation } from "./types";
|
|
12
|
+
export interface ParseOptions {
|
|
13
|
+
/** Minimum confidence to keep an extracted item */
|
|
14
|
+
readonly minConfidence: number;
|
|
15
|
+
/** Set of valid type strings */
|
|
16
|
+
readonly validTypes: ReadonlySet<string>;
|
|
17
|
+
/** Map of alternative type names → canonical type names */
|
|
18
|
+
readonly typeMap: Readonly<Record<string, string>>;
|
|
19
|
+
/** Default type when none matches */
|
|
20
|
+
readonly defaultType: string;
|
|
21
|
+
/** Minimum content length to keep an item (0 = no minimum) */
|
|
22
|
+
readonly minContentLength?: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Parse an LLM response string into structured items and relations.
|
|
26
|
+
*
|
|
27
|
+
* Handles:
|
|
28
|
+
* - Markdown code fences around JSON
|
|
29
|
+
* - <think> reasoning blocks (some models emit these)
|
|
30
|
+
* - Trailing comma repair
|
|
31
|
+
* - Flexible key names ("items" / "facts", "relations" / "entities")
|
|
32
|
+
* - Type normalization via validTypes + typeMap
|
|
33
|
+
* - Confidence clamping and filtering
|
|
34
|
+
*/
|
|
35
|
+
export declare function parseExtractionResponse(raw: string, options: ParseOptions): {
|
|
36
|
+
items: ExtractedItem[];
|
|
37
|
+
relations: ExtractedRelation[];
|
|
38
|
+
warnings: string[];
|
|
39
|
+
};
|
|
40
|
+
//# sourceMappingURL=response-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"response-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/response-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAMhE,MAAM,WAAW,YAAY;IAC5B,mDAAmD;IACnD,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,gCAAgC;IAChC,QAAQ,CAAC,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IACzC,2DAA2D;IAC3D,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACnD,qCAAqC;IACrC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,8DAA8D;IAC9D,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACnC;AAMD;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CACtC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,YAAY,GACnB;IACF,KAAK,EAAE,aAAa,EAAE,CAAC;IACvB,SAAS,EAAE,iBAAiB,EAAE,CAAC;IAC/B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACnB,CAiHA"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Slack Export Parser for the ingestion engine.
|
|
3
|
+
*
|
|
4
|
+
* Parses the Slack JSON export format:
|
|
5
|
+
* - Root directory contains channels.json, users.json
|
|
6
|
+
* - Each channel has a subdirectory with dated JSON files (YYYY-MM-DD.json)
|
|
7
|
+
* - Each JSON file is an array of message objects
|
|
8
|
+
*
|
|
9
|
+
* Produces a ParsedDocument with messages grouped by conversation thread.
|
|
10
|
+
* Filters out bot messages, join/leave events, and noise.
|
|
11
|
+
*/
|
|
12
|
+
import type { ParsedDocument } from "./types";
|
|
13
|
+
/**
|
|
14
|
+
* Parse a Slack export directory into a ParsedDocument.
|
|
15
|
+
*
|
|
16
|
+
* The directory should contain:
|
|
17
|
+
* - users.json (optional but recommended)
|
|
18
|
+
* - channels.json (optional)
|
|
19
|
+
* - One subdirectory per channel, each containing YYYY-MM-DD.json files
|
|
20
|
+
*
|
|
21
|
+
* @param dirPath - Path to the Slack export root directory
|
|
22
|
+
* @param options - Optional filtering
|
|
23
|
+
*/
|
|
24
|
+
export declare function parseSlackExport(dirPath: string, options?: {
|
|
25
|
+
/** Only include these channels */
|
|
26
|
+
readonly channels?: string[];
|
|
27
|
+
/** Only include messages after this date */
|
|
28
|
+
readonly since?: string;
|
|
29
|
+
/** Only include messages before this date */
|
|
30
|
+
readonly until?: string;
|
|
31
|
+
/** Filter to specific speakers */
|
|
32
|
+
readonly speakers?: string[];
|
|
33
|
+
}): ParsedDocument;
|
|
34
|
+
//# sourceMappingURL=slack-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"slack-parser.d.ts","sourceRoot":"","sources":["../../src/ingest/slack-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,OAAO,KAAK,EAAE,cAAc,EAAiB,MAAM,SAAS,CAAC;AA8E7D;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAC/B,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IACT,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,4CAA4C;IAC5C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,6CAA6C;IAC7C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7B,GACC,cAAc,CA8DhB"}
|