bot-knows 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. bot_knows/__init__.py +70 -0
  2. bot_knows/config.py +115 -0
  3. bot_knows/domain/__init__.py +5 -0
  4. bot_knows/domain/chat.py +62 -0
  5. bot_knows/domain/message.py +64 -0
  6. bot_knows/domain/relation.py +56 -0
  7. bot_knows/domain/topic.py +132 -0
  8. bot_knows/domain/topic_evidence.py +55 -0
  9. bot_knows/importers/__init__.py +12 -0
  10. bot_knows/importers/base.py +116 -0
  11. bot_knows/importers/chatgpt.py +154 -0
  12. bot_knows/importers/claude.py +172 -0
  13. bot_knows/importers/generic_json.py +272 -0
  14. bot_knows/importers/registry.py +125 -0
  15. bot_knows/infra/__init__.py +5 -0
  16. bot_knows/infra/llm/__init__.py +6 -0
  17. bot_knows/infra/llm/anthropic_provider.py +172 -0
  18. bot_knows/infra/llm/openai_provider.py +195 -0
  19. bot_knows/infra/mongo/__init__.py +5 -0
  20. bot_knows/infra/mongo/client.py +145 -0
  21. bot_knows/infra/mongo/repositories.py +348 -0
  22. bot_knows/infra/neo4j/__init__.py +5 -0
  23. bot_knows/infra/neo4j/client.py +152 -0
  24. bot_knows/infra/neo4j/graph_repository.py +329 -0
  25. bot_knows/infra/redis/__init__.py +6 -0
  26. bot_knows/infra/redis/cache.py +198 -0
  27. bot_knows/infra/redis/client.py +193 -0
  28. bot_knows/interfaces/__init__.py +18 -0
  29. bot_knows/interfaces/embedding.py +55 -0
  30. bot_knows/interfaces/graph.py +194 -0
  31. bot_knows/interfaces/llm.py +70 -0
  32. bot_knows/interfaces/recall.py +92 -0
  33. bot_knows/interfaces/storage.py +225 -0
  34. bot_knows/logging.py +101 -0
  35. bot_knows/models/__init__.py +22 -0
  36. bot_knows/models/chat.py +55 -0
  37. bot_knows/models/ingest.py +70 -0
  38. bot_knows/models/message.py +49 -0
  39. bot_knows/models/recall.py +58 -0
  40. bot_knows/models/topic.py +100 -0
  41. bot_knows/orchestrator.py +398 -0
  42. bot_knows/py.typed +0 -0
  43. bot_knows/services/__init__.py +24 -0
  44. bot_knows/services/chat_processing.py +182 -0
  45. bot_knows/services/dedup_service.py +161 -0
  46. bot_knows/services/graph_service.py +217 -0
  47. bot_knows/services/message_builder.py +135 -0
  48. bot_knows/services/recall_service.py +296 -0
  49. bot_knows/services/tasks.py +128 -0
  50. bot_knows/services/topic_extraction.py +199 -0
  51. bot_knows/utils/__init__.py +22 -0
  52. bot_knows/utils/hashing.py +126 -0
  53. bot_knows-0.1.0.dist-info/METADATA +294 -0
  54. bot_knows-0.1.0.dist-info/RECORD +56 -0
  55. bot_knows-0.1.0.dist-info/WHEEL +4 -0
  56. bot_knows-0.1.0.dist-info/licenses/LICENSE +21 -0
bot_knows/__init__.py ADDED
@@ -0,0 +1,70 @@
1
+ """bot_knows - Framework-agnostic Python library for graph-backed personal knowledge bases.
2
+
3
+ This package provides tools for:
4
+ - Ingesting chats from multiple sources (ChatGPT, Claude, custom JSON)
5
+ - Classifying and organizing chat data
6
+ - Extracting semantic topics with deduplication
7
+ - Building a graph-backed knowledge base
8
+ - Evidence-weighted recall with spaced repetition
9
+
10
+ Example usage:
11
+ from bot_knows import (
12
+ BotKnows,
13
+ MongoStorageRepository,
14
+ Neo4jGraphRepository,
15
+ OpenAIProvider,
16
+ ChatGPTAdapter,
17
+ )
18
+
19
+ # Simple usage - config loaded from .env automatically
20
+ async with BotKnows(
21
+ storage_class=MongoStorageRepository,
22
+ graphdb_class=Neo4jGraphRepository,
23
+ llm_class=OpenAIProvider,
24
+ ) as bk:
25
+ result = await bk.insert_chats("conversations.json", ChatGPTAdapter)
26
+ topics = await bk.get_chat_topics(chat_id)
27
+ """
28
+
29
+ __version__ = "0.1.0"
30
+
31
+ # Orchestrator
32
+ # Interfaces
33
+ from bot_knows.importers.base import ChatImportAdapter
34
+
35
+ # Import adapters
36
+ from bot_knows.importers.chatgpt import ChatGPTAdapter
37
+ from bot_knows.importers.claude import ClaudeAdapter
38
+ from bot_knows.importers.generic_json import GenericJSONAdapter
39
+ from bot_knows.infra.llm.anthropic_provider import AnthropicProvider
40
+ from bot_knows.infra.llm.openai_provider import OpenAIProvider
41
+
42
+ # Implementations
43
+ from bot_knows.infra.mongo.repositories import MongoStorageRepository
44
+ from bot_knows.infra.neo4j.graph_repository import Neo4jGraphRepository
45
+ from bot_knows.interfaces.embedding import EmbeddingServiceInterface
46
+ from bot_knows.interfaces.graph import GraphServiceInterface
47
+ from bot_knows.interfaces.llm import LLMInterface
48
+ from bot_knows.interfaces.storage import StorageInterface
49
+ from bot_knows.orchestrator import BotKnows, InsertResult
50
+
51
+ __all__ = [ # noqa: RUF022
52
+ # Orchestrator
53
+ "BotKnows",
54
+ "InsertResult",
55
+ # Implementations
56
+ "MongoStorageRepository",
57
+ "Neo4jGraphRepository",
58
+ "OpenAIProvider",
59
+ "AnthropicProvider",
60
+ # Import adapters
61
+ "ChatGPTAdapter",
62
+ "ClaudeAdapter",
63
+ "GenericJSONAdapter",
64
+ # Interfaces
65
+ "ChatImportAdapter",
66
+ "EmbeddingServiceInterface",
67
+ "GraphServiceInterface",
68
+ "LLMInterface",
69
+ "StorageInterface",
70
+ ]
bot_knows/config.py ADDED
@@ -0,0 +1,115 @@
1
+ """Configuration management for bot_knows.
2
+
3
+ This module provides typed configuration classes using pydantic-settings.
4
+ Configuration is loaded from environment variables with optional .env file support.
5
+ """
6
+
7
+ from pydantic import SecretStr
8
+ from pydantic_settings import BaseSettings, SettingsConfigDict
9
+
10
+ __all__ = [
11
+ "BotKnowsConfig",
12
+ "LLMSettings",
13
+ "MongoSettings",
14
+ "Neo4jSettings",
15
+ "RedisSettings",
16
+ ]
17
+
18
+
19
class MongoSettings(BaseSettings):
    """Settings describing how to reach the MongoDB instance.

    Each field is populated from an environment variable carrying the
    ``BOT_KNOWS_MONGO_`` prefix, with ``.env`` as an additional source.
    Unrecognised keys are ignored rather than rejected.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file_encoding="utf-8",
        env_file=".env",
        env_prefix="BOT_KNOWS_MONGO_",
    )

    # The URI is a SecretStr; read it with ``uri.get_secret_value()``.
    uri: SecretStr = SecretStr("mongodb://localhost:27017")
    # Name of the database to use.
    database: str = "bot_knows"
    # Prefix for collection names; empty by default.
    collection_prefix: str = ""
32
+
33
+
34
class Neo4jSettings(BaseSettings):
    """Settings describing how to reach the Neo4j graph database.

    Each field is populated from an environment variable carrying the
    ``BOT_KNOWS_NEO4J_`` prefix, with ``.env`` as an additional source.
    Unrecognised keys are ignored rather than rejected.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file_encoding="utf-8",
        env_file=".env",
        env_prefix="BOT_KNOWS_NEO4J_",
    )

    # Connection URI; defaults to a local Bolt endpoint.
    uri: str = "bolt://localhost:7687"
    username: str = "neo4j"
    # Password is a SecretStr; the default is an empty secret.
    password: SecretStr = SecretStr("")
47
+
48
+
49
class RedisSettings(BaseSettings):
    """Settings for the optional Redis cache.

    Caching is disabled when ``url`` is not configured or the connection
    fails. Fields are populated from environment variables carrying the
    ``BOT_KNOWS_REDIS_`` prefix, with ``.env`` as an additional source.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file_encoding="utf-8",
        env_file=".env",
        env_prefix="BOT_KNOWS_REDIS_",
    )

    # No URL by default, i.e. Redis is not configured.
    url: str | None = None
    # Explicit switch that allows turning caching off even when a URL is set.
    enabled: bool = True
64
+
65
+
66
class LLMSettings(BaseSettings):
    """Settings selecting and configuring the LLM provider.

    Fields are populated from environment variables carrying the
    ``BOT_KNOWS_LLM_`` prefix, with ``.env`` as an additional source.
    Unrecognised keys are ignored rather than rejected.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file_encoding="utf-8",
        env_file=".env",
        env_prefix="BOT_KNOWS_LLM_",
    )

    # Expected values are "openai" or "anthropic".
    provider: str = "openai"
    # API key is a SecretStr and is unset by default.
    api_key: SecretStr | None = None
    model: str = "gpt-4o"
    embedding_model: str = "text-embedding-3-small"
    embedding_dimensions: int = 1536
81
+
82
+
83
class BotKnowsConfig(BaseSettings):
    """Main configuration aggregating all settings.

    Groups the per-component settings (MongoDB, Neo4j, Redis, LLM) together
    with the deduplication and recall tuning knobs.

    Example usage:
        config = BotKnowsConfig()
        mongo_uri = config.mongo.uri.get_secret_value()
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Component settings (nested).
    # NOTE(review): these defaults are instantiated once, when this class body
    # is executed, so they presumably capture the environment as it was at
    # import time. Consider ``Field(default_factory=...)`` if late changes to
    # the environment must be honoured - confirm against pydantic-settings docs.
    mongo: MongoSettings = MongoSettings()
    neo4j: Neo4jSettings = Neo4jSettings()
    redis: RedisSettings = RedisSettings()
    llm: LLMSettings = LLMSettings()

    # Deduplication thresholds
    dedup_high_threshold: float = 0.92
    dedup_low_threshold: float = 0.80

    # Recall settings
    recall_stability_k: float = 0.1
    recall_semantic_boost: float = 0.1
    decay_batch_interval_hours: int = 24

    @property
    def redis_enabled(self) -> bool:
        """Check if Redis caching is enabled and configured.

        Returns:
            True only when caching has not been switched off and a non-empty
            Redis URL is present.
        """
        # An empty string (e.g. ``BOT_KNOWS_REDIS_URL=`` in a .env file) is
        # not a usable URL, so treat it the same as "not configured".
        return self.redis.enabled and bool(self.redis.url)
@@ -0,0 +1,5 @@
1
+ """Internal domain entities for bot_knows.
2
+
3
+ This module contains internal domain models with business logic.
4
+ These are not part of the public API.
5
+ """
@@ -0,0 +1,62 @@
1
+ """Internal Chat entity for bot_knows.
2
+
3
+ This module contains the internal Chat domain model with business logic.
4
+ """
5
+
6
+ import time
7
+ from dataclasses import dataclass, field
8
+
9
+ from bot_knows.models.chat import ChatCategory, ChatDTO
10
+
11
+ __all__ = [
12
+ "Chat",
13
+ ]
14
+
15
+
16
@dataclass
class Chat:
    """Mutable, in-process representation of a chat.

    Used while a chat is being processed; call :meth:`to_dto` to obtain the
    ``ChatDTO`` form used for persistence and external consumers.
    """

    id: str
    title: str
    source: str
    category: ChatCategory = ChatCategory.GENERAL
    tags: list[str] = field(default_factory=list)
    # Epoch seconds; defaults to the moment the instance is created.
    created_on: int = field(default_factory=lambda: int(time.time()))

    def add_tag(self, tag: str) -> None:
        """Append ``tag`` unless it is empty or already recorded."""
        if not tag or tag in self.tags:
            return
        self.tags.append(tag)

    def add_tags(self, tags: list[str]) -> None:
        """Add every tag in ``tags``, skipping empties and duplicates."""
        for candidate in tags:
            self.add_tag(candidate)

    def to_dto(self) -> ChatDTO:
        """Build a ``ChatDTO`` carrying this chat's current values."""
        payload = {
            "id": self.id,
            "title": self.title,
            "source": self.source,
            "category": self.category,
            # Copy so the DTO does not share the mutable tag list.
            "tags": list(self.tags),
            "created_on": self.created_on,
        }
        return ChatDTO(**payload)

    @classmethod
    def from_dto(cls, dto: ChatDTO) -> "Chat":
        """Build a ``Chat`` from the values held by ``dto``."""
        values = {
            "id": dto.id,
            "title": dto.title,
            "source": dto.source,
            "category": dto.category,
            # Copy so later tag edits do not touch the DTO's sequence.
            "tags": list(dto.tags),
            "created_on": dto.created_on,
        }
        return cls(**values)
@@ -0,0 +1,64 @@
1
+ """Internal Message entity for bot_knows.
2
+
3
+ This module contains the internal Message domain model.
4
+ """
5
+
6
+ import time
7
+ from dataclasses import dataclass, field
8
+
9
+ from bot_knows.models.message import MessageDTO
10
+
11
+ __all__ = [
12
+ "Message",
13
+ ]
14
+
15
+
16
@dataclass
class Message:
    """Mutable, in-process representation of a message.

    A message holds the user text and the assistant text side by side. Call
    :meth:`to_dto` to obtain the ``MessageDTO`` form used for persistence
    and external consumers.
    """

    message_id: str
    chat_id: str
    user_content: str = ""
    assistant_content: str = ""
    # Epoch seconds; defaults to the moment the instance is created.
    created_on: int = field(default_factory=lambda: int(time.time()))

    @property
    def combined_content(self) -> str:
        """Return the labelled user and assistant text joined by a blank line.

        A side that is empty is left out entirely.
        """
        user_part = [f"User: {self.user_content}"] if self.user_content else []
        assistant_part = (
            [f"Assistant: {self.assistant_content}"] if self.assistant_content else []
        )
        return "\n\n".join(user_part + assistant_part)

    @property
    def is_empty(self) -> bool:
        """Return True when neither side of the message has any content."""
        return not (self.user_content or self.assistant_content)

    def to_dto(self) -> MessageDTO:
        """Build a ``MessageDTO`` carrying this message's current values."""
        payload = {
            "message_id": self.message_id,
            "chat_id": self.chat_id,
            "user_content": self.user_content,
            "assistant_content": self.assistant_content,
            "created_on": self.created_on,
        }
        return MessageDTO(**payload)

    @classmethod
    def from_dto(cls, dto: MessageDTO) -> "Message":
        """Build a ``Message`` from the values held by ``dto``."""
        values = {
            "message_id": dto.message_id,
            "chat_id": dto.chat_id,
            "user_content": dto.user_content,
            "assistant_content": dto.assistant_content,
            "created_on": dto.created_on,
        }
        return cls(**values)
@@ -0,0 +1,56 @@
1
+ """Graph relationship types for bot_knows.
2
+
3
+ This module defines the edge types used in the Neo4j knowledge graph.
4
+ """
5
+
6
+ from enum import StrEnum
7
+
8
+ __all__ = [
9
+ "RelationType",
10
+ ]
11
+
12
+
13
+ class RelationType(StrEnum):
14
+ """Graph edge types for the knowledge graph.
15
+
16
+ These define the relationships between nodes in Neo4j.
17
+ """
18
+
19
+ # Message relationships
20
+ IS_PART_OF = "IS_PART_OF"
21
+ """(Message)-[:IS_PART_OF]->(Chat)"""
22
+
23
+ FOLLOWS_AFTER = "FOLLOWS_AFTER"
24
+ """(Message)-[:FOLLOWS_AFTER]->(Message) - defines ordering"""
25
+
26
+ # Topic relationships
27
+ IS_SUPPORTED_BY = "IS_SUPPORTED_BY"
28
+ """(Topic)-[:IS_SUPPORTED_BY {evidence}]->(Message)"""
29
+
30
+ POTENTIALLY_DUPLICATE_OF = "POTENTIALLY_DUPLICATE_OF"
31
+ """(Topic)-[:POTENTIALLY_DUPLICATE_OF {similarity}]->(Topic)"""
32
+
33
+ RELATES_TO = "RELATES_TO"
34
+ """(Topic)-[:RELATES_TO {type, weight}]->(Topic)"""
35
+
36
+
37
+ class SemanticRelationType(StrEnum):
38
+ """Semantic relationship types between topics.
39
+
40
+ Used as the 'type' property on RELATES_TO edges.
41
+ """
42
+
43
+ PART_OF = "part_of"
44
+ """Topic A is part of Topic B"""
45
+
46
+ CAUSES = "causes"
47
+ """Topic A causes Topic B"""
48
+
49
+ RELATED_TO = "related_to"
50
+ """General semantic relationship"""
51
+
52
+ PREREQUISITE_OF = "prerequisite_of"
53
+ """Topic A is a prerequisite for Topic B"""
54
+
55
+ SIMILAR_TO = "similar_to"
56
+ """Topics are semantically similar"""
@@ -0,0 +1,132 @@
1
+ """Internal Topic entity for bot_knows.
2
+
3
+ This module contains the internal Topic domain model with
4
+ recall business logic including decay and reinforcement.
5
+ """
6
+
7
+ import math
8
+ import time
9
+ from dataclasses import dataclass, field
10
+
11
+ from bot_knows.models.topic import TopicDTO
12
+
13
+ __all__ = [
14
+ "Topic",
15
+ ]
16
+
17
+
18
+ @dataclass
19
+ class Topic:
20
+ """Internal Topic entity with recall business logic.
21
+
22
+ This is a mutable internal representation used during processing.
23
+ Includes methods for decay and reinforcement calculations.
24
+ """
25
+
26
+ topic_id: str
27
+ canonical_name: str
28
+ centroid_embedding: list[float] = field(default_factory=list)
29
+ evidence_count: int = 0
30
+ importance: float = 0.0
31
+ recall_strength: float = 0.0
32
+ stability: float = 1.0
33
+ last_seen: int = field(default_factory=lambda: int(time.time()))
34
+ last_updated: int = field(default_factory=lambda: int(time.time()))
35
+
36
+ def update_centroid(self, new_embedding: list[float]) -> None:
37
+ """Incrementally update centroid embedding.
38
+
39
+ Uses formula: new_centroid = (old_centroid * n + new_embedding) / (n + 1)
40
+
41
+ Args:
42
+ new_embedding: New embedding to incorporate
43
+ """
44
+ n = self.evidence_count
45
+ if n == 0:
46
+ self.centroid_embedding = list(new_embedding)
47
+ else:
48
+ self.centroid_embedding = [
49
+ (old * n + new) / (n + 1)
50
+ for old, new in zip(self.centroid_embedding, new_embedding, strict=False)
51
+ ]
52
+ self.evidence_count += 1
53
+
54
+ def reinforce(
55
+ self,
56
+ confidence: float,
57
+ novelty_factor: float = 1.0,
58
+ context_weight: float = 1.0,
59
+ stability_k: float = 0.1,
60
+ ) -> None:
61
+ """Reinforce topic recall strength.
62
+
63
+ Context weights:
64
+ - passive: 0.2 (reading without interaction)
65
+ - active: 0.6 (actively querying)
66
+ - recall: 1.0 (responding to recall prompt)
67
+
68
+ Formula:
69
+ delta = confidence * novelty_factor * context_weight
70
+ strength = min(1.0, strength + delta)
71
+ stability += k * confidence
72
+
73
+ Args:
74
+ confidence: Evidence confidence (0.0 - 1.0)
75
+ novelty_factor: How novel this reinforcement is
76
+ context_weight: Weight based on interaction type
77
+ stability_k: Stability increment factor
78
+ """
79
+ delta = confidence * novelty_factor * context_weight
80
+ self.recall_strength = min(1.0, self.recall_strength + delta)
81
+ self.stability += stability_k * confidence
82
+ self.last_seen = int(time.time())
83
+ self.last_updated = int(time.time())
84
+
85
+ def apply_decay(self, current_time: int | None = None) -> None:
86
+ """Apply time-based decay to recall strength.
87
+
88
+ Formula: strength *= exp(-Δt / (stability * 86400))
89
+
90
+ Higher stability means slower decay.
91
+
92
+ Args:
93
+ current_time: Current time in epoch seconds (default: now)
94
+ """
95
+ now = current_time or int(time.time())
96
+ delta_t = now - self.last_updated
97
+ if delta_t > 0:
98
+ # Stability is multiplied by seconds per day for the decay rate
99
+ decay_factor = math.exp(-delta_t / (self.stability * 86400))
100
+ self.recall_strength *= decay_factor
101
+ self.last_updated = now
102
+
103
+ def increment_importance(self, delta: float = 0.1) -> None:
104
+ """Increment importance score.
105
+
106
+ Args:
107
+ delta: Amount to increment (capped at 1.0)
108
+ """
109
+ self.importance = min(1.0, self.importance + delta)
110
+
111
+ def to_dto(self) -> TopicDTO:
112
+ """Convert to immutable DTO for persistence."""
113
+ return TopicDTO(
114
+ topic_id=self.topic_id,
115
+ canonical_name=self.canonical_name,
116
+ centroid_embedding=list(self.centroid_embedding),
117
+ evidence_count=self.evidence_count,
118
+ importance=self.importance,
119
+ recall_strength=self.recall_strength,
120
+ )
121
+
122
+ @classmethod
123
+ def from_dto(cls, dto: TopicDTO) -> "Topic":
124
+ """Create from DTO."""
125
+ return cls(
126
+ topic_id=dto.topic_id,
127
+ canonical_name=dto.canonical_name,
128
+ centroid_embedding=list(dto.centroid_embedding),
129
+ evidence_count=dto.evidence_count,
130
+ importance=dto.importance,
131
+ recall_strength=dto.recall_strength,
132
+ )
@@ -0,0 +1,55 @@
1
+ """Internal TopicEvidence entity for bot_knows.
2
+
3
+ This module contains the internal TopicEvidence domain model.
4
+ Evidence records are append-only and never modified.
5
+ """
6
+
7
+ import time
8
+ from dataclasses import dataclass, field
9
+
10
+ from bot_knows.models.topic import TopicEvidenceDTO
11
+
12
+ __all__ = [
13
+ "TopicEvidence",
14
+ ]
15
+
16
+
17
@dataclass(frozen=True)
class TopicEvidence:
    """A single record of a topic having been extracted from a message.

    Evidence is append-only: records are never modified or deleted, which
    gives a complete audit trail of topic extractions. The dataclass is
    frozen so instances cannot be changed after construction.
    """

    evidence_id: str
    topic_id: str
    extracted_name: str
    source_message_id: str
    confidence: float
    # Epoch seconds; defaults to the moment the instance is created.
    timestamp: int = field(default_factory=lambda: int(time.time()))

    def to_dto(self) -> TopicEvidenceDTO:
        """Build a ``TopicEvidenceDTO`` carrying this record's values."""
        payload = {
            "evidence_id": self.evidence_id,
            "topic_id": self.topic_id,
            "extracted_name": self.extracted_name,
            "source_message_id": self.source_message_id,
            "confidence": self.confidence,
            "timestamp": self.timestamp,
        }
        return TopicEvidenceDTO(**payload)

    @classmethod
    def from_dto(cls, dto: TopicEvidenceDTO) -> "TopicEvidence":
        """Build a ``TopicEvidence`` from the values held by ``dto``."""
        values = {
            "evidence_id": dto.evidence_id,
            "topic_id": dto.topic_id,
            "extracted_name": dto.extracted_name,
            "source_message_id": dto.source_message_id,
            "confidence": dto.confidence,
            "timestamp": dto.timestamp,
        }
        return cls(**values)
@@ -0,0 +1,12 @@
1
+ """Import adapters for bot_knows.
2
+
3
+ This module exports the import adapter base class and registry.
4
+ """
5
+
6
+ from bot_knows.importers.base import ChatImportAdapter
7
+ from bot_knows.importers.registry import ImportAdapterRegistry
8
+
9
+ __all__ = [
10
+ "ChatImportAdapter",
11
+ "ImportAdapterRegistry",
12
+ ]
@@ -0,0 +1,116 @@
1
+ """Base import adapter for bot_knows.
2
+
3
+ This module defines the abstract base class for chat import adapters.
4
+ """
5
+
6
+ from abc import ABC, abstractmethod
7
+ from pathlib import Path
8
+ from typing import Any, BinaryIO
9
+
10
+ from bot_knows.models.ingest import ChatIngest
11
+
12
+ __all__ = [
13
+ "ChatImportAdapter",
14
+ ]
15
+
16
+
17
class ChatImportAdapter(ABC):
    """Abstract base class for chat import adapters.

    Import adapters are responsible for parsing provider-specific
    export formats into the canonical ChatIngest model.

    Important: Adapters must NOT persist data or mutate any state.
    They only normalize data.

    Example:
        class MyAdapter(ChatImportAdapter):
            @property
            def source_name(self) -> str:
                return "my_source"

            def parse(self, raw_export: dict) -> list[ChatIngest]:
                # Parse and return ChatIngest objects
                ...
    """

    @property
    @abstractmethod
    def source_name(self) -> str:
        """Return unique identifier for this import source.

        This name is used to identify the source in ChatIngest.source
        and for adapter registry lookup.

        Returns:
            Source identifier string (e.g., "chatgpt", "claude")
        """
        ...

    @abstractmethod
    def parse(self, raw_export: dict[str, Any]) -> list[ChatIngest]:
        """Parse raw export data into ChatIngest objects.

        This method must be pure - it should not persist data,
        generate IDs, classify, or mutate any state.

        Args:
            raw_export: Raw JSON data from the export file

        Returns:
            List of ChatIngest objects (one export may contain multiple chats)

        Raises:
            ValueError: If the export format is invalid
        """
        ...

    def parse_file(self, path: Path | str) -> list[ChatIngest]:
        """Parse from file path.

        Convenience method that loads JSON from file and calls parse().

        The file is read as bytes and handed to parse_stream(), so both
        entry points decode the same bytes the same way. In particular a
        UTF-8 byte-order mark at the start of the file is accepted instead
        of failing JSON decoding.

        Args:
            path: Path to the export JSON file

        Returns:
            List of ChatIngest objects
        """
        with Path(path).open("rb") as f:
            return self.parse_stream(f)

    def parse_stream(self, stream: BinaryIO) -> list[ChatIngest]:
        """Parse from file stream.

        Convenience method that loads JSON from stream and calls parse().

        Args:
            stream: Binary file stream containing JSON data

        Returns:
            List of ChatIngest objects
        """
        import json

        data = json.load(stream)
        return self.parse(data)

    def parse_string(self, json_string: str) -> list[ChatIngest]:
        """Parse from JSON string.

        Convenience method that parses JSON string and calls parse().

        Args:
            json_string: JSON string

        Returns:
            List of ChatIngest objects
        """
        import json

        # A decoded string may still start with a byte-order mark, which the
        # JSON decoder rejects for str input; drop it before decoding.
        data = json.loads(json_string.removeprefix("\ufeff"))
        return self.parse(data)