bot-knows 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. bot_knows-0.1.0/.gitignore +6 -0
  2. bot_knows-0.1.0/LICENSE +21 -0
  3. bot_knows-0.1.0/PKG-INFO +294 -0
  4. bot_knows-0.1.0/README.md +250 -0
  5. bot_knows-0.1.0/pyproject.toml +174 -0
  6. bot_knows-0.1.0/src/bot_knows/__init__.py +70 -0
  7. bot_knows-0.1.0/src/bot_knows/config.py +115 -0
  8. bot_knows-0.1.0/src/bot_knows/domain/__init__.py +5 -0
  9. bot_knows-0.1.0/src/bot_knows/domain/chat.py +62 -0
  10. bot_knows-0.1.0/src/bot_knows/domain/message.py +64 -0
  11. bot_knows-0.1.0/src/bot_knows/domain/relation.py +56 -0
  12. bot_knows-0.1.0/src/bot_knows/domain/topic.py +132 -0
  13. bot_knows-0.1.0/src/bot_knows/domain/topic_evidence.py +55 -0
  14. bot_knows-0.1.0/src/bot_knows/importers/__init__.py +12 -0
  15. bot_knows-0.1.0/src/bot_knows/importers/base.py +116 -0
  16. bot_knows-0.1.0/src/bot_knows/importers/chatgpt.py +154 -0
  17. bot_knows-0.1.0/src/bot_knows/importers/claude.py +172 -0
  18. bot_knows-0.1.0/src/bot_knows/importers/generic_json.py +272 -0
  19. bot_knows-0.1.0/src/bot_knows/importers/registry.py +125 -0
  20. bot_knows-0.1.0/src/bot_knows/infra/__init__.py +5 -0
  21. bot_knows-0.1.0/src/bot_knows/infra/llm/__init__.py +6 -0
  22. bot_knows-0.1.0/src/bot_knows/infra/llm/anthropic_provider.py +172 -0
  23. bot_knows-0.1.0/src/bot_knows/infra/llm/openai_provider.py +195 -0
  24. bot_knows-0.1.0/src/bot_knows/infra/mongo/__init__.py +5 -0
  25. bot_knows-0.1.0/src/bot_knows/infra/mongo/client.py +145 -0
  26. bot_knows-0.1.0/src/bot_knows/infra/mongo/repositories.py +348 -0
  27. bot_knows-0.1.0/src/bot_knows/infra/neo4j/__init__.py +5 -0
  28. bot_knows-0.1.0/src/bot_knows/infra/neo4j/client.py +152 -0
  29. bot_knows-0.1.0/src/bot_knows/infra/neo4j/graph_repository.py +329 -0
  30. bot_knows-0.1.0/src/bot_knows/infra/redis/__init__.py +6 -0
  31. bot_knows-0.1.0/src/bot_knows/infra/redis/cache.py +198 -0
  32. bot_knows-0.1.0/src/bot_knows/infra/redis/client.py +193 -0
  33. bot_knows-0.1.0/src/bot_knows/interfaces/__init__.py +18 -0
  34. bot_knows-0.1.0/src/bot_knows/interfaces/embedding.py +55 -0
  35. bot_knows-0.1.0/src/bot_knows/interfaces/graph.py +194 -0
  36. bot_knows-0.1.0/src/bot_knows/interfaces/llm.py +70 -0
  37. bot_knows-0.1.0/src/bot_knows/interfaces/recall.py +92 -0
  38. bot_knows-0.1.0/src/bot_knows/interfaces/storage.py +225 -0
  39. bot_knows-0.1.0/src/bot_knows/logging.py +101 -0
  40. bot_knows-0.1.0/src/bot_knows/models/__init__.py +22 -0
  41. bot_knows-0.1.0/src/bot_knows/models/chat.py +55 -0
  42. bot_knows-0.1.0/src/bot_knows/models/ingest.py +70 -0
  43. bot_knows-0.1.0/src/bot_knows/models/message.py +49 -0
  44. bot_knows-0.1.0/src/bot_knows/models/recall.py +58 -0
  45. bot_knows-0.1.0/src/bot_knows/models/topic.py +100 -0
  46. bot_knows-0.1.0/src/bot_knows/orchestrator.py +398 -0
  47. bot_knows-0.1.0/src/bot_knows/py.typed +0 -0
  48. bot_knows-0.1.0/src/bot_knows/services/__init__.py +24 -0
  49. bot_knows-0.1.0/src/bot_knows/services/chat_processing.py +182 -0
  50. bot_knows-0.1.0/src/bot_knows/services/dedup_service.py +161 -0
  51. bot_knows-0.1.0/src/bot_knows/services/graph_service.py +217 -0
  52. bot_knows-0.1.0/src/bot_knows/services/message_builder.py +135 -0
  53. bot_knows-0.1.0/src/bot_knows/services/recall_service.py +296 -0
  54. bot_knows-0.1.0/src/bot_knows/services/tasks.py +128 -0
  55. bot_knows-0.1.0/src/bot_knows/services/topic_extraction.py +199 -0
  56. bot_knows-0.1.0/src/bot_knows/utils/__init__.py +22 -0
  57. bot_knows-0.1.0/src/bot_knows/utils/hashing.py +126 -0
  58. bot_knows-0.1.0/tests/__init__.py +1 -0
  59. bot_knows-0.1.0/tests/conftest.py +181 -0
  60. bot_knows-0.1.0/tests/fixtures/chatgpt_export.json +102 -0
  61. bot_knows-0.1.0/tests/fixtures/claude_export.json +36 -0
  62. bot_knows-0.1.0/tests/integration/__init__.py +1 -0
  63. bot_knows-0.1.0/tests/integration/test_pipeline.py +109 -0
  64. bot_knows-0.1.0/tests/mocks/__init__.py +1 -0
  65. bot_knows-0.1.0/tests/mocks/mock_embedding.py +52 -0
  66. bot_knows-0.1.0/tests/mocks/mock_mongo.py +120 -0
  67. bot_knows-0.1.0/tests/mocks/mock_neo4j.py +87 -0
  68. bot_knows-0.1.0/tests/unit/__init__.py +1 -0
  69. bot_knows-0.1.0/tests/unit/test_importers.py +215 -0
  70. bot_knows-0.1.0/tests/unit/test_models.py +257 -0
  71. bot_knows-0.1.0/tests/unit/test_orchestrator.py +632 -0
  72. bot_knows-0.1.0/tests/unit/test_services.py +207 -0
@@ -0,0 +1,6 @@
1
+ # Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files
2
+
3
+ *.pyc
4
+ PKG-INFO.*
5
+ .env
6
+ .coverage
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Snezhana Stojanova
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,294 @@
1
+ Metadata-Version: 2.4
2
+ Name: bot-knows
3
+ Version: 0.1.0
4
+ Summary: Framework-agnostic Python library for graph-backed personal knowledge bases from chat data
5
+ Project-URL: Homepage, https://github.com/Snezhana/bot-knows
6
+ Project-URL: Documentation, https://github.com/Snezhana/bot-knows#readme
7
+ Project-URL: Repository, https://github.com/Snezhana/bot-knows
8
+ Project-URL: Issues, https://github.com/Snezhana/bot-knows/issues
9
+ Author-email: Your Name <your@email.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: chat,embedding,graph,knowledge-base,memory,nlp,recall
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Framework :: AsyncIO
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.13
22
+ Requires-Dist: anthropic<1.0,>=0.42
23
+ Requires-Dist: anyio<5.0,>=4.7
24
+ Requires-Dist: motor<4.0,>=3.6
25
+ Requires-Dist: neo4j<6.0,>=5.27
26
+ Requires-Dist: numpy<3.0,>=2.2
27
+ Requires-Dist: openai<2.0,>=1.59
28
+ Requires-Dist: pydantic-settings<3.0,>=2.6
29
+ Requires-Dist: pydantic<3.0,>=2.10
30
+ Requires-Dist: redis<6.0,>=5.2
31
+ Requires-Dist: structlog<26.0,>=25.1
32
+ Requires-Dist: taskiq-redis<2.0,>=1.0
33
+ Requires-Dist: taskiq<1.0,>=0.11
34
+ Provides-Extra: dev
35
+ Requires-Dist: hypothesis<7.0,>=6.100; extra == 'dev'
36
+ Requires-Dist: mongomock-motor>=0.0.32; extra == 'dev'
37
+ Requires-Dist: mypy<2.0,>=1.14; extra == 'dev'
38
+ Requires-Dist: pre-commit<5.0,>=4.0; extra == 'dev'
39
+ Requires-Dist: pytest-asyncio<1.0,>=0.25; extra == 'dev'
40
+ Requires-Dist: pytest-cov<7.0,>=6.0; extra == 'dev'
41
+ Requires-Dist: pytest<9.0,>=8.3; extra == 'dev'
42
+ Requires-Dist: ruff<1.0,>=0.9; extra == 'dev'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # bot-knows
46
+
47
+ A framework-agnostic Python library for building graph-backed personal knowledge bases from chat data.
48
+
49
+ ## Features
50
+
51
+ - **Multi-source Chat Ingestion**: Import chats from ChatGPT, Claude, and custom JSON formats
52
+ - **Semantic Topic Extraction**: LLM-powered topic extraction with confidence scores
53
+ - **Intelligent Deduplication**: Embedding-based semantic deduplication with configurable thresholds
54
+ - **Graph-backed Knowledge Base**: Neo4j-powered relationship graph for topics and messages
55
+ - **Evidence-weighted Recall**: Spaced-repetition-inspired recall system with decay and reinforcement
56
+ - **Pluggable Infrastructure**: Bring your own storage, graph database, or LLM provider
57
+
58
+ ## Requirements
59
+
60
+ - Python >= 3.13
61
+ - MongoDB (storage) - or custom storage implementation
62
+ - Neo4j (graph database) - or custom graph implementation
63
+ - Redis (optional, for caching)
64
+ - OpenAI or Anthropic API key (for LLM features) - or custom LLM implementation
65
+
66
+ ## Installation
67
+
68
+ ```bash
69
+ pip install bot-knows
70
+ ```
71
+
72
+ Or with uv:
73
+
74
+ ```bash
75
+ uv add bot-knows
76
+ ```
77
+
78
+ ## Quick Start
79
+
80
+ The `BotKnows` class is the main orchestrator that accepts implementation classes for storage, graph database, and LLM providers. Configuration is automatically loaded from environment variables.
81
+
82
+ ### Using Built-in Infrastructure
83
+
84
+ ```python
85
+ from bot_knows import (
86
+ BotKnows,
87
+ MongoStorageRepository,
88
+ Neo4jGraphRepository,
89
+ OpenAIProvider,
90
+ ChatGPTAdapter,
91
+ )
92
+
93
+ async def main():
94
+ # Config is loaded from .env automatically
95
+ async with BotKnows(
96
+ storage_class=MongoStorageRepository,
97
+ graphdb_class=Neo4jGraphRepository,
98
+ llm_class=OpenAIProvider,
99
+ ) as bk:
100
+ # Import ChatGPT conversations
101
+ result = await bk.insert_chats("conversations.json", ChatGPTAdapter)
102
+ print(f"Imported {result.chats_new} chats, {result.topics_created} topics")
103
+
104
+ # Query the knowledge base
105
+ topics = await bk.get_chat_topics(chat_id)
106
+ due_topics = await bk.get_due_topics(threshold=0.3)
107
+ ```
108
+
109
+ ### Available Implementations
110
+
111
+ **Storage:**
112
+ - `MongoStorageRepository` - MongoDB-based storage
113
+
114
+ **Graph Database:**
115
+ - `Neo4jGraphRepository` - Neo4j graph database
116
+
117
+ **LLM Providers:**
118
+ - `OpenAIProvider` - OpenAI API (GPT models + embeddings)
119
+ - `AnthropicProvider` - Anthropic API (Claude models)
120
+
121
+ **Import Adapters:**
122
+ - `ChatGPTAdapter` - ChatGPT export format
123
+ - `ClaudeAdapter` - Claude export format
124
+ - `GenericJSONAdapter` - Custom JSON format
125
+
126
+
127
+ ## Custom Implementations
128
+
129
+ You can provide your own implementations by implementing the required interfaces. Set `config_class = None` on your class and pass configuration via the `*_custom_config` parameters.
130
+
131
+ ### Interfaces
132
+
133
+ - `StorageInterface` - Persistent storage for chats, messages, topics, evidence, and recall state
134
+ - `GraphServiceInterface` - Graph database operations for the knowledge graph
135
+ - `LLMInterface` - LLM interactions for classification and topic extraction
136
+ - `EmbeddingServiceInterface` - Text embedding generation
137
+
138
+ ### Example: Custom Storage Implementation
139
+
140
+ ```python
141
+ from bot_knows import BotKnows, ChatGPTAdapter, StorageInterface, Neo4jGraphRepository, OpenAIProvider
142
+
143
+ class MyCustomStorage:
144
+ """Custom storage implementation."""
145
+
146
+ config_class = None # Signals custom config
147
+
148
+ @classmethod
149
+ async def from_dict(cls, config: dict) -> "MyCustomStorage":
150
+ """Factory method for custom config."""
151
+ return cls(connection_string=config["connection_string"])
152
+
153
+ def __init__(self, connection_string: str):
154
+ self.conn = connection_string
155
+
156
+ # Implement all StorageInterface methods...
157
+ async def save_chat(self, chat): ...
158
+ async def get_chat(self, chat_id): ...
159
+ # ... etc
160
+
161
+ async with BotKnows(
162
+ storage_class=MyCustomStorage,
163
+ graphdb_class=Neo4jGraphRepository,
164
+ llm_class=OpenAIProvider,
165
+ storage_custom_config={"connection_string": "postgresql://..."},
166
+ ) as bk:
167
+ result = await bk.insert_chats("data.json", ChatGPTAdapter)
168
+ ```
169
+
170
+ ### Example: Custom LLM Provider
171
+
172
+ ```python
173
+ from bot_knows import BotKnows, ChatGPTAdapter, LLMInterface, MongoStorageRepository, Neo4jGraphRepository
174
+
175
+ class MyLLMProvider:
176
+ """Custom LLM provider (e.g., local model, different API)."""
177
+
178
+ config_class = None
179
+
180
+ @classmethod
181
+ async def from_dict(cls, config: dict) -> "MyLLMProvider":
182
+ return cls(api_url=config["api_url"], model=config["model"])
183
+
184
+ def __init__(self, api_url: str, model: str):
185
+ self.api_url = api_url
186
+ self.model = model
187
+
188
+ # Implement LLMInterface methods
189
+ async def classify_chat(self, first_pair, last_pair): ...
190
+ async def extract_topics(self, user_content, assistant_content): ...
191
+ async def normalize_topic_name(self, name): ...
192
+
193
+ # Implement EmbeddingServiceInterface if used as embedding provider
194
+ async def embed(self, texts): ...
195
+
196
+ async with BotKnows(
197
+ storage_class=MongoStorageRepository,
198
+ graphdb_class=Neo4jGraphRepository,
199
+ llm_class=MyLLMProvider,
200
+ llm_custom_config={"api_url": "http://localhost:8000", "model": "llama3"},
201
+ ) as bk:
202
+ result = await bk.insert_chats("data.json", ChatGPTAdapter)
203
+ ```
204
+
205
+ ## Configuration
206
+
207
+ Configuration is loaded from environment variables. See `.env.example` in the project repository for all available options.
208
+
209
+ Key environment variables:
210
+ - `MONGODB_URI` - MongoDB connection string
211
+ - `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` - Neo4j connection
212
+ - `OPENAI_API_KEY` - OpenAI API key
213
+ - `ANTHROPIC_API_KEY` - Anthropic API key
214
+ - `DEDUP_HIGH_THRESHOLD`, `DEDUP_LOW_THRESHOLD` - Deduplication thresholds
215
+
216
+ ## Architecture
217
+
218
+ ```
219
+ Input Sources (ChatGPT, Claude, Custom JSON)
220
+
221
+ Import Adapters (normalize to ChatIngest)
222
+
223
+ Domain Processing
224
+ ├── Chat identity resolution
225
+ ├── One-time Chat classification
226
+ ├── Message creation & ordering
227
+
228
+ Topic Extraction
229
+ ├── LLM-based extraction
230
+ ├── Semantic deduplication
231
+ ├── Evidence append
232
+
233
+ Graph Updates (Neo4j)
234
+ ```
235
+
236
+ ## Retrieval API
237
+
238
+ ```python
239
+ async with BotKnows(...) as bk:
240
+ # Get messages for a chat
241
+ messages = await bk.get_messages_for_chat(chat_id)
242
+
243
+ # Get topics for a chat
244
+ topic_ids = await bk.get_chat_topics(chat_id)
245
+
246
+ # Get related topics
247
+ related = await bk.get_related_topics(topic_id, limit=10)
248
+
249
+ # Get topic evidence
250
+ evidence = await bk.get_topic_evidence(topic_id)
251
+
252
+ # Spaced repetition recall
253
+ recall_state = await bk.get_recall_state(topic_id)
254
+ due_topics = await bk.get_due_topics(threshold=0.3)
255
+ all_states = await bk.get_all_recall_states()
256
+ ```
257
+
258
+ ## Development
259
+
260
+ ```bash
261
+ # Install with dev dependencies
262
+ uv sync --dev
263
+
264
+ # Run tests
265
+ uv run pytest
266
+
267
+ # Type checking
268
+ uv run mypy src/
269
+
270
+ # Linting
271
+ uv run ruff check src/
272
+ ```
273
+
274
+ ## Future Plans
275
+
276
+ The built-in infrastructure will be extended with additional providers:
277
+
278
+ - **Storage**: PostgreSQL, SQLite
279
+ - **Graph**: Amazon Neptune, TigerGraph, Memgraph
280
+ - **LLM**: Google Gemini, Ollama, HuggingFace
281
+
282
+ ## Contributing
283
+
284
+ Contributions are welcome! If you'd like to add a new infrastructure implementation:
285
+
286
+ 1. Implement the appropriate interface (`StorageInterface`, `GraphServiceInterface`, `LLMInterface`, or `EmbeddingServiceInterface`)
287
+ 2. Add a `config_class` for environment-based configuration (or set to `None` for custom config)
288
+ 3. Implement the `from_config` class method (or `from_dict` if `config_class` is `None`)
289
+ 4. Add tests for your implementation
290
+ 5. Submit a pull request
291
+
292
+ ## License
293
+
294
+ MIT
@@ -0,0 +1,250 @@
1
+ # bot-knows
2
+
3
+ A framework-agnostic Python library for building graph-backed personal knowledge bases from chat data.
4
+
5
+ ## Features
6
+
7
+ - **Multi-source Chat Ingestion**: Import chats from ChatGPT, Claude, and custom JSON formats
8
+ - **Semantic Topic Extraction**: LLM-powered topic extraction with confidence scores
9
+ - **Intelligent Deduplication**: Embedding-based semantic deduplication with configurable thresholds
10
+ - **Graph-backed Knowledge Base**: Neo4j-powered relationship graph for topics and messages
11
+ - **Evidence-weighted Recall**: Spaced-repetition-inspired recall system with decay and reinforcement
12
+ - **Pluggable Infrastructure**: Bring your own storage, graph database, or LLM provider
13
+
14
+ ## Requirements
15
+
16
+ - Python >= 3.13
17
+ - MongoDB (storage) - or custom storage implementation
18
+ - Neo4j (graph database) - or custom graph implementation
19
+ - Redis (optional, for caching)
20
+ - OpenAI or Anthropic API key (for LLM features) - or custom LLM implementation
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install bot-knows
26
+ ```
27
+
28
+ Or with uv:
29
+
30
+ ```bash
31
+ uv add bot-knows
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ The `BotKnows` class is the main orchestrator that accepts implementation classes for storage, graph database, and LLM providers. Configuration is automatically loaded from environment variables.
37
+
38
+ ### Using Built-in Infrastructure
39
+
40
+ ```python
41
+ from bot_knows import (
42
+ BotKnows,
43
+ MongoStorageRepository,
44
+ Neo4jGraphRepository,
45
+ OpenAIProvider,
46
+ ChatGPTAdapter,
47
+ )
48
+
49
+ async def main():
50
+ # Config is loaded from .env automatically
51
+ async with BotKnows(
52
+ storage_class=MongoStorageRepository,
53
+ graphdb_class=Neo4jGraphRepository,
54
+ llm_class=OpenAIProvider,
55
+ ) as bk:
56
+ # Import ChatGPT conversations
57
+ result = await bk.insert_chats("conversations.json", ChatGPTAdapter)
58
+ print(f"Imported {result.chats_new} chats, {result.topics_created} topics")
59
+
60
+ # Query the knowledge base
61
+ topics = await bk.get_chat_topics(chat_id)
62
+ due_topics = await bk.get_due_topics(threshold=0.3)
63
+ ```
64
+
65
+ ### Available Implementations
66
+
67
+ **Storage:**
68
+ - `MongoStorageRepository` - MongoDB-based storage
69
+
70
+ **Graph Database:**
71
+ - `Neo4jGraphRepository` - Neo4j graph database
72
+
73
+ **LLM Providers:**
74
+ - `OpenAIProvider` - OpenAI API (GPT models + embeddings)
75
+ - `AnthropicProvider` - Anthropic API (Claude models)
76
+
77
+ **Import Adapters:**
78
+ - `ChatGPTAdapter` - ChatGPT export format
79
+ - `ClaudeAdapter` - Claude export format
80
+ - `GenericJSONAdapter` - Custom JSON format
81
+
82
+
83
+ ## Custom Implementations
84
+
85
+ You can provide your own implementations by implementing the required interfaces. Set `config_class = None` on your class and pass configuration via the `*_custom_config` parameters.
86
+
87
+ ### Interfaces
88
+
89
+ - `StorageInterface` - Persistent storage for chats, messages, topics, evidence, and recall state
90
+ - `GraphServiceInterface` - Graph database operations for the knowledge graph
91
+ - `LLMInterface` - LLM interactions for classification and topic extraction
92
+ - `EmbeddingServiceInterface` - Text embedding generation
93
+
94
+ ### Example: Custom Storage Implementation
95
+
96
+ ```python
97
+ from bot_knows import BotKnows, ChatGPTAdapter, StorageInterface, Neo4jGraphRepository, OpenAIProvider
98
+
99
+ class MyCustomStorage:
100
+ """Custom storage implementation."""
101
+
102
+ config_class = None # Signals custom config
103
+
104
+ @classmethod
105
+ async def from_dict(cls, config: dict) -> "MyCustomStorage":
106
+ """Factory method for custom config."""
107
+ return cls(connection_string=config["connection_string"])
108
+
109
+ def __init__(self, connection_string: str):
110
+ self.conn = connection_string
111
+
112
+ # Implement all StorageInterface methods...
113
+ async def save_chat(self, chat): ...
114
+ async def get_chat(self, chat_id): ...
115
+ # ... etc
116
+
117
+ async with BotKnows(
118
+ storage_class=MyCustomStorage,
119
+ graphdb_class=Neo4jGraphRepository,
120
+ llm_class=OpenAIProvider,
121
+ storage_custom_config={"connection_string": "postgresql://..."},
122
+ ) as bk:
123
+ result = await bk.insert_chats("data.json", ChatGPTAdapter)
124
+ ```
125
+
126
+ ### Example: Custom LLM Provider
127
+
128
+ ```python
129
+ from bot_knows import BotKnows, ChatGPTAdapter, LLMInterface, MongoStorageRepository, Neo4jGraphRepository
130
+
131
+ class MyLLMProvider:
132
+ """Custom LLM provider (e.g., local model, different API)."""
133
+
134
+ config_class = None
135
+
136
+ @classmethod
137
+ async def from_dict(cls, config: dict) -> "MyLLMProvider":
138
+ return cls(api_url=config["api_url"], model=config["model"])
139
+
140
+ def __init__(self, api_url: str, model: str):
141
+ self.api_url = api_url
142
+ self.model = model
143
+
144
+ # Implement LLMInterface methods
145
+ async def classify_chat(self, first_pair, last_pair): ...
146
+ async def extract_topics(self, user_content, assistant_content): ...
147
+ async def normalize_topic_name(self, name): ...
148
+
149
+ # Implement EmbeddingServiceInterface if used as embedding provider
150
+ async def embed(self, texts): ...
151
+
152
+ async with BotKnows(
153
+ storage_class=MongoStorageRepository,
154
+ graphdb_class=Neo4jGraphRepository,
155
+ llm_class=MyLLMProvider,
156
+ llm_custom_config={"api_url": "http://localhost:8000", "model": "llama3"},
157
+ ) as bk:
158
+ result = await bk.insert_chats("data.json", ChatGPTAdapter)
159
+ ```
160
+
161
+ ## Configuration
162
+
163
+ Configuration is loaded from environment variables. See `.env.example` in the project repository for all available options.
164
+
165
+ Key environment variables:
166
+ - `MONGODB_URI` - MongoDB connection string
167
+ - `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` - Neo4j connection
168
+ - `OPENAI_API_KEY` - OpenAI API key
169
+ - `ANTHROPIC_API_KEY` - Anthropic API key
170
+ - `DEDUP_HIGH_THRESHOLD`, `DEDUP_LOW_THRESHOLD` - Deduplication thresholds
171
+
172
+ ## Architecture
173
+
174
+ ```
175
+ Input Sources (ChatGPT, Claude, Custom JSON)
176
+
177
+ Import Adapters (normalize to ChatIngest)
178
+
179
+ Domain Processing
180
+ ├── Chat identity resolution
181
+ ├── One-time Chat classification
182
+ ├── Message creation & ordering
183
+
184
+ Topic Extraction
185
+ ├── LLM-based extraction
186
+ ├── Semantic deduplication
187
+ ├── Evidence append
188
+
189
+ Graph Updates (Neo4j)
190
+ ```
191
+
192
+ ## Retrieval API
193
+
194
+ ```python
195
+ async with BotKnows(...) as bk:
196
+ # Get messages for a chat
197
+ messages = await bk.get_messages_for_chat(chat_id)
198
+
199
+ # Get topics for a chat
200
+ topic_ids = await bk.get_chat_topics(chat_id)
201
+
202
+ # Get related topics
203
+ related = await bk.get_related_topics(topic_id, limit=10)
204
+
205
+ # Get topic evidence
206
+ evidence = await bk.get_topic_evidence(topic_id)
207
+
208
+ # Spaced repetition recall
209
+ recall_state = await bk.get_recall_state(topic_id)
210
+ due_topics = await bk.get_due_topics(threshold=0.3)
211
+ all_states = await bk.get_all_recall_states()
212
+ ```
213
+
214
+ ## Development
215
+
216
+ ```bash
217
+ # Install with dev dependencies
218
+ uv sync --dev
219
+
220
+ # Run tests
221
+ uv run pytest
222
+
223
+ # Type checking
224
+ uv run mypy src/
225
+
226
+ # Linting
227
+ uv run ruff check src/
228
+ ```
229
+
230
+ ## Future Plans
231
+
232
+ The built-in infrastructure will be extended with additional providers:
233
+
234
+ - **Storage**: PostgreSQL, SQLite
235
+ - **Graph**: Amazon Neptune, TigerGraph, Memgraph
236
+ - **LLM**: Google Gemini, Ollama, HuggingFace
237
+
238
+ ## Contributing
239
+
240
+ Contributions are welcome! If you'd like to add a new infrastructure implementation:
241
+
242
+ 1. Implement the appropriate interface (`StorageInterface`, `GraphServiceInterface`, `LLMInterface`, or `EmbeddingServiceInterface`)
243
+ 2. Add a `config_class` for environment-based configuration (or set to `None` for custom config)
244
+ 3. Implement the `from_config` class method (or `from_dict` if `config_class` is `None`)
245
+ 4. Add tests for your implementation
246
+ 5. Submit a pull request
247
+
248
+ ## License
249
+
250
+ MIT