basic-memory 0.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. basic_memory/__init__.py +7 -0
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +185 -0
  4. basic_memory/alembic/migrations.py +24 -0
  5. basic_memory/alembic/script.py.mako +26 -0
  6. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  7. basic_memory/alembic/versions/3dae7c7b1564_initial_schema.py +93 -0
  8. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  9. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  10. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  11. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  12. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  13. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  14. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  15. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  16. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  17. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  18. basic_memory/api/__init__.py +5 -0
  19. basic_memory/api/app.py +131 -0
  20. basic_memory/api/routers/__init__.py +11 -0
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +318 -0
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +90 -0
  26. basic_memory/api/routers/project_router.py +448 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +249 -0
  29. basic_memory/api/routers/search_router.py +36 -0
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +182 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +413 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +342 -0
  39. basic_memory/api/v2/routers/prompt_router.py +270 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/__init__.py +1 -0
  43. basic_memory/cli/app.py +84 -0
  44. basic_memory/cli/auth.py +277 -0
  45. basic_memory/cli/commands/__init__.py +18 -0
  46. basic_memory/cli/commands/cloud/__init__.py +6 -0
  47. basic_memory/cli/commands/cloud/api_client.py +112 -0
  48. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  49. basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
  50. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  51. basic_memory/cli/commands/cloud/rclone_commands.py +371 -0
  52. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  53. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  54. basic_memory/cli/commands/cloud/upload.py +233 -0
  55. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  56. basic_memory/cli/commands/command_utils.py +77 -0
  57. basic_memory/cli/commands/db.py +44 -0
  58. basic_memory/cli/commands/format.py +198 -0
  59. basic_memory/cli/commands/import_chatgpt.py +84 -0
  60. basic_memory/cli/commands/import_claude_conversations.py +87 -0
  61. basic_memory/cli/commands/import_claude_projects.py +86 -0
  62. basic_memory/cli/commands/import_memory_json.py +87 -0
  63. basic_memory/cli/commands/mcp.py +76 -0
  64. basic_memory/cli/commands/project.py +889 -0
  65. basic_memory/cli/commands/status.py +174 -0
  66. basic_memory/cli/commands/telemetry.py +81 -0
  67. basic_memory/cli/commands/tool.py +341 -0
  68. basic_memory/cli/main.py +28 -0
  69. basic_memory/config.py +616 -0
  70. basic_memory/db.py +394 -0
  71. basic_memory/deps.py +705 -0
  72. basic_memory/file_utils.py +478 -0
  73. basic_memory/ignore_utils.py +297 -0
  74. basic_memory/importers/__init__.py +27 -0
  75. basic_memory/importers/base.py +79 -0
  76. basic_memory/importers/chatgpt_importer.py +232 -0
  77. basic_memory/importers/claude_conversations_importer.py +180 -0
  78. basic_memory/importers/claude_projects_importer.py +148 -0
  79. basic_memory/importers/memory_json_importer.py +108 -0
  80. basic_memory/importers/utils.py +61 -0
  81. basic_memory/markdown/__init__.py +21 -0
  82. basic_memory/markdown/entity_parser.py +279 -0
  83. basic_memory/markdown/markdown_processor.py +160 -0
  84. basic_memory/markdown/plugins.py +242 -0
  85. basic_memory/markdown/schemas.py +70 -0
  86. basic_memory/markdown/utils.py +117 -0
  87. basic_memory/mcp/__init__.py +1 -0
  88. basic_memory/mcp/async_client.py +139 -0
  89. basic_memory/mcp/project_context.py +141 -0
  90. basic_memory/mcp/prompts/__init__.py +19 -0
  91. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  92. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  93. basic_memory/mcp/prompts/recent_activity.py +188 -0
  94. basic_memory/mcp/prompts/search.py +57 -0
  95. basic_memory/mcp/prompts/utils.py +162 -0
  96. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  97. basic_memory/mcp/resources/project_info.py +71 -0
  98. basic_memory/mcp/server.py +81 -0
  99. basic_memory/mcp/tools/__init__.py +48 -0
  100. basic_memory/mcp/tools/build_context.py +120 -0
  101. basic_memory/mcp/tools/canvas.py +152 -0
  102. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  103. basic_memory/mcp/tools/delete_note.py +242 -0
  104. basic_memory/mcp/tools/edit_note.py +324 -0
  105. basic_memory/mcp/tools/list_directory.py +168 -0
  106. basic_memory/mcp/tools/move_note.py +551 -0
  107. basic_memory/mcp/tools/project_management.py +201 -0
  108. basic_memory/mcp/tools/read_content.py +281 -0
  109. basic_memory/mcp/tools/read_note.py +267 -0
  110. basic_memory/mcp/tools/recent_activity.py +534 -0
  111. basic_memory/mcp/tools/search.py +385 -0
  112. basic_memory/mcp/tools/utils.py +540 -0
  113. basic_memory/mcp/tools/view_note.py +78 -0
  114. basic_memory/mcp/tools/write_note.py +230 -0
  115. basic_memory/models/__init__.py +15 -0
  116. basic_memory/models/base.py +10 -0
  117. basic_memory/models/knowledge.py +226 -0
  118. basic_memory/models/project.py +87 -0
  119. basic_memory/models/search.py +85 -0
  120. basic_memory/repository/__init__.py +11 -0
  121. basic_memory/repository/entity_repository.py +503 -0
  122. basic_memory/repository/observation_repository.py +73 -0
  123. basic_memory/repository/postgres_search_repository.py +379 -0
  124. basic_memory/repository/project_info_repository.py +10 -0
  125. basic_memory/repository/project_repository.py +128 -0
  126. basic_memory/repository/relation_repository.py +146 -0
  127. basic_memory/repository/repository.py +385 -0
  128. basic_memory/repository/search_index_row.py +95 -0
  129. basic_memory/repository/search_repository.py +94 -0
  130. basic_memory/repository/search_repository_base.py +241 -0
  131. basic_memory/repository/sqlite_search_repository.py +439 -0
  132. basic_memory/schemas/__init__.py +86 -0
  133. basic_memory/schemas/base.py +297 -0
  134. basic_memory/schemas/cloud.py +50 -0
  135. basic_memory/schemas/delete.py +37 -0
  136. basic_memory/schemas/directory.py +30 -0
  137. basic_memory/schemas/importer.py +35 -0
  138. basic_memory/schemas/memory.py +285 -0
  139. basic_memory/schemas/project_info.py +212 -0
  140. basic_memory/schemas/prompt.py +90 -0
  141. basic_memory/schemas/request.py +112 -0
  142. basic_memory/schemas/response.py +229 -0
  143. basic_memory/schemas/search.py +117 -0
  144. basic_memory/schemas/sync_report.py +72 -0
  145. basic_memory/schemas/v2/__init__.py +27 -0
  146. basic_memory/schemas/v2/entity.py +129 -0
  147. basic_memory/schemas/v2/resource.py +46 -0
  148. basic_memory/services/__init__.py +8 -0
  149. basic_memory/services/context_service.py +601 -0
  150. basic_memory/services/directory_service.py +308 -0
  151. basic_memory/services/entity_service.py +864 -0
  152. basic_memory/services/exceptions.py +37 -0
  153. basic_memory/services/file_service.py +541 -0
  154. basic_memory/services/initialization.py +216 -0
  155. basic_memory/services/link_resolver.py +121 -0
  156. basic_memory/services/project_service.py +880 -0
  157. basic_memory/services/search_service.py +404 -0
  158. basic_memory/services/service.py +15 -0
  159. basic_memory/sync/__init__.py +6 -0
  160. basic_memory/sync/background_sync.py +26 -0
  161. basic_memory/sync/sync_service.py +1259 -0
  162. basic_memory/sync/watch_service.py +510 -0
  163. basic_memory/telemetry.py +249 -0
  164. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  165. basic_memory/templates/prompts/search.hbs +101 -0
  166. basic_memory/utils.py +468 -0
  167. basic_memory-0.17.1.dist-info/METADATA +617 -0
  168. basic_memory-0.17.1.dist-info/RECORD +171 -0
  169. basic_memory-0.17.1.dist-info/WHEEL +4 -0
  170. basic_memory-0.17.1.dist-info/entry_points.txt +3 -0
  171. basic_memory-0.17.1.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,297 @@
1
+ """Utilities for handling .gitignore patterns and file filtering."""
2
+
3
+ import fnmatch
4
+ from pathlib import Path
5
+ from typing import Set
6
+
7
+
8
+ # Common directories and patterns to ignore by default
9
+ # These are used as fallback if .bmignore doesn't exist
10
DEFAULT_IGNORE_PATTERNS = {
    # Anything whose name begins with a dot
    ".*",
    # Files Basic Memory keeps for itself
    "*.db", "*.db-shm", "*.db-wal", "config.json",
    # Version-control metadata
    ".git", ".svn",
    # Python byte-code, caches and packaging leftovers
    "__pycache__", "*.pyc", "*.pyo", "*.pyd",
    ".pytest_cache", ".coverage", "*.egg-info",
    ".tox", ".mypy_cache", ".ruff_cache",
    # Virtual environments
    ".venv", "venv", "env", ".env",
    # Node.js dependencies
    "node_modules",
    # Build output
    "build", "dist", ".cache",
    # Editor / IDE settings
    ".idea", ".vscode",
    # Operating-system droppings
    ".DS_Store", "Thumbs.db", "desktop.ini",
    # Obsidian vault settings
    ".obsidian",
    # Scratch and editor swap files
    "*.tmp", "*.swp", "*.swo", "*~",
}
58
+
59
+
60
def get_bmignore_path() -> Path:
    """Locate the user-level ignore file.

    Returns:
        ``~/.basic-memory/.bmignore`` as a Path (the file may not exist yet).
    """
    return Path.home().joinpath(".basic-memory", ".bmignore")
67
+
68
+
69
def create_default_bmignore() -> None:
    """Create the default .bmignore file if it doesn't exist.

    Gives users a file they can customize for all Basic Memory operations.
    An existing file is never touched. The parent directory is created when
    missing.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    bmignore_path = get_bmignore_path()

    if bmignore_path.exists():
        return

    bmignore_path.parent.mkdir(parents=True, exist_ok=True)
    # Written explicitly as UTF-8 because the loader reads the file back as
    # UTF-8; relying on the locale default could disagree with that.
    bmignore_path.write_text(
        """# Basic Memory Ignore Patterns
# This file is used by both 'bm cloud upload', 'bm cloud bisync', and file sync
# Patterns use standard gitignore-style syntax

# Hidden files (files starting with dot)
.*

# Basic Memory internal files (includes test databases)
*.db
*.db-shm
*.db-wal
config.json

# Version control
.git
.svn

# Python
__pycache__
*.pyc
*.pyo
*.pyd
.pytest_cache
.coverage
*.egg-info
.tox
.mypy_cache
.ruff_cache

# Virtual environments
.venv
venv
env
.env

# Node.js
node_modules

# Build artifacts
build
dist
.cache

# IDE
.idea
.vscode

# OS files
.DS_Store
Thumbs.db
desktop.ini

# Obsidian
.obsidian

# Temporary files
*.tmp
*.swp
*.swo
*~
""",
        encoding="utf-8",
    )
141
+
142
+
143
def load_bmignore_patterns() -> Set[str]:
    """Load patterns from the .bmignore file.

    The default file is created first when it is missing. Blank lines and
    lines starting with ``#`` are skipped; every other line is stripped of
    surrounding whitespace and used as a pattern.

    Returns:
        Set of patterns from .bmignore, or a copy of DEFAULT_IGNORE_PATTERNS
        when the file cannot be created or read, or contains no patterns.
    """
    bmignore_path = get_bmignore_path()

    try:
        # Creation sits inside the guarded section so that an unwritable home
        # directory falls back to the defaults instead of raising.
        if not bmignore_path.exists():
            create_default_bmignore()

        with bmignore_path.open("r", encoding="utf-8") as f:
            patterns = {
                stripped
                for line in f
                if (stripped := line.strip()) and not stripped.startswith("#")
            }
    except (OSError, UnicodeError):
        # Cannot create, open or decode .bmignore: fall back to defaults
        return set(DEFAULT_IGNORE_PATTERNS)

    # A file holding only comments/blank lines also falls back to defaults
    return patterns or set(DEFAULT_IGNORE_PATTERNS)
173
+
174
+
175
def load_gitignore_patterns(base_path: Path, use_gitignore: bool = True) -> Set[str]:
    """Collect ignore patterns from .bmignore and, optionally, .gitignore.

    Sources, in order:
    1. ~/.basic-memory/.bmignore (user's global ignore patterns)
    2. {base_path}/.gitignore (project-specific patterns, if use_gitignore=True)

    Args:
        base_path: Directory expected to contain the project's .gitignore
        use_gitignore: When False, only the .bmignore patterns are returned

    Returns:
        Set of patterns to ignore
    """
    collected = load_bmignore_patterns()

    if not use_gitignore:
        return collected

    project_ignore = base_path / ".gitignore"
    if not project_ignore.exists():
        return collected

    try:
        with project_ignore.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                # Blank lines and comments carry no pattern
                if not entry or entry.startswith("#"):
                    continue
                collected.add(entry)
    except Exception:
        # Best effort: an unreadable .gitignore leaves whatever was
        # gathered so far in place
        pass

    return collected
207
+
208
+
209
def should_ignore_path(file_path: Path, base_path: Path, ignore_patterns: Set[str]) -> bool:
    """Check if a path should be ignored based on gitignore-style patterns.

    Matching rules (a simplified subset of gitignore; patterns starting with
    ``!`` receive no special treatment):

    * ``/name`` or ``/name/`` - anchored at ``base_path``: matches the path
      itself or any leading sub-path of it, so everything below a matching
      directory is ignored as well.
    * ``name/`` - matches any path component, literally or as a glob
      (e.g. ``*.egg-info/``). If the pattern contains a further ``/`` it is
      matched against leading sub-paths instead.
    * anything else - matches any single path component (literally or as a
      glob), the whole relative path as a glob, and, when the pattern
      contains ``/``, any leading sub-path.

    Args:
        file_path: The path to check
        base_path: The base directory for relative path calculation
        ignore_patterns: Set of patterns to match against

    Returns:
        True if the path should be ignored, False otherwise (including when
        ``file_path`` is not located under ``base_path``)
    """
    try:
        relative_path = file_path.relative_to(base_path)
    except ValueError:
        # Not under base_path: there is no relative path to match, don't ignore
        return False

    parts = relative_path.parts
    relative_str = str(relative_path)
    relative_posix = relative_path.as_posix()  # Use forward slashes for matching
    # Leading sub-paths ("a", "a/b", "a/b/c.md") so that a pattern naming a
    # directory also covers everything beneath it.
    prefixes = ["/".join(parts[: depth + 1]) for depth in range(len(parts))]

    def matches(text: str, glob: str) -> bool:
        # Literal equality first so names containing glob metacharacters
        # still match themselves.
        return text == glob or fnmatch.fnmatch(text, glob)

    for pattern in ignore_patterns:
        # Patterns starting with / are relative to base_path
        if pattern.startswith("/"):
            anchored = pattern[1:]
            if anchored.endswith("/"):
                anchored = anchored[:-1]
            elif fnmatch.fnmatch(relative_posix, anchored):
                return True
            if any(matches(prefix, anchored) for prefix in prefixes):
                return True
            continue

        # Directory patterns (ending with /)
        if pattern.endswith("/"):
            dir_glob = pattern[:-1]
            if "/" in dir_glob:
                # A slash inside the pattern ties it to base_path
                if any(matches(prefix, dir_glob) for prefix in prefixes):
                    return True
            elif any(matches(part, dir_glob) for part in parts):
                return True
            continue

        # Name or glob match on any single component
        # (e.g. ".git", or ".*" matching ".hidden.md" in "concept/.hidden.md")
        if any(matches(part, pattern) for part in parts):
            return True

        # Glob match on the full relative path
        if fnmatch.fnmatch(relative_posix, pattern) or fnmatch.fnmatch(relative_str, pattern):
            return True

        # Multi-segment pattern naming a directory above the path
        if "/" in pattern and any(matches(prefix, pattern) for prefix in prefixes):
            return True

    return False
270
+
271
+
272
def filter_files(
    files: list[Path], base_path: Path, ignore_patterns: Set[str] | None = None
) -> tuple[list[Path], int]:
    """Split a list of files into kept paths and a count of ignored ones.

    Args:
        files: Candidate file paths
        base_path: The base directory for relative path calculation
        ignore_patterns: Patterns to apply. When None they are loaded through
            load_gitignore_patterns(base_path)

    Returns:
        Tuple of (filtered_files, ignored_count)
    """
    patterns = load_gitignore_patterns(base_path) if ignore_patterns is None else ignore_patterns

    # Evaluate every path once, in order, then separate the two groups
    decisions = [
        (candidate, should_ignore_path(candidate, base_path, patterns)) for candidate in files
    ]
    kept = [candidate for candidate, ignored in decisions if not ignored]

    return kept, len(decisions) - len(kept)
@@ -0,0 +1,27 @@
1
+ """Import services for Basic Memory."""
2
+
3
+ from basic_memory.importers.base import Importer
4
+ from basic_memory.importers.chatgpt_importer import ChatGPTImporter
5
+ from basic_memory.importers.claude_conversations_importer import (
6
+ ClaudeConversationsImporter,
7
+ )
8
+ from basic_memory.importers.claude_projects_importer import ClaudeProjectsImporter
9
+ from basic_memory.importers.memory_json_importer import MemoryJsonImporter
10
+ from basic_memory.schemas.importer import (
11
+ ChatImportResult,
12
+ EntityImportResult,
13
+ ImportResult,
14
+ ProjectImportResult,
15
+ )
16
+
17
# Names exported by the importers package: the base class, the concrete
# importers, and the result schemas imported above from
# basic_memory.schemas.importer.
__all__ = [
    "Importer",
    "ChatGPTImporter",
    "ClaudeConversationsImporter",
    "ClaudeProjectsImporter",
    "MemoryJsonImporter",
    "ImportResult",
    "ChatImportResult",
    "EntityImportResult",
    "ProjectImportResult",
]
@@ -0,0 +1,79 @@
1
+ """Base import service for Basic Memory."""
2
+
3
+ import logging
4
+ from abc import abstractmethod
5
+ from pathlib import Path
6
+ from typing import Any, Optional, TypeVar
7
+
8
+ from basic_memory.markdown.markdown_processor import MarkdownProcessor
9
+ from basic_memory.markdown.schemas import EntityMarkdown
10
+ from basic_memory.schemas.importer import ImportResult
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ T = TypeVar("T", bound=ImportResult)
15
+
16
+
17
+ class Importer[T: ImportResult]:
18
+ """Base class for all import services."""
19
+
20
+ def __init__(self, base_path: Path, markdown_processor: MarkdownProcessor):
21
+ """Initialize the import service.
22
+
23
+ Args:
24
+ markdown_processor: MarkdownProcessor instance for writing markdown files.
25
+ """
26
+ self.base_path = base_path.resolve() # Get absolute path
27
+ self.markdown_processor = markdown_processor
28
+
29
+ @abstractmethod
30
+ async def import_data(self, source_data, destination_folder: str, **kwargs: Any) -> T:
31
+ """Import data from source file to destination folder.
32
+
33
+ Args:
34
+ source_path: Path to the source file.
35
+ destination_folder: Destination folder within the project.
36
+ **kwargs: Additional keyword arguments for specific import types.
37
+
38
+ Returns:
39
+ ImportResult containing statistics and status of the import.
40
+ """
41
+ pass # pragma: no cover
42
+
43
+ async def write_entity(self, entity: EntityMarkdown, file_path: Path) -> None:
44
+ """Write entity to file using markdown processor.
45
+
46
+ Args:
47
+ entity: EntityMarkdown instance to write.
48
+ file_path: Path to write the entity to.
49
+ """
50
+ await self.markdown_processor.write_file(file_path, entity)
51
+
52
+ def ensure_folder_exists(self, folder: str) -> Path:
53
+ """Ensure folder exists, create if it doesn't.
54
+
55
+ Args:
56
+ base_path: Base path of the project.
57
+ folder: Folder name or path within the project.
58
+
59
+ Returns:
60
+ Path to the folder.
61
+ """
62
+ folder_path = self.base_path / folder
63
+ folder_path.mkdir(parents=True, exist_ok=True)
64
+ return folder_path
65
+
66
+ @abstractmethod
67
+ def handle_error(
68
+ self, message: str, error: Optional[Exception] = None
69
+ ) -> T: # pragma: no cover
70
+ """Handle errors during import.
71
+
72
+ Args:
73
+ message: Error message.
74
+ error: Optional exception that caused the error.
75
+
76
+ Returns:
77
+ ImportResult with error information.
78
+ """
79
+ pass
@@ -0,0 +1,232 @@
1
+ """ChatGPT import service for Basic Memory."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional, Set
6
+
7
+ from basic_memory.markdown.schemas import EntityFrontmatter, EntityMarkdown
8
+ from basic_memory.importers.base import Importer
9
+ from basic_memory.schemas.importer import ChatImportResult
10
+ from basic_memory.importers.utils import clean_filename, format_timestamp
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ChatGPTImporter(Importer[ChatImportResult]):
    """Service for importing ChatGPT conversations."""

    async def import_data(
        self, source_data, destination_folder: str, **kwargs: Any
    ) -> ChatImportResult:
        """Import conversations from ChatGPT JSON export.

        Args:
            source_data: Iterable of conversation dicts; each is read for its
                "title", "create_time", "update_time" and "mapping" keys
                (presumably the parsed conversations.json export).
            destination_folder: Destination folder within the project.
            **kwargs: Additional keyword arguments (not used by this importer).

        Returns:
            ChatImportResult containing statistics and status of the import.
            Any exception is logged and reported as a failed result instead
            of being raised.
        """
        try:  # pragma: no cover
            # Ensure the destination folder exists
            self.ensure_folder_exists(destination_folder)
            conversations = source_data

            # Process each conversation
            messages_imported = 0
            chats_imported = 0

            for chat in conversations:
                # Convert to entity
                entity = self._format_chat_content(destination_folder, chat)

                # Write file; the permalink doubles as the relative file path
                file_path = self.base_path / f"{entity.frontmatter.metadata['permalink']}.md"
                await self.write_entity(entity, file_path)

                # Count messages that are not flagged as hidden
                msg_count = sum(
                    1
                    for node in chat["mapping"].values()
                    if node.get("message")
                    and not node.get("message", {})
                    .get("metadata", {})
                    .get("is_visually_hidden_from_conversation")
                )

                chats_imported += 1
                messages_imported += msg_count

            return ChatImportResult(
                import_count={"conversations": chats_imported, "messages": messages_imported},
                success=True,
                conversations=chats_imported,
                messages=messages_imported,
            )

        except Exception as e:  # pragma: no cover
            logger.exception("Failed to import ChatGPT conversations")
            return self.handle_error("Failed to import ChatGPT conversations", e)

    def handle_error(  # pragma: no cover
        self, message: str, error: Optional[Exception] = None
    ) -> ChatImportResult:
        """Build the failed result returned when an import aborts.

        Overrides the base implementation, whose empty body returns None and
        would leave callers of import_data without a result object.

        Args:
            message: Error message.
            error: Optional exception that caused the error.

        Returns:
            ChatImportResult with success=False and zeroed counters.
        """
        # NOTE(review): message/error are not stored in the result because no
        # error field of ChatImportResult is visible from this module; the
        # caller has already logged them. Populate such a field if it exists.
        return ChatImportResult(
            import_count={"conversations": 0, "messages": 0},
            success=False,
            conversations=0,
            messages=0,
        )

    def _format_chat_content(
        self, folder: str, conversation: Dict[str, Any]
    ) -> EntityMarkdown:  # pragma: no cover
        """Convert chat conversation to Basic Memory entity.

        Args:
            folder: Destination folder name.
            conversation: ChatGPT conversation data.

        Returns:
            EntityMarkdown instance representing the conversation.
        """
        # Extract timestamps
        created_at = conversation["create_time"]
        modified_at = conversation["update_time"]

        # Find root message: the first node without a parent
        root_id = None
        for node_id, node in conversation["mapping"].items():
            if node.get("parent") is None:
                root_id = node_id
                break

        # Generate permalink
        date_prefix = datetime.fromtimestamp(created_at).astimezone().strftime("%Y%m%d")
        clean_title = clean_filename(conversation["title"])

        # Format content
        content = self._format_chat_markdown(
            title=conversation["title"],
            mapping=conversation["mapping"],
            root_id=root_id,
            created_at=created_at,
            modified_at=modified_at,
        )

        # Create entity
        entity = EntityMarkdown(
            frontmatter=EntityFrontmatter(
                metadata={
                    "type": "conversation",
                    "title": conversation["title"],
                    "created": format_timestamp(created_at),
                    "modified": format_timestamp(modified_at),
                    "permalink": f"{folder}/{date_prefix}-{clean_title}",
                }
            ),
            content=content,
        )

        return entity

    def _format_chat_markdown(
        self,
        title: str,
        mapping: Dict[str, Any],
        root_id: Optional[str],
        created_at: float,
        modified_at: float,
    ) -> str:  # pragma: no cover
        """Format chat as clean markdown.

        Args:
            title: Chat title.
            mapping: Message mapping.
            root_id: Root message ID.
            created_at: Creation timestamp (not used in the output).
            modified_at: Modification timestamp (not used in the output).

        Returns:
            Formatted markdown content.
        """
        # Start with title
        lines = [f"# {title}\n"]

        # Traverse message tree
        seen_msgs: Set[str] = set()
        messages = self._traverse_messages(mapping, root_id, seen_msgs)

        # Format each message
        for msg in messages:
            # Skip hidden messages
            if msg.get("metadata", {}).get("is_visually_hidden_from_conversation"):
                continue

            # Get author and timestamp
            author = msg["author"]["role"].title()
            ts = format_timestamp(msg["create_time"]) if msg.get("create_time") else ""

            # Add message header
            lines.append(f"### {author} ({ts})")

            # Add message content
            content = self._get_message_content(msg)
            if content:
                lines.append(content)

            # Add spacing
            lines.append("")

        return "\n".join(lines)

    def _get_message_content(self, message: Dict[str, Any]) -> str:  # pragma: no cover
        """Extract clean message content.

        Args:
            message: Message data.

        Returns:
            The text parts joined by newlines for "text" content, a fenced
            code block for "code" content, and an empty string otherwise.
        """
        if not message or "content" not in message:
            return ""

        content = message["content"]
        content_type = content.get("content_type")
        if content_type == "text":
            # str.join raises TypeError on non-string items and on None,
            # so keep only the string parts.
            parts = content.get("parts") or []
            return "\n".join(part for part in parts if isinstance(part, str))
        if content_type == "code":
            return f"```{content.get('language', '')}\n{content.get('text', '')}\n```"
        return ""

    def _traverse_messages(
        self, mapping: Dict[str, Any], root_id: Optional[str], seen: Set[str]
    ) -> List[Dict[str, Any]]:  # pragma: no cover
        """Traverse message tree iteratively to handle deep conversations.

        Args:
            mapping: Message mapping.
            root_id: Root message ID.
            seen: Set of seen message IDs; updated in place with the ID of
                every node whose message is collected.

        Returns:
            List of message data in depth-first order.
        """
        messages: List[Dict[str, Any]] = []
        if not root_id:
            return messages

        # Use iterative approach with stack to avoid recursion depth issues
        stack = [root_id]
        # Nodes already expanded; without this a node that is (directly or
        # indirectly) listed among its own descendants would loop forever.
        visited: Set[str] = set()

        while stack:
            node_id = stack.pop()
            if not node_id or node_id in visited:
                continue
            visited.add(node_id)

            node = mapping.get(node_id)
            if not node:
                continue

            # Process current node if it has a message and hasn't been seen
            if node["id"] not in seen and node.get("message"):
                seen.add(node["id"])
                messages.append(node["message"])

            # Add children to stack in reverse order to maintain conversation flow
            children = node.get("children", [])
            stack.extend(reversed(children))

        return messages