basic-memory 0.7.0__py3-none-any.whl → 0.17.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (195) hide show
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +130 -20
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/314f1ea54dc4_add_postgres_full_text_search_support_.py +131 -0
  6. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  7. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +120 -0
  8. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +112 -0
  9. basic_memory/alembic/versions/6830751f5fb6_merge_multiple_heads.py +24 -0
  10. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  11. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  12. basic_memory/alembic/versions/a2b3c4d5e6f7_add_search_index_entity_cascade.py +56 -0
  13. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  14. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +113 -0
  15. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  16. basic_memory/alembic/versions/f8a9b2c3d4e5_add_pg_trgm_for_fuzzy_link_resolution.py +239 -0
  17. basic_memory/alembic/versions/g9a0b3c4d5e6_add_external_id_to_project_and_entity.py +173 -0
  18. basic_memory/api/app.py +87 -20
  19. basic_memory/api/container.py +133 -0
  20. basic_memory/api/routers/__init__.py +4 -1
  21. basic_memory/api/routers/directory_router.py +84 -0
  22. basic_memory/api/routers/importer_router.py +152 -0
  23. basic_memory/api/routers/knowledge_router.py +180 -23
  24. basic_memory/api/routers/management_router.py +80 -0
  25. basic_memory/api/routers/memory_router.py +9 -64
  26. basic_memory/api/routers/project_router.py +460 -0
  27. basic_memory/api/routers/prompt_router.py +260 -0
  28. basic_memory/api/routers/resource_router.py +136 -11
  29. basic_memory/api/routers/search_router.py +5 -5
  30. basic_memory/api/routers/utils.py +169 -0
  31. basic_memory/api/template_loader.py +292 -0
  32. basic_memory/api/v2/__init__.py +35 -0
  33. basic_memory/api/v2/routers/__init__.py +21 -0
  34. basic_memory/api/v2/routers/directory_router.py +93 -0
  35. basic_memory/api/v2/routers/importer_router.py +181 -0
  36. basic_memory/api/v2/routers/knowledge_router.py +427 -0
  37. basic_memory/api/v2/routers/memory_router.py +130 -0
  38. basic_memory/api/v2/routers/project_router.py +359 -0
  39. basic_memory/api/v2/routers/prompt_router.py +269 -0
  40. basic_memory/api/v2/routers/resource_router.py +286 -0
  41. basic_memory/api/v2/routers/search_router.py +73 -0
  42. basic_memory/cli/app.py +80 -10
  43. basic_memory/cli/auth.py +300 -0
  44. basic_memory/cli/commands/__init__.py +15 -2
  45. basic_memory/cli/commands/cloud/__init__.py +6 -0
  46. basic_memory/cli/commands/cloud/api_client.py +127 -0
  47. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  48. basic_memory/cli/commands/cloud/cloud_utils.py +108 -0
  49. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  50. basic_memory/cli/commands/cloud/rclone_commands.py +397 -0
  51. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  52. basic_memory/cli/commands/cloud/rclone_installer.py +263 -0
  53. basic_memory/cli/commands/cloud/upload.py +240 -0
  54. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  55. basic_memory/cli/commands/command_utils.py +99 -0
  56. basic_memory/cli/commands/db.py +87 -12
  57. basic_memory/cli/commands/format.py +198 -0
  58. basic_memory/cli/commands/import_chatgpt.py +47 -223
  59. basic_memory/cli/commands/import_claude_conversations.py +48 -171
  60. basic_memory/cli/commands/import_claude_projects.py +53 -160
  61. basic_memory/cli/commands/import_memory_json.py +55 -111
  62. basic_memory/cli/commands/mcp.py +67 -11
  63. basic_memory/cli/commands/project.py +889 -0
  64. basic_memory/cli/commands/status.py +52 -34
  65. basic_memory/cli/commands/telemetry.py +81 -0
  66. basic_memory/cli/commands/tool.py +341 -0
  67. basic_memory/cli/container.py +84 -0
  68. basic_memory/cli/main.py +14 -6
  69. basic_memory/config.py +580 -26
  70. basic_memory/db.py +285 -28
  71. basic_memory/deps/__init__.py +293 -0
  72. basic_memory/deps/config.py +26 -0
  73. basic_memory/deps/db.py +56 -0
  74. basic_memory/deps/importers.py +200 -0
  75. basic_memory/deps/projects.py +238 -0
  76. basic_memory/deps/repositories.py +179 -0
  77. basic_memory/deps/services.py +480 -0
  78. basic_memory/deps.py +16 -185
  79. basic_memory/file_utils.py +318 -54
  80. basic_memory/ignore_utils.py +297 -0
  81. basic_memory/importers/__init__.py +27 -0
  82. basic_memory/importers/base.py +100 -0
  83. basic_memory/importers/chatgpt_importer.py +245 -0
  84. basic_memory/importers/claude_conversations_importer.py +192 -0
  85. basic_memory/importers/claude_projects_importer.py +184 -0
  86. basic_memory/importers/memory_json_importer.py +128 -0
  87. basic_memory/importers/utils.py +61 -0
  88. basic_memory/markdown/entity_parser.py +182 -23
  89. basic_memory/markdown/markdown_processor.py +70 -7
  90. basic_memory/markdown/plugins.py +43 -23
  91. basic_memory/markdown/schemas.py +1 -1
  92. basic_memory/markdown/utils.py +38 -14
  93. basic_memory/mcp/async_client.py +135 -4
  94. basic_memory/mcp/clients/__init__.py +28 -0
  95. basic_memory/mcp/clients/directory.py +70 -0
  96. basic_memory/mcp/clients/knowledge.py +176 -0
  97. basic_memory/mcp/clients/memory.py +120 -0
  98. basic_memory/mcp/clients/project.py +89 -0
  99. basic_memory/mcp/clients/resource.py +71 -0
  100. basic_memory/mcp/clients/search.py +65 -0
  101. basic_memory/mcp/container.py +110 -0
  102. basic_memory/mcp/project_context.py +155 -0
  103. basic_memory/mcp/prompts/__init__.py +19 -0
  104. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  105. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  106. basic_memory/mcp/prompts/recent_activity.py +188 -0
  107. basic_memory/mcp/prompts/search.py +57 -0
  108. basic_memory/mcp/prompts/utils.py +162 -0
  109. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  110. basic_memory/mcp/resources/project_info.py +71 -0
  111. basic_memory/mcp/server.py +61 -9
  112. basic_memory/mcp/tools/__init__.py +33 -21
  113. basic_memory/mcp/tools/build_context.py +120 -0
  114. basic_memory/mcp/tools/canvas.py +152 -0
  115. basic_memory/mcp/tools/chatgpt_tools.py +190 -0
  116. basic_memory/mcp/tools/delete_note.py +249 -0
  117. basic_memory/mcp/tools/edit_note.py +325 -0
  118. basic_memory/mcp/tools/list_directory.py +157 -0
  119. basic_memory/mcp/tools/move_note.py +549 -0
  120. basic_memory/mcp/tools/project_management.py +204 -0
  121. basic_memory/mcp/tools/read_content.py +281 -0
  122. basic_memory/mcp/tools/read_note.py +265 -0
  123. basic_memory/mcp/tools/recent_activity.py +528 -0
  124. basic_memory/mcp/tools/search.py +377 -24
  125. basic_memory/mcp/tools/utils.py +402 -16
  126. basic_memory/mcp/tools/view_note.py +78 -0
  127. basic_memory/mcp/tools/write_note.py +230 -0
  128. basic_memory/models/__init__.py +3 -2
  129. basic_memory/models/knowledge.py +82 -17
  130. basic_memory/models/project.py +93 -0
  131. basic_memory/models/search.py +68 -8
  132. basic_memory/project_resolver.py +222 -0
  133. basic_memory/repository/__init__.py +2 -0
  134. basic_memory/repository/entity_repository.py +437 -8
  135. basic_memory/repository/observation_repository.py +36 -3
  136. basic_memory/repository/postgres_search_repository.py +451 -0
  137. basic_memory/repository/project_info_repository.py +10 -0
  138. basic_memory/repository/project_repository.py +140 -0
  139. basic_memory/repository/relation_repository.py +79 -4
  140. basic_memory/repository/repository.py +148 -29
  141. basic_memory/repository/search_index_row.py +95 -0
  142. basic_memory/repository/search_repository.py +79 -268
  143. basic_memory/repository/search_repository_base.py +241 -0
  144. basic_memory/repository/sqlite_search_repository.py +437 -0
  145. basic_memory/runtime.py +61 -0
  146. basic_memory/schemas/__init__.py +22 -9
  147. basic_memory/schemas/base.py +131 -12
  148. basic_memory/schemas/cloud.py +50 -0
  149. basic_memory/schemas/directory.py +31 -0
  150. basic_memory/schemas/importer.py +35 -0
  151. basic_memory/schemas/memory.py +194 -25
  152. basic_memory/schemas/project_info.py +213 -0
  153. basic_memory/schemas/prompt.py +90 -0
  154. basic_memory/schemas/request.py +56 -2
  155. basic_memory/schemas/response.py +85 -28
  156. basic_memory/schemas/search.py +36 -35
  157. basic_memory/schemas/sync_report.py +72 -0
  158. basic_memory/schemas/v2/__init__.py +27 -0
  159. basic_memory/schemas/v2/entity.py +133 -0
  160. basic_memory/schemas/v2/resource.py +47 -0
  161. basic_memory/services/__init__.py +2 -1
  162. basic_memory/services/context_service.py +451 -138
  163. basic_memory/services/directory_service.py +310 -0
  164. basic_memory/services/entity_service.py +636 -71
  165. basic_memory/services/exceptions.py +21 -0
  166. basic_memory/services/file_service.py +402 -33
  167. basic_memory/services/initialization.py +216 -0
  168. basic_memory/services/link_resolver.py +50 -56
  169. basic_memory/services/project_service.py +888 -0
  170. basic_memory/services/search_service.py +232 -37
  171. basic_memory/sync/__init__.py +4 -2
  172. basic_memory/sync/background_sync.py +26 -0
  173. basic_memory/sync/coordinator.py +160 -0
  174. basic_memory/sync/sync_service.py +1200 -109
  175. basic_memory/sync/watch_service.py +432 -135
  176. basic_memory/telemetry.py +249 -0
  177. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  178. basic_memory/templates/prompts/search.hbs +101 -0
  179. basic_memory/utils.py +407 -54
  180. basic_memory-0.17.4.dist-info/METADATA +617 -0
  181. basic_memory-0.17.4.dist-info/RECORD +193 -0
  182. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/WHEEL +1 -1
  183. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/entry_points.txt +1 -0
  184. basic_memory/alembic/README +0 -1
  185. basic_memory/cli/commands/sync.py +0 -206
  186. basic_memory/cli/commands/tools.py +0 -157
  187. basic_memory/mcp/tools/knowledge.py +0 -68
  188. basic_memory/mcp/tools/memory.py +0 -170
  189. basic_memory/mcp/tools/notes.py +0 -202
  190. basic_memory/schemas/discovery.py +0 -28
  191. basic_memory/sync/file_change_scanner.py +0 -158
  192. basic_memory/sync/utils.py +0 -31
  193. basic_memory-0.7.0.dist-info/METADATA +0 -378
  194. basic_memory-0.7.0.dist-info/RECORD +0 -82
  195. {basic_memory-0.7.0.dist-info → basic_memory-0.17.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,297 @@
1
+ """Utilities for handling .gitignore patterns and file filtering."""
2
+
3
+ import fnmatch
4
+ from pathlib import Path
5
+ from typing import Set
6
+
7
+
8
# Built-in ignore rules.
# load_bmignore_patterns() returns a copy of this set when .bmignore cannot be
# read or yields no patterns; the same entries are written into the default
# .bmignore template by create_default_bmignore().
DEFAULT_IGNORE_PATTERNS = {
    # Dotfiles and dot-directories
    ".*",
    # Basic Memory's own database and configuration files
    "*.db", "*.db-shm", "*.db-wal", "config.json",
    # Version-control metadata
    ".git", ".svn",
    # Python bytecode, caches and packaging output
    "__pycache__", "*.pyc", "*.pyo", "*.pyd",
    ".pytest_cache", ".coverage", "*.egg-info",
    ".tox", ".mypy_cache", ".ruff_cache",
    # Virtual environments
    ".venv", "venv", "env", ".env",
    # Node.js dependencies
    "node_modules",
    # Build output
    "build", "dist", ".cache",
    # Editor / IDE settings
    ".idea", ".vscode",
    # Operating-system droppings
    ".DS_Store", "Thumbs.db", "desktop.ini",
    # Obsidian vault settings
    ".obsidian",
    # Temporary and editor swap files
    "*.tmp", "*.swp", "*.swo", "*~",
}
58
+
59
+
60
def get_bmignore_path() -> Path:
    """Return the location of the user-level .bmignore file.

    Returns:
        ``~/.basic-memory/.bmignore``, built from the current user's home
        directory. The file is not created or checked for existence here.
    """
    config_dir = Path.home().joinpath(".basic-memory")
    return config_dir.joinpath(".bmignore")
67
+
68
+
69
def create_default_bmignore() -> None:
    """Create the default .bmignore file if it does not exist yet.

    The file gives users a single place to customize the ignore rules. An
    existing file is never overwritten. Missing parent directories are created.

    The template is written as UTF-8 explicitly so that it always agrees with
    load_bmignore_patterns(), which reads the file as UTF-8, regardless of the
    platform's locale encoding.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    bmignore_path = get_bmignore_path()

    # Never clobber a file the user may already have customized.
    if bmignore_path.exists():
        return

    bmignore_path.parent.mkdir(parents=True, exist_ok=True)
    bmignore_path.write_text(
        """# Basic Memory Ignore Patterns
# This file is used by both 'bm cloud upload', 'bm cloud bisync', and file sync
# Patterns use standard gitignore-style syntax

# Hidden files (files starting with dot)
.*

# Basic Memory internal files (includes test databases)
*.db
*.db-shm
*.db-wal
config.json

# Version control
.git
.svn

# Python
__pycache__
*.pyc
*.pyo
*.pyd
.pytest_cache
.coverage
*.egg-info
.tox
.mypy_cache
.ruff_cache

# Virtual environments
.venv
venv
env
.env

# Node.js
node_modules

# Build artifacts
build
dist
.cache

# IDE
.idea
.vscode

# OS files
.DS_Store
Thumbs.db
desktop.ini

# Obsidian
.obsidian

# Temporary files
*.tmp
*.swp
*.swo
*~
""",
        encoding="utf-8",
    )
141
+
142
+
143
def load_bmignore_patterns() -> Set[str]:
    """Load ignore patterns from the user's .bmignore file.

    If the file does not exist it is created from the default template first.
    Blank lines and lines starting with ``#`` are skipped; every other line is
    stripped of surrounding whitespace and used as a pattern.

    Loading is best-effort: if the file cannot be created or read, or if it
    contains no patterns, a copy of DEFAULT_IGNORE_PATTERNS is returned instead
    of raising.

    Returns:
        Set of patterns from .bmignore, or a copy of DEFAULT_IGNORE_PATTERNS
        when the file is unavailable or empty.
    """
    bmignore_path = get_bmignore_path()

    try:
        # Creating the default file sits inside the guard on purpose: an
        # unwritable home directory must fall back to the defaults, not crash.
        if not bmignore_path.exists():
            create_default_bmignore()

        patterns: Set[str] = set()
        with bmignore_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments
                if line and not line.startswith("#"):
                    patterns.add(line)
    except Exception:  # pragma: no cover
        # Deliberately broad: any failure here falls back to the defaults
        return set(DEFAULT_IGNORE_PATTERNS)  # pragma: no cover

    # If no patterns were loaded, use defaults
    if not patterns:  # pragma: no cover
        return set(DEFAULT_IGNORE_PATTERNS)  # pragma: no cover

    return patterns
173
+
174
+
175
def load_gitignore_patterns(base_path: Path, use_gitignore: bool = True) -> Set[str]:
    """Collect ignore patterns from .bmignore and, optionally, a project .gitignore.

    Sources, in order:
    1. ~/.basic-memory/.bmignore (user's global ignore patterns)
    2. {base_path}/.gitignore (project-specific patterns, only if use_gitignore=True)

    Args:
        base_path: Directory in which to look for a .gitignore file
        use_gitignore: When False, only the .bmignore patterns are returned (default: True)

    Returns:
        Set of patterns to ignore
    """
    combined = load_bmignore_patterns()

    if not use_gitignore:
        return combined

    gitignore_file = base_path / ".gitignore"
    if not gitignore_file.exists():
        return combined

    try:
        with gitignore_file.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                # Blank lines and comment lines carry no pattern
                if not entry or entry.startswith("#"):
                    continue
                combined.add(entry)
    except Exception:
        # Best-effort: an unreadable .gitignore is ignored; patterns gathered
        # before the failure are kept.
        pass

    return combined
207
+
208
+
209
def _matches_leading_components(parts: tuple[str, ...], pattern: str) -> bool:
    """Return True if *pattern* matches the leading components of *parts*.

    The pattern is compared against as many leading path components as it has
    ``/``-separated segments, so ``docs/build`` is tested against the first two
    components of ``docs/build/index.md``. A literal comparison is tried before
    the glob so names containing glob metacharacters still match themselves.
    """
    depth = pattern.count("/") + 1
    if len(parts) < depth:
        return False
    candidate = "/".join(parts[:depth])
    return candidate == pattern or fnmatch.fnmatch(candidate, pattern)


def should_ignore_path(file_path: Path, base_path: Path, ignore_patterns: Set[str]) -> bool:
    """Check if a file path should be ignored based on gitignore-style patterns.

    Matching rules, applied per pattern until one matches:
    - ``/name`` or ``/a/b`` (anchored): matches that entry relative to
      ``base_path`` and everything beneath it; also matched as a glob against
      the whole relative path.
    - ``name/`` (directory): matches any path component, literally or as a
      glob (``*.egg-info/``). With an interior slash (``docs/build/``) the
      pattern is anchored to ``base_path``.
    - anything else: matches any single path component literally or as a glob,
      or the whole relative path as a glob; with an interior slash it also
      matches the anchored entry and everything beneath it.

    Negation patterns (``!pattern``) are not interpreted specially.

    Args:
        file_path: The file path to check
        base_path: The base directory for relative path calculation
        ignore_patterns: Set of patterns to match against

    Returns:
        True if the path should be ignored, False otherwise. Paths that are not
        located under ``base_path`` are never ignored.
    """
    # Only relative_to() can raise here, so keep the guard that narrow.
    try:
        relative_path = file_path.relative_to(base_path)
    except ValueError:
        # If we can't get relative path, don't ignore
        return False

    parts = relative_path.parts
    relative_str = str(relative_path)
    relative_posix = relative_path.as_posix()  # Use forward slashes for matching

    for pattern in ignore_patterns:
        # Patterns starting with / are relative to base_path
        if pattern.startswith("/"):
            root_pattern = pattern[1:]
            is_dir_pattern = root_pattern.endswith("/")
            anchored = root_pattern[:-1] if is_dir_pattern else root_pattern

            # Match the anchored entry itself and anything below it
            if anchored and _matches_leading_components(parts, anchored):
                return True
            # Glob against the whole relative path (fnmatch's * also spans "/")
            if not is_dir_pattern and fnmatch.fnmatch(relative_posix, root_pattern):
                return True
            continue

        # Directory patterns (ending with /)
        if pattern.endswith("/"):
            dir_name = pattern[:-1]
            if "/" in dir_name:
                # An interior slash anchors the pattern to base_path
                if _matches_leading_components(parts, dir_name):
                    return True
            elif dir_name in parts or any(fnmatch.fnmatch(part, dir_name) for part in parts):
                return True
            continue

        # Direct name match (e.g., ".git", "node_modules")
        if pattern in parts:
            return True

        # Glob against each component, so ".*" matches "concept/.hidden.md"
        if any(fnmatch.fnmatch(part, pattern) for part in parts):
            return True

        # Glob pattern match on full path
        if fnmatch.fnmatch(relative_posix, pattern) or fnmatch.fnmatch(relative_str, pattern):
            return True  # pragma: no cover

        # "docs/build" should also cover everything beneath docs/build
        if "/" in pattern and _matches_leading_components(parts, pattern):
            return True

    return False
270
+
271
+
272
def filter_files(
    files: list[Path], base_path: Path, ignore_patterns: Set[str] | None = None
) -> tuple[list[Path], int]:
    """Split a list of files into those to keep and a count of those ignored.

    Args:
        files: List of file paths to filter
        base_path: The base directory for relative path calculation
        ignore_patterns: Set of patterns to ignore. If None, patterns are loaded
            via load_gitignore_patterns(base_path)

    Returns:
        Tuple of (filtered_files, ignored_count); filtered_files keeps the
        input order.
    """
    active_patterns = (
        load_gitignore_patterns(base_path) if ignore_patterns is None else ignore_patterns
    )

    kept = [
        candidate
        for candidate in files
        if not should_ignore_path(candidate, base_path, active_patterns)
    ]

    # Every input path is either kept or ignored, so the difference is the count
    return kept, len(files) - len(kept)
@@ -0,0 +1,27 @@
1
+ """Import services for Basic Memory."""
2
+
3
+ from basic_memory.importers.base import Importer
4
+ from basic_memory.importers.chatgpt_importer import ChatGPTImporter
5
+ from basic_memory.importers.claude_conversations_importer import (
6
+ ClaudeConversationsImporter,
7
+ )
8
+ from basic_memory.importers.claude_projects_importer import ClaudeProjectsImporter
9
+ from basic_memory.importers.memory_json_importer import MemoryJsonImporter
10
+ from basic_memory.schemas.importer import (
11
+ ChatImportResult,
12
+ EntityImportResult,
13
+ ImportResult,
14
+ ProjectImportResult,
15
+ )
16
+
17
# Public API of the importers package.
__all__ = [
    # Base class
    "Importer",
    # Concrete importers
    "ChatGPTImporter",
    "ClaudeConversationsImporter",
    "ClaudeProjectsImporter",
    "MemoryJsonImporter",
    # Result schemas
    "ImportResult",
    "ChatImportResult",
    "EntityImportResult",
    "ProjectImportResult",
]
@@ -0,0 +1,100 @@
1
+ """Base import service for Basic Memory."""
2
+
3
+ import logging
4
+ from abc import abstractmethod
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any, Optional, TypeVar
7
+
8
+ from basic_memory.markdown.markdown_processor import MarkdownProcessor
9
+ from basic_memory.markdown.schemas import EntityMarkdown
10
+ from basic_memory.schemas.importer import ImportResult
11
+
12
+ if TYPE_CHECKING: # pragma: no cover
13
+ from basic_memory.services.file_service import FileService
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ T = TypeVar("T", bound=ImportResult)
18
+
19
+
20
+ class Importer[T: ImportResult]:
21
+ """Base class for all import services.
22
+
23
+ All file operations are delegated to FileService, which can be overridden
24
+ in cloud environments to use S3 or other storage backends.
25
+ """
26
+
27
+ def __init__(
28
+ self,
29
+ base_path: Path,
30
+ markdown_processor: MarkdownProcessor,
31
+ file_service: "FileService",
32
+ ):
33
+ """Initialize the import service.
34
+
35
+ Args:
36
+ base_path: Base path for the project.
37
+ markdown_processor: MarkdownProcessor instance for markdown serialization.
38
+ file_service: FileService instance for all file operations.
39
+ """
40
+ self.base_path = base_path.resolve() # Get absolute path
41
+ self.markdown_processor = markdown_processor
42
+ self.file_service = file_service
43
+
44
+ @abstractmethod
45
+ async def import_data(self, source_data, destination_folder: str, **kwargs: Any) -> T:
46
+ """Import data from source file to destination folder.
47
+
48
+ Args:
49
+ source_path: Path to the source file.
50
+ destination_folder: Destination folder within the project.
51
+ **kwargs: Additional keyword arguments for specific import types.
52
+
53
+ Returns:
54
+ ImportResult containing statistics and status of the import.
55
+ """
56
+ pass # pragma: no cover
57
+
58
+ async def write_entity(self, entity: EntityMarkdown, file_path: str | Path) -> str:
59
+ """Write entity to file using FileService.
60
+
61
+ This method serializes the entity to markdown and writes it using
62
+ FileService, which handles directory creation and storage backend
63
+ abstraction (local filesystem vs cloud storage).
64
+
65
+ Args:
66
+ entity: EntityMarkdown instance to write.
67
+ file_path: Relative path to write the entity to. FileService handles base_path.
68
+
69
+ Returns:
70
+ Checksum of written file.
71
+ """
72
+ content = self.markdown_processor.to_markdown_string(entity)
73
+ # FileService.write_file handles directory creation and returns checksum
74
+ return await self.file_service.write_file(file_path, content)
75
+
76
+ async def ensure_folder_exists(self, folder: str) -> None:
77
+ """Ensure folder exists using FileService.
78
+
79
+ For cloud storage (S3), this is essentially a no-op since S3 doesn't
80
+ have actual folders - they're just key prefixes.
81
+
82
+ Args:
83
+ folder: Relative folder path within the project. FileService handles base_path.
84
+ """
85
+ await self.file_service.ensure_directory(folder)
86
+
87
+ @abstractmethod
88
+ def handle_error(
89
+ self, message: str, error: Optional[Exception] = None
90
+ ) -> T: # pragma: no cover
91
+ """Handle errors during import.
92
+
93
+ Args:
94
+ message: Error message.
95
+ error: Optional exception that caused the error.
96
+
97
+ Returns:
98
+ ImportResult with error information.
99
+ """
100
+ pass
@@ -0,0 +1,245 @@
1
+ """ChatGPT import service for Basic Memory."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Any, Dict, List, Optional, Set
6
+
7
+ from basic_memory.markdown.schemas import EntityFrontmatter, EntityMarkdown
8
+ from basic_memory.importers.base import Importer
9
+ from basic_memory.schemas.importer import ChatImportResult
10
+ from basic_memory.importers.utils import clean_filename, format_timestamp
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ChatGPTImporter(Importer[ChatImportResult]):
    """Service for importing ChatGPT conversations."""

    def handle_error(  # pragma: no cover
        self, message: str, error: Optional[Exception] = None
    ) -> ChatImportResult:
        """Return a failed ChatImportResult with an error message.

        Args:
            message: Human-readable description of what failed.
            error: Optional exception; when given it is appended to the message.

        Returns:
            ChatImportResult with ``success=False`` and zeroed counters.
        """
        error_msg = f"{message}: {error}" if error else message
        return ChatImportResult(
            import_count={},
            success=False,
            error_message=error_msg,
            conversations=0,
            messages=0,
        )

    async def import_data(
        self, source_data, destination_folder: str, **kwargs: Any
    ) -> ChatImportResult:
        """Import conversations from an already-parsed ChatGPT JSON export.

        Each conversation is written as ``<permalink>.md`` through write_entity.
        Any exception aborts the import and is reported via handle_error
        instead of being raised.

        Args:
            source_data: Iterable of conversation dicts; each needs the
                ``title``, ``create_time``, ``update_time`` and ``mapping`` keys.
            destination_folder: Destination folder within the project.
            **kwargs: Additional keyword arguments (unused here).

        Returns:
            ChatImportResult containing statistics and status of the import.
        """
        try:  # pragma: no cover
            # Ensure the destination folder exists
            await self.ensure_folder_exists(destination_folder)
            conversations = source_data

            messages_imported = 0
            chats_imported = 0

            for chat in conversations:
                # Convert to entity
                entity = self._format_chat_content(destination_folder, chat)

                # Write file using relative path - FileService handles base_path
                file_path = f"{entity.frontmatter.metadata['permalink']}.md"
                await self.write_entity(entity, file_path)

                # Count every visible message in the mapping
                msg_count = sum(
                    1
                    for node in chat["mapping"].values()
                    if node.get("message") and not self._is_hidden_message(node["message"])
                )

                chats_imported += 1
                messages_imported += msg_count

            return ChatImportResult(
                import_count={"conversations": chats_imported, "messages": messages_imported},
                success=True,
                conversations=chats_imported,
                messages=messages_imported,
            )

        except Exception as e:  # pragma: no cover
            logger.exception("Failed to import ChatGPT conversations")
            return self.handle_error("Failed to import ChatGPT conversations", e)

    @staticmethod
    def _is_hidden_message(message: Dict[str, Any]) -> bool:  # pragma: no cover
        """Return True if the message is flagged as hidden from the conversation.

        A missing or null ``metadata`` entry is treated as "not hidden".
        """
        metadata = message.get("metadata") or {}
        return bool(metadata.get("is_visually_hidden_from_conversation"))

    def _format_chat_content(
        self, folder: str, conversation: Dict[str, Any]
    ) -> EntityMarkdown:  # pragma: no cover
        """Convert chat conversation to Basic Memory entity.

        Args:
            folder: Destination folder name; becomes the permalink prefix.
            conversation: ChatGPT conversation data.

        Returns:
            EntityMarkdown instance representing the conversation.
        """
        # Extract timestamps
        created_at = conversation["create_time"]
        modified_at = conversation["update_time"]

        # The root is the first node without a parent
        root_id = None
        for node_id, node in conversation["mapping"].items():
            if node.get("parent") is None:
                root_id = node_id
                break

        # Permalink: <folder>/<local creation date>-<cleaned title>
        date_prefix = datetime.fromtimestamp(created_at).astimezone().strftime("%Y%m%d")
        clean_title = clean_filename(conversation["title"])

        # Format content
        content = self._format_chat_markdown(
            title=conversation["title"],
            mapping=conversation["mapping"],
            root_id=root_id,
            created_at=created_at,
            modified_at=modified_at,
        )

        # Create entity
        entity = EntityMarkdown(
            frontmatter=EntityFrontmatter(
                metadata={
                    "type": "conversation",
                    "title": conversation["title"],
                    "created": format_timestamp(created_at),
                    "modified": format_timestamp(modified_at),
                    "permalink": f"{folder}/{date_prefix}-{clean_title}",
                }
            ),
            content=content,
        )

        return entity

    def _format_chat_markdown(
        self,
        title: str,
        mapping: Dict[str, Any],
        root_id: Optional[str],
        created_at: float,
        modified_at: float,
    ) -> str:  # pragma: no cover
        """Format chat as clean markdown.

        Args:
            title: Chat title, rendered as the top-level heading.
            mapping: Message mapping (node id -> node).
            root_id: Root message ID.
            created_at: Creation timestamp (not used in the rendered body).
            modified_at: Modification timestamp (not used in the rendered body).

        Returns:
            Formatted markdown content.
        """
        # Start with title
        lines = [f"# {title}\n"]

        # Traverse message tree
        seen_msgs: Set[str] = set()
        messages = self._traverse_messages(mapping, root_id, seen_msgs)

        for msg in messages:
            # Skip hidden messages
            if self._is_hidden_message(msg):
                continue

            # Get author and timestamp
            author = msg["author"]["role"].title()
            ts = format_timestamp(msg["create_time"]) if msg.get("create_time") else ""

            # Add message header
            lines.append(f"### {author} ({ts})")

            # Add message content
            content = self._get_message_content(msg)
            if content:
                lines.append(content)

            # Add spacing
            lines.append("")

        return "\n".join(lines)

    def _get_message_content(self, message: Dict[str, Any]) -> str:  # pragma: no cover
        """Extract clean message content.

        Only the ``text`` and ``code`` content types are rendered; every other
        type yields an empty string.

        Args:
            message: Message data.

        Returns:
            Cleaned message content.
        """
        if not message or "content" not in message:
            return ""

        content = message["content"]
        content_type = content.get("content_type")
        if content_type == "text":
            # Non-string parts would make str.join raise and abort the whole
            # import, so only string parts are joined.
            parts = content.get("parts") or []
            return "\n".join(part for part in parts if isinstance(part, str))
        if content_type == "code":
            return f"```{content.get('language', '')}\n{content.get('text', '')}\n```"
        return ""

    def _traverse_messages(
        self, mapping: Dict[str, Any], root_id: Optional[str], seen: Set[str]
    ) -> List[Dict[str, Any]]:  # pragma: no cover
        """Traverse message tree iteratively to handle deep conversations.

        Nodes are visited depth-first, children in their listed order. Each
        node is expanded at most once, so a malformed mapping with a cycle
        terminates instead of looping forever.

        Args:
            mapping: Message mapping (node id -> node).
            root_id: Root message ID.
            seen: Set of message IDs already collected; updated in place.

        Returns:
            List of message data.
        """
        messages: List[Dict[str, Any]] = []
        if not root_id:
            return messages

        # Explicit stack instead of recursion to avoid recursion depth issues
        stack = [root_id]
        visited: Set[str] = set()

        while stack:
            node_id = stack.pop()
            if not node_id or node_id in visited:
                continue
            visited.add(node_id)

            node = mapping.get(node_id)
            if not node:
                continue

            # Process current node if it has a message and hasn't been seen
            if node["id"] not in seen and node.get("message"):
                seen.add(node["id"])
                messages.append(node["message"])

            # Push children reversed so they pop in their original order
            children = node.get("children") or []
            stack.extend(reversed(children))

        return messages