basic-memory 0.7.0__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of basic-memory might be problematic. Click here for more details.

Files changed (150) hide show
  1. basic_memory/__init__.py +5 -1
  2. basic_memory/alembic/alembic.ini +119 -0
  3. basic_memory/alembic/env.py +27 -3
  4. basic_memory/alembic/migrations.py +4 -9
  5. basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py +51 -0
  6. basic_memory/alembic/versions/5fe1ab1ccebe_add_projects_table.py +108 -0
  7. basic_memory/alembic/versions/647e7a75e2cd_project_constraint_fix.py +104 -0
  8. basic_memory/alembic/versions/9d9c1cb7d8f5_add_mtime_and_size_columns_to_entity_.py +49 -0
  9. basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
  10. basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py +44 -0
  11. basic_memory/alembic/versions/cc7172b46608_update_search_index_schema.py +100 -0
  12. basic_memory/alembic/versions/e7e1f4367280_add_scan_watermark_tracking_to_project.py +37 -0
  13. basic_memory/api/app.py +64 -18
  14. basic_memory/api/routers/__init__.py +4 -1
  15. basic_memory/api/routers/directory_router.py +84 -0
  16. basic_memory/api/routers/importer_router.py +152 -0
  17. basic_memory/api/routers/knowledge_router.py +166 -21
  18. basic_memory/api/routers/management_router.py +80 -0
  19. basic_memory/api/routers/memory_router.py +9 -64
  20. basic_memory/api/routers/project_router.py +406 -0
  21. basic_memory/api/routers/prompt_router.py +260 -0
  22. basic_memory/api/routers/resource_router.py +119 -4
  23. basic_memory/api/routers/search_router.py +5 -5
  24. basic_memory/api/routers/utils.py +130 -0
  25. basic_memory/api/template_loader.py +292 -0
  26. basic_memory/cli/app.py +43 -9
  27. basic_memory/cli/auth.py +277 -0
  28. basic_memory/cli/commands/__init__.py +13 -2
  29. basic_memory/cli/commands/cloud/__init__.py +6 -0
  30. basic_memory/cli/commands/cloud/api_client.py +112 -0
  31. basic_memory/cli/commands/cloud/bisync_commands.py +110 -0
  32. basic_memory/cli/commands/cloud/cloud_utils.py +101 -0
  33. basic_memory/cli/commands/cloud/core_commands.py +195 -0
  34. basic_memory/cli/commands/cloud/rclone_commands.py +301 -0
  35. basic_memory/cli/commands/cloud/rclone_config.py +110 -0
  36. basic_memory/cli/commands/cloud/rclone_installer.py +249 -0
  37. basic_memory/cli/commands/cloud/upload.py +233 -0
  38. basic_memory/cli/commands/cloud/upload_command.py +124 -0
  39. basic_memory/cli/commands/command_utils.py +51 -0
  40. basic_memory/cli/commands/db.py +28 -12
  41. basic_memory/cli/commands/import_chatgpt.py +40 -220
  42. basic_memory/cli/commands/import_claude_conversations.py +41 -168
  43. basic_memory/cli/commands/import_claude_projects.py +46 -157
  44. basic_memory/cli/commands/import_memory_json.py +48 -108
  45. basic_memory/cli/commands/mcp.py +84 -10
  46. basic_memory/cli/commands/project.py +876 -0
  47. basic_memory/cli/commands/status.py +50 -33
  48. basic_memory/cli/commands/tool.py +341 -0
  49. basic_memory/cli/main.py +8 -7
  50. basic_memory/config.py +477 -23
  51. basic_memory/db.py +168 -17
  52. basic_memory/deps.py +251 -25
  53. basic_memory/file_utils.py +113 -58
  54. basic_memory/ignore_utils.py +297 -0
  55. basic_memory/importers/__init__.py +27 -0
  56. basic_memory/importers/base.py +79 -0
  57. basic_memory/importers/chatgpt_importer.py +232 -0
  58. basic_memory/importers/claude_conversations_importer.py +177 -0
  59. basic_memory/importers/claude_projects_importer.py +148 -0
  60. basic_memory/importers/memory_json_importer.py +108 -0
  61. basic_memory/importers/utils.py +58 -0
  62. basic_memory/markdown/entity_parser.py +143 -23
  63. basic_memory/markdown/markdown_processor.py +3 -3
  64. basic_memory/markdown/plugins.py +39 -21
  65. basic_memory/markdown/schemas.py +1 -1
  66. basic_memory/markdown/utils.py +28 -13
  67. basic_memory/mcp/async_client.py +134 -4
  68. basic_memory/mcp/project_context.py +141 -0
  69. basic_memory/mcp/prompts/__init__.py +19 -0
  70. basic_memory/mcp/prompts/ai_assistant_guide.py +70 -0
  71. basic_memory/mcp/prompts/continue_conversation.py +62 -0
  72. basic_memory/mcp/prompts/recent_activity.py +188 -0
  73. basic_memory/mcp/prompts/search.py +57 -0
  74. basic_memory/mcp/prompts/utils.py +162 -0
  75. basic_memory/mcp/resources/ai_assistant_guide.md +283 -0
  76. basic_memory/mcp/resources/project_info.py +71 -0
  77. basic_memory/mcp/server.py +7 -13
  78. basic_memory/mcp/tools/__init__.py +33 -21
  79. basic_memory/mcp/tools/build_context.py +120 -0
  80. basic_memory/mcp/tools/canvas.py +130 -0
  81. basic_memory/mcp/tools/chatgpt_tools.py +187 -0
  82. basic_memory/mcp/tools/delete_note.py +225 -0
  83. basic_memory/mcp/tools/edit_note.py +320 -0
  84. basic_memory/mcp/tools/list_directory.py +167 -0
  85. basic_memory/mcp/tools/move_note.py +545 -0
  86. basic_memory/mcp/tools/project_management.py +200 -0
  87. basic_memory/mcp/tools/read_content.py +271 -0
  88. basic_memory/mcp/tools/read_note.py +255 -0
  89. basic_memory/mcp/tools/recent_activity.py +534 -0
  90. basic_memory/mcp/tools/search.py +369 -23
  91. basic_memory/mcp/tools/utils.py +374 -16
  92. basic_memory/mcp/tools/view_note.py +77 -0
  93. basic_memory/mcp/tools/write_note.py +207 -0
  94. basic_memory/models/__init__.py +3 -2
  95. basic_memory/models/knowledge.py +67 -15
  96. basic_memory/models/project.py +87 -0
  97. basic_memory/models/search.py +10 -6
  98. basic_memory/repository/__init__.py +2 -0
  99. basic_memory/repository/entity_repository.py +229 -7
  100. basic_memory/repository/observation_repository.py +35 -3
  101. basic_memory/repository/project_info_repository.py +10 -0
  102. basic_memory/repository/project_repository.py +103 -0
  103. basic_memory/repository/relation_repository.py +21 -2
  104. basic_memory/repository/repository.py +147 -29
  105. basic_memory/repository/search_repository.py +411 -62
  106. basic_memory/schemas/__init__.py +22 -9
  107. basic_memory/schemas/base.py +97 -8
  108. basic_memory/schemas/cloud.py +50 -0
  109. basic_memory/schemas/directory.py +30 -0
  110. basic_memory/schemas/importer.py +35 -0
  111. basic_memory/schemas/memory.py +187 -25
  112. basic_memory/schemas/project_info.py +211 -0
  113. basic_memory/schemas/prompt.py +90 -0
  114. basic_memory/schemas/request.py +56 -2
  115. basic_memory/schemas/response.py +1 -1
  116. basic_memory/schemas/search.py +31 -35
  117. basic_memory/schemas/sync_report.py +72 -0
  118. basic_memory/services/__init__.py +2 -1
  119. basic_memory/services/context_service.py +241 -104
  120. basic_memory/services/directory_service.py +295 -0
  121. basic_memory/services/entity_service.py +590 -60
  122. basic_memory/services/exceptions.py +21 -0
  123. basic_memory/services/file_service.py +284 -30
  124. basic_memory/services/initialization.py +191 -0
  125. basic_memory/services/link_resolver.py +49 -56
  126. basic_memory/services/project_service.py +863 -0
  127. basic_memory/services/search_service.py +168 -32
  128. basic_memory/sync/__init__.py +3 -2
  129. basic_memory/sync/background_sync.py +26 -0
  130. basic_memory/sync/sync_service.py +1180 -109
  131. basic_memory/sync/watch_service.py +412 -135
  132. basic_memory/templates/prompts/continue_conversation.hbs +110 -0
  133. basic_memory/templates/prompts/search.hbs +101 -0
  134. basic_memory/utils.py +383 -51
  135. basic_memory-0.16.1.dist-info/METADATA +493 -0
  136. basic_memory-0.16.1.dist-info/RECORD +148 -0
  137. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/entry_points.txt +1 -0
  138. basic_memory/alembic/README +0 -1
  139. basic_memory/cli/commands/sync.py +0 -206
  140. basic_memory/cli/commands/tools.py +0 -157
  141. basic_memory/mcp/tools/knowledge.py +0 -68
  142. basic_memory/mcp/tools/memory.py +0 -170
  143. basic_memory/mcp/tools/notes.py +0 -202
  144. basic_memory/schemas/discovery.py +0 -28
  145. basic_memory/sync/file_change_scanner.py +0 -158
  146. basic_memory/sync/utils.py +0 -31
  147. basic_memory-0.7.0.dist-info/METADATA +0 -378
  148. basic_memory-0.7.0.dist-info/RECORD +0 -82
  149. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/WHEEL +0 -0
  150. {basic_memory-0.7.0.dist-info → basic_memory-0.16.1.dist-info}/licenses/LICENSE +0 -0
basic_memory/utils.py CHANGED
@@ -1,29 +1,85 @@
1
1
  """Utility functions for basic-memory."""
2
2
 
3
3
  import os
4
+
5
+ import logging
4
6
  import re
5
7
  import sys
8
+ from datetime import datetime
6
9
  from pathlib import Path
7
- from typing import Optional, Union
10
+ from typing import Optional, Protocol, Union, runtime_checkable, List
8
11
 
9
12
  from loguru import logger
10
13
  from unidecode import unidecode
11
14
 
12
- import basic_memory
13
- from basic_memory.config import config
14
15
 
15
- import logfire
16
def normalize_project_path(path: str) -> str:
    """Normalize project path by stripping mount point prefix.

    In cloud deployments, the S3 bucket is mounted at /app/data. We strip this
    prefix from project paths to avoid leaking implementation details and to
    ensure paths match the actual S3 bucket structure.

    Windows drive-letter paths are returned unchanged. Every other path is
    returned with its leading slashes collapsed to a single "/" and the
    mount point removed when it is the first path component(s).

    Args:
        path: Project path (e.g., "/app/data/basic-memory-llc" or "C:\\Users\\...")

    Returns:
        Normalized path (e.g., "/basic-memory-llc" or "C:\\Users\\...")

    Examples:
        >>> normalize_project_path("/app/data/my-project")
        '/my-project'
        >>> normalize_project_path("/my-project")
        '/my-project'
        >>> normalize_project_path("app/data/my-project")
        '/my-project'
        >>> normalize_project_path("/app/data")
        '/'
        >>> normalize_project_path("C:\\\\Users\\\\project")
        'C:\\\\Users\\\\project'
    """
    # Check if this is a Windows absolute path (e.g., C:\Users\...)
    # Windows paths have a drive letter followed by a colon
    if len(path) >= 2 and path[1] == ":":
        # Windows absolute path - return unchanged
        return path

    mount_point = "app/data"

    # Handle both absolute and relative Unix paths
    normalized = path.lstrip("/")
    if normalized == mount_point:
        # The mount point itself maps to the root, exactly as "/app/data/" does;
        # without this branch the bare mount point would be returned verbatim.
        normalized = ""
    elif normalized.startswith(mount_point + "/"):
        # Match on a full path component so "/app/database" is left alone.
        normalized = normalized.removeprefix(mount_point + "/")

    # Ensure leading slash for Unix absolute paths
    if not normalized.startswith("/"):
        normalized = "/" + normalized

    return normalized
57
+
16
58
 
59
+ @runtime_checkable
60
+ class PathLike(Protocol):
61
+ """Protocol for objects that can be used as paths; the only required member is ``__str__``."""
17
62
 
18
- def generate_permalink(file_path: Union[Path, str]) -> str:
63
+ def __str__(self) -> str: ...
64
+
65
+
66
+ # In type annotations, use Union[Path, str] instead of FilePath for now
67
+ # This preserves compatibility with existing code while we migrate
68
+ FilePath = Union[Path, str]
69
+
70
+ # Disable the "Queue is full" warning by letting only ERROR and above through this logger
71
+ logging.getLogger("opentelemetry.sdk.metrics._internal.instrument").setLevel(logging.ERROR)
72
+
73
+
74
+ def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: bool = True) -> str:
19
75
  """Generate a stable permalink from a file path.
20
76
 
21
77
  Args:
22
- file_path: Original file path
78
+ file_path: Original file path (str, Path, or PathLike)
23
79
 
24
80
  Returns:
25
81
  Normalized permalink that matches validation rules. Converts spaces and underscores
26
- to hyphens for consistency.
82
+ to hyphens for consistency. Preserves non-ASCII characters like Chinese.
27
83
 
28
84
  Examples:
29
85
  >>> generate_permalink("docs/My Feature.md")
@@ -32,27 +88,88 @@ def generate_permalink(file_path: Union[Path, str]) -> str:
32
88
  'specs/api-v2'
33
89
  >>> generate_permalink("design/unified_model_refactor.md")
34
90
  'design/unified-model-refactor'
91
+ >>> generate_permalink("中文/测试文档.md")
92
+ '中文/测试文档'
35
93
  """
36
94
  # Convert Path to string if needed
37
- path_str = str(file_path)
95
+ path_str = Path(str(file_path)).as_posix()
96
+
97
+ # Remove extension (for now, possibly)
98
+ (base, extension) = os.path.splitext(path_str)
99
+
100
+ # Check if we have CJK characters that should be preserved
101
+ # CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
102
+ # \u3400-\u4dbf (CJK Extension A), \uff00-\uffef (Fullwidth forms)
103
+ has_cjk_chars = any(
104
+ "\u4e00" <= char <= "\u9fff"
105
+ or "\u3000" <= char <= "\u303f"
106
+ or "\u3400" <= char <= "\u4dbf"
107
+ or "\uff00" <= char <= "\uffef"
108
+ for char in base
109
+ )
110
+
111
+ if has_cjk_chars:
112
+ # For text with CJK characters, selectively transliterate only Latin accented chars
113
+ result = ""
114
+ for char in base:
115
+ if (
116
+ "\u4e00" <= char <= "\u9fff"
117
+ or "\u3000" <= char <= "\u303f"
118
+ or "\u3400" <= char <= "\u4dbf"
119
+ ):
120
+ # Preserve CJK ideographs and symbols
121
+ result += char
122
+ elif "\uff00" <= char <= "\uffef":
123
+ # Remove Chinese fullwidth punctuation entirely (like ,!?)
124
+ continue
125
+ else:
126
+ # Transliterate Latin accented characters to ASCII
127
+ result += unidecode(char)
128
+
129
+ # Insert hyphens between CJK and Latin character transitions
130
+ # Match: CJK followed by Latin letter/digit, or Latin letter/digit followed by CJK
131
+ result = re.sub(
132
+ r"([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])([a-zA-Z0-9])", r"\1-\2", result
133
+ )
134
+ result = re.sub(
135
+ r"([a-zA-Z0-9])([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])", r"\1-\2", result
136
+ )
137
+
138
+ # Insert dash between camelCase
139
+ result = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", result)
140
+
141
+ # Convert ASCII letters to lowercase, preserve CJK
142
+ lower_text = "".join(c.lower() if c.isascii() and c.isalpha() else c for c in result)
38
143
 
39
- # Remove extension
40
- base = os.path.splitext(path_str)[0]
144
+ # Replace underscores with hyphens
145
+ text_with_hyphens = lower_text.replace("_", "-")
41
146
 
42
- # Transliterate unicode to ascii
43
- ascii_text = unidecode(base)
147
+ # Remove apostrophes entirely (don't replace with hyphens)
148
+ text_no_apostrophes = text_with_hyphens.replace("'", "")
44
149
 
45
- # Insert dash between camelCase
46
- ascii_text = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", ascii_text)
150
+ # Replace unsafe chars with hyphens, but preserve CJK characters
151
+ clean_text = re.sub(
152
+ r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-]", "-", text_no_apostrophes
153
+ )
154
+ else:
155
+ # Original ASCII-only processing for backward compatibility
156
+ # Transliterate unicode to ascii
157
+ ascii_text = unidecode(base)
158
+
159
+ # Insert dash between camelCase
160
+ ascii_text = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", ascii_text)
47
161
 
48
- # Convert to lowercase
49
- lower_text = ascii_text.lower()
162
+ # Convert to lowercase
163
+ lower_text = ascii_text.lower()
50
164
 
51
- # replace underscores with hyphens
52
- text_with_hyphens = lower_text.replace("_", "-")
165
+ # replace underscores with hyphens
166
+ text_with_hyphens = lower_text.replace("_", "-")
53
167
 
54
- # Replace remaining invalid chars with hyphens
55
- clean_text = re.sub(r"[^a-z0-9/\-]", "-", text_with_hyphens)
168
+ # Remove apostrophes entirely (don't replace with hyphens)
169
+ text_no_apostrophes = text_with_hyphens.replace("'", "")
170
+
171
+ # Replace remaining invalid chars with hyphens
172
+ clean_text = re.sub(r"[^a-z0-9/\-]", "-", text_no_apostrophes)
56
173
 
57
174
  # Collapse multiple hyphens
58
175
  clean_text = re.sub(r"-+", "-", clean_text)
@@ -61,47 +178,43 @@ def generate_permalink(file_path: Union[Path, str]) -> str:
61
178
  segments = clean_text.split("/")
62
179
  clean_segments = [s.strip("-") for s in segments]
63
180
 
64
- return "/".join(clean_segments)
181
+ return_val = "/".join(clean_segments)
182
+
183
+ # Append file extension back, if necessary
184
+ if not split_extension and extension:
185
+ return_val += extension
186
+
187
+ return return_val
65
188
 
66
189
 
67
190
  def setup_logging(
68
- home_dir: Path = config.home, log_file: Optional[str] = None, console: bool = True
191
+ env: str,
192
+ home_dir: Path,
193
+ log_file: Optional[str] = None,
194
+ log_level: str = "INFO",
195
+ console: bool = True,
69
196
  ) -> None: # pragma: no cover
70
197
  """
71
198
  Configure logging for the application.
72
- :param home_dir: the root directory for the application
73
- :param log_file: the name of the log file to write to
74
- :param app: the fastapi application instance
75
- :param console: whether to log to the console
76
- """
77
199
 
200
+ Args:
201
+ env: The environment name (dev, test, prod)
202
+ home_dir: The root directory for the application
203
+ log_file: The name of the log file to write to
204
+ log_level: The logging level to use
205
+ console: Whether to log to the console
206
+ """
78
207
  # Remove default handler and any existing handlers
79
208
  logger.remove()
80
209
 
81
- # Add file handler if we are not running tests
82
- if log_file and config.env != "test":
83
- # enable pydantic logfire
84
- logfire.configure(
85
- code_source=logfire.CodeSource(
86
- repository="https://github.com/basicmachines-co/basic-memory",
87
- revision=basic_memory.__version__,
88
- root_path="/src/basic_memory",
89
- ),
90
- environment=config.env,
91
- console=False,
92
- )
93
- logger.configure(handlers=[logfire.loguru_handler()])
94
-
95
- # instrument code spans
96
- logfire.instrument_sqlite3()
97
- logfire.instrument_httpx()
98
-
99
- # setup logger
210
+ # Add file handler if we are not running tests and a log file is specified
211
+ if log_file and env != "test":
212
+ # Setup file logger
100
213
  log_path = home_dir / log_file
101
214
  logger.add(
102
215
  str(log_path),
103
- level=config.log_level,
104
- rotation="100 MB",
216
+ level=log_level,
217
+ rotation="10 MB",
105
218
  retention="10 days",
106
219
  backtrace=True,
107
220
  diagnose=True,
@@ -109,7 +222,226 @@ def setup_logging(
109
222
  colorize=False,
110
223
  )
111
224
 
112
- # Add stderr handler
113
- logger.add(sys.stderr, level=config.log_level, backtrace=True, diagnose=True, colorize=True)
225
+ # Add console logger if requested or in test mode
226
+ if env == "test" or console:
227
+ logger.add(sys.stderr, level=log_level, backtrace=True, diagnose=True, colorize=True)
228
+
229
+ logger.info(f"ENV: '{env}' Log level: '{log_level}' Logging to {log_file}")
230
+
231
+ # Bind environment context for structured logging (works in both local and cloud)
232
+ tenant_id = os.getenv("BASIC_MEMORY_TENANT_ID", "local")
233
+ fly_app_name = os.getenv("FLY_APP_NAME", "local")
234
+ fly_machine_id = os.getenv("FLY_MACHINE_ID", "local")
235
+ fly_region = os.getenv("FLY_REGION", "local")
236
+
237
+ logger.configure(
238
+ extra={
239
+ "tenant_id": tenant_id,
240
+ "fly_app_name": fly_app_name,
241
+ "fly_machine_id": fly_machine_id,
242
+ "fly_region": fly_region,
243
+ }
244
+ )
245
+
246
+ # Reduce noise from third-party libraries
247
+ noisy_loggers = {
248
+ # HTTP client logs
249
+ "httpx": logging.WARNING,
250
+ # File watching logs
251
+ "watchfiles.main": logging.WARNING,
252
+ }
253
+
254
+ # Set log levels for noisy loggers
255
+ for logger_name, level in noisy_loggers.items():
256
+ logging.getLogger(logger_name).setLevel(level)
257
+
258
+
259
def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
    """Parse tags from various input formats into a consistent list.

    Args:
        tags: Can be a list of strings, a comma-separated string, a string
            holding a JSON array (e.g. '["a", "b"]'), or None

    Returns:
        A list of non-empty tag strings, or an empty list if no tags

    Note:
        This function strips leading '#' characters from tags to prevent
        their accumulation when tags are processed multiple times. Tags that
        are empty once whitespace and '#' are removed are dropped, and
        non-string list items (for example numbers from a JSON array) are
        converted with str() instead of raising.
    """
    if tags is None:
        return []

    # Process list of tags
    if isinstance(tags, list):
        cleaned: List[str] = []
        for tag in tags:
            if tag is None:
                continue
            # Strip whitespace, then leading '#', then whitespace again so that
            # "# tag" becomes "tag" rather than " tag".
            text = str(tag).strip().lstrip("#").strip()
            if text:
                cleaned.append(text)
        return cleaned

    # Process string input
    if isinstance(tags, str):
        # Check if it's a JSON array string (common issue from AI assistants)
        import json

        candidate = tags.strip()
        if candidate.startswith("[") and candidate.endswith("]"):
            try:
                parsed_json = json.loads(candidate)
            except json.JSONDecodeError:
                # Not valid JSON, fall through to comma-separated parsing
                parsed_json = None
            if isinstance(parsed_json, list):
                return parse_tags(parsed_json)

        # Split by comma and reuse the list branch for per-tag cleanup
        return parse_tags(tags.split(","))

    # For any other type, try to convert to string and parse
    try:  # pragma: no cover
        return parse_tags(str(tags))
    except (ValueError, TypeError):  # pragma: no cover
        logger.warning(f"Couldn't parse tags from input of type {type(tags)}: {tags}")
        return []
305
+
306
+
307
def normalize_newlines(multiline: str) -> str:
    r"""Replace any \r\n, \r, or \n with the native newline.

    The docstring is a raw string so the escape sequences above are shown
    literally instead of being embedded as real control characters.

    Args:
        multiline: String containing any mixture of newlines.

    Returns:
        A string with normalized newlines native to the platform
        (``os.linesep``).
    """
    # "\r\n?" is tried first so a CRLF pair is replaced once, not twice.
    return re.sub(r"\r\n?|\n", os.linesep, multiline)
317
+
318
+
319
def normalize_file_path_for_comparison(file_path: str) -> str:
    """Build a comparison key for a file path.

    The key is produced by, in order: lowercasing the path, applying Unicode
    NFD normalization, turning backslashes into forward slashes, and
    collapsing runs of slashes into one.

    Args:
        file_path: The file path to normalize

    Returns:
        Normalized file path for comparison purposes
    """
    import unicodedata

    # Lowercase first, then decompose, matching the established key format.
    decomposed = unicodedata.normalize("NFD", file_path.lower())
    forward_slashed = decomposed.replace("\\", "/")
    return re.sub(r"/+", "/", forward_slashed)
348
+
349
+
350
def detect_potential_file_conflicts(file_path: str, existing_paths: List[str]) -> List[str]:
    """Return the existing paths that could collide with ``file_path``.

    An existing path is reported when it is not the very same string as
    ``file_path`` and either:
    - its comparison key from ``normalize_file_path_for_comparison`` equals
      that of ``file_path`` (case, Unicode form, or separator differences), or
    - ``generate_permalink`` yields the same permalink for both.

    Args:
        file_path: The file path to check
        existing_paths: List of existing file paths to check against

    Returns:
        List of existing paths that might conflict with the given file path,
        in the order they appear in ``existing_paths``
    """
    target_key = normalize_file_path_for_comparison(file_path)
    target_permalink = generate_permalink(file_path)

    def clashes(candidate: str) -> bool:
        # Identical strings are the same file, not a conflict.
        if candidate == file_path:
            return False
        # The cheaper key comparison runs first; the permalink is only
        # computed when the keys differ.
        if normalize_file_path_for_comparison(candidate) == target_key:
            return True
        return generate_permalink(candidate) == target_permalink

    return [candidate for candidate in existing_paths if clashes(candidate)]
390
+
391
+
392
def valid_project_path_value(path: str):
    """Check that a project-relative path string looks safe.

    Returns True for an empty string (it resolves to the project root).
    Returns False when the string contains ".." or "~", starts with a
    backslash or a forward slash, has ":" as its second character
    (drive-letter form), or - unless it is whitespace only - contains a
    control character other than tab. Returns True otherwise.
    """
    # Empty means "project root" and is always acceptable.
    if not path:
        return True

    # Traversal / home-expansion markers anywhere in the string.
    # (This also covers the Windows-style "\.." form.)
    if any(marker in path for marker in ("..", "~")):
        return False

    # Absolute paths: leading backslash, leading slash, or a drive letter.
    if path.startswith(("\\", "/")):
        return False
    if len(path) >= 2 and path[1] == ":":
        return False

    # Control characters are rejected, tab excepted; a whitespace-only
    # string skips this check.
    if path.strip():
        for ch in path:
            if ord(ch) < 32 and ch != "\t":
                return False

    return True
415
+
416
+
417
def validate_project_path(path: str, project_path: Path) -> bool:
    """Check that ``path`` is acceptable and resolves inside ``project_path``.

    The string is first screened with ``valid_project_path_value``. It is
    then joined onto ``project_path`` and resolved, and the result must be
    relative to the resolved project directory. A ``ValueError`` or
    ``OSError`` raised along the way counts as invalid.
    """
    if not valid_project_path_value(path):
        return False

    try:
        candidate = (project_path / path).resolve()
        root = project_path.resolve()
        inside = candidate.is_relative_to(root)
    except (ValueError, OSError):
        inside = False
    return inside
428
+
429
+
430
def ensure_timezone_aware(dt: datetime) -> datetime:
    """Return ``dt`` as a timezone-aware datetime.

    A naive datetime (``tzinfo is None``) is treated as local time and
    converted with ``astimezone()``, which attaches the system's local
    timezone. A datetime that already has ``tzinfo`` is returned as-is.

    Args:
        dt: The datetime to ensure is timezone-aware

    Returns:
        A timezone-aware datetime
    """
    is_naive = dt.tzinfo is None
    return dt.astimezone() if is_naive else dt