basic-memory 0.14.3__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of basic-memory might be problematic. Click here for more details.
- basic_memory/__init__.py +1 -1
- basic_memory/alembic/versions/a1b2c3d4e5f6_fix_project_foreign_keys.py +49 -0
- basic_memory/api/app.py +10 -4
- basic_memory/api/routers/knowledge_router.py +25 -8
- basic_memory/api/routers/project_router.py +99 -4
- basic_memory/api/routers/resource_router.py +3 -3
- basic_memory/cli/app.py +9 -28
- basic_memory/cli/auth.py +277 -0
- basic_memory/cli/commands/cloud/__init__.py +5 -0
- basic_memory/cli/commands/cloud/api_client.py +112 -0
- basic_memory/cli/commands/cloud/bisync_commands.py +818 -0
- basic_memory/cli/commands/cloud/core_commands.py +288 -0
- basic_memory/cli/commands/cloud/mount_commands.py +295 -0
- basic_memory/cli/commands/cloud/rclone_config.py +288 -0
- basic_memory/cli/commands/cloud/rclone_installer.py +198 -0
- basic_memory/cli/commands/command_utils.py +60 -0
- basic_memory/cli/commands/import_memory_json.py +0 -4
- basic_memory/cli/commands/mcp.py +16 -4
- basic_memory/cli/commands/project.py +141 -145
- basic_memory/cli/commands/status.py +34 -22
- basic_memory/cli/commands/sync.py +45 -228
- basic_memory/cli/commands/tool.py +87 -16
- basic_memory/cli/main.py +1 -0
- basic_memory/config.py +96 -20
- basic_memory/db.py +104 -3
- basic_memory/deps.py +20 -3
- basic_memory/file_utils.py +89 -0
- basic_memory/ignore_utils.py +295 -0
- basic_memory/importers/chatgpt_importer.py +1 -1
- basic_memory/importers/utils.py +2 -2
- basic_memory/markdown/entity_parser.py +2 -2
- basic_memory/markdown/markdown_processor.py +2 -2
- basic_memory/markdown/plugins.py +39 -21
- basic_memory/markdown/utils.py +1 -1
- basic_memory/mcp/async_client.py +22 -10
- basic_memory/mcp/project_context.py +141 -0
- basic_memory/mcp/prompts/ai_assistant_guide.py +49 -4
- basic_memory/mcp/prompts/continue_conversation.py +1 -1
- basic_memory/mcp/prompts/recent_activity.py +116 -32
- basic_memory/mcp/prompts/search.py +1 -1
- basic_memory/mcp/prompts/utils.py +11 -4
- basic_memory/mcp/resources/ai_assistant_guide.md +179 -41
- basic_memory/mcp/resources/project_info.py +20 -6
- basic_memory/mcp/server.py +0 -37
- basic_memory/mcp/tools/__init__.py +5 -6
- basic_memory/mcp/tools/build_context.py +39 -19
- basic_memory/mcp/tools/canvas.py +19 -8
- basic_memory/mcp/tools/chatgpt_tools.py +178 -0
- basic_memory/mcp/tools/delete_note.py +67 -34
- basic_memory/mcp/tools/edit_note.py +55 -39
- basic_memory/mcp/tools/headers.py +44 -0
- basic_memory/mcp/tools/list_directory.py +18 -8
- basic_memory/mcp/tools/move_note.py +119 -41
- basic_memory/mcp/tools/project_management.py +77 -229
- basic_memory/mcp/tools/read_content.py +28 -12
- basic_memory/mcp/tools/read_note.py +97 -57
- basic_memory/mcp/tools/recent_activity.py +441 -42
- basic_memory/mcp/tools/search.py +82 -70
- basic_memory/mcp/tools/sync_status.py +5 -4
- basic_memory/mcp/tools/utils.py +19 -0
- basic_memory/mcp/tools/view_note.py +31 -6
- basic_memory/mcp/tools/write_note.py +65 -14
- basic_memory/models/knowledge.py +19 -2
- basic_memory/models/project.py +6 -2
- basic_memory/repository/entity_repository.py +31 -84
- basic_memory/repository/project_repository.py +1 -1
- basic_memory/repository/relation_repository.py +13 -0
- basic_memory/repository/repository.py +2 -2
- basic_memory/repository/search_repository.py +9 -3
- basic_memory/schemas/__init__.py +6 -0
- basic_memory/schemas/base.py +70 -12
- basic_memory/schemas/cloud.py +46 -0
- basic_memory/schemas/memory.py +99 -18
- basic_memory/schemas/project_info.py +9 -10
- basic_memory/schemas/sync_report.py +48 -0
- basic_memory/services/context_service.py +35 -11
- basic_memory/services/directory_service.py +7 -0
- basic_memory/services/entity_service.py +82 -52
- basic_memory/services/initialization.py +30 -11
- basic_memory/services/project_service.py +23 -33
- basic_memory/sync/sync_service.py +148 -24
- basic_memory/sync/watch_service.py +128 -44
- basic_memory/utils.py +181 -109
- {basic_memory-0.14.3.dist-info → basic_memory-0.15.0.dist-info}/METADATA +26 -96
- basic_memory-0.15.0.dist-info/RECORD +147 -0
- basic_memory/mcp/project_session.py +0 -120
- basic_memory-0.14.3.dist-info/RECORD +0 -132
- {basic_memory-0.14.3.dist-info → basic_memory-0.15.0.dist-info}/WHEEL +0 -0
- {basic_memory-0.14.3.dist-info → basic_memory-0.15.0.dist-info}/entry_points.txt +0 -0
- {basic_memory-0.14.3.dist-info → basic_memory-0.15.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,9 +5,10 @@ import os
|
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import List, Optional, Set
|
|
8
|
+
from typing import List, Optional, Set, Sequence
|
|
9
9
|
|
|
10
10
|
from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
|
|
11
|
+
from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path
|
|
11
12
|
from basic_memory.models import Project
|
|
12
13
|
from basic_memory.repository import ProjectRepository
|
|
13
14
|
from loguru import logger
|
|
@@ -15,6 +16,7 @@ from pydantic import BaseModel
|
|
|
15
16
|
from rich.console import Console
|
|
16
17
|
from watchfiles import awatch
|
|
17
18
|
from watchfiles.main import FileChange, Change
|
|
19
|
+
import time
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class WatchEvent(BaseModel):
|
|
@@ -81,54 +83,110 @@ class WatchService:
|
|
|
81
83
|
self.state = WatchServiceState()
|
|
82
84
|
self.status_path = Path.home() / ".basic-memory" / WATCH_STATUS_JSON
|
|
83
85
|
self.status_path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
|
+
self._ignore_patterns_cache: dict[Path, Set[str]] = {}
|
|
84
87
|
|
|
85
88
|
# quiet mode for mcp so it doesn't mess up stdout
|
|
86
89
|
self.console = Console(quiet=quiet)
|
|
87
90
|
|
|
91
|
+
async def _schedule_restart(self, stop_event: asyncio.Event):
|
|
92
|
+
"""Schedule a restart of the watch service after the configured interval."""
|
|
93
|
+
await asyncio.sleep(self.app_config.watch_project_reload_interval)
|
|
94
|
+
stop_event.set()
|
|
95
|
+
|
|
96
|
+
def _get_ignore_patterns(self, project_path: Path) -> Set[str]:
|
|
97
|
+
"""Get or load ignore patterns for a project path."""
|
|
98
|
+
if project_path not in self._ignore_patterns_cache:
|
|
99
|
+
self._ignore_patterns_cache[project_path] = load_gitignore_patterns(project_path)
|
|
100
|
+
return self._ignore_patterns_cache[project_path]
|
|
101
|
+
|
|
102
|
+
async def _watch_projects_cycle(self, projects: Sequence[Project], stop_event: asyncio.Event):
|
|
103
|
+
"""Run one cycle of watching the given projects until stop_event is set."""
|
|
104
|
+
project_paths = [project.path for project in projects]
|
|
105
|
+
|
|
106
|
+
async for changes in awatch(
|
|
107
|
+
*project_paths,
|
|
108
|
+
debounce=self.app_config.sync_delay,
|
|
109
|
+
watch_filter=self.filter_changes,
|
|
110
|
+
recursive=True,
|
|
111
|
+
stop_event=stop_event,
|
|
112
|
+
):
|
|
113
|
+
# group changes by project and filter using ignore patterns
|
|
114
|
+
project_changes = defaultdict(list)
|
|
115
|
+
for change, path in changes:
|
|
116
|
+
for project in projects:
|
|
117
|
+
if self.is_project_path(project, path):
|
|
118
|
+
# Check if the file should be ignored based on gitignore patterns
|
|
119
|
+
project_path = Path(project.path)
|
|
120
|
+
file_path = Path(path)
|
|
121
|
+
ignore_patterns = self._get_ignore_patterns(project_path)
|
|
122
|
+
|
|
123
|
+
if should_ignore_path(file_path, project_path, ignore_patterns):
|
|
124
|
+
logger.trace(
|
|
125
|
+
f"Ignoring watched file change: {file_path.relative_to(project_path)}"
|
|
126
|
+
)
|
|
127
|
+
continue
|
|
128
|
+
|
|
129
|
+
project_changes[project].append((change, path))
|
|
130
|
+
break
|
|
131
|
+
|
|
132
|
+
# create coroutines to handle changes
|
|
133
|
+
change_handlers = [
|
|
134
|
+
self.handle_changes(project, changes) # pyright: ignore
|
|
135
|
+
for project, changes in project_changes.items()
|
|
136
|
+
]
|
|
137
|
+
|
|
138
|
+
# process changes
|
|
139
|
+
await asyncio.gather(*change_handlers)
|
|
140
|
+
|
|
88
141
|
async def run(self): # pragma: no cover
|
|
89
142
|
"""Watch for file changes and sync them"""
|
|
90
143
|
|
|
91
|
-
|
|
92
|
-
|
|
144
|
+
self.state.running = True
|
|
145
|
+
self.state.start_time = datetime.now()
|
|
146
|
+
await self.write_status()
|
|
93
147
|
|
|
94
148
|
logger.info(
|
|
95
149
|
"Watch service started",
|
|
96
|
-
f"directories={project_paths}",
|
|
97
150
|
f"debounce_ms={self.app_config.sync_delay}",
|
|
98
151
|
f"pid={os.getpid()}",
|
|
99
152
|
)
|
|
100
153
|
|
|
101
|
-
self.state.running = True
|
|
102
|
-
self.state.start_time = datetime.now()
|
|
103
|
-
await self.write_status()
|
|
104
|
-
|
|
105
154
|
try:
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
155
|
+
while self.state.running:
|
|
156
|
+
# Clear ignore patterns cache to pick up any .gitignore changes
|
|
157
|
+
self._ignore_patterns_cache.clear()
|
|
158
|
+
|
|
159
|
+
# Reload projects to catch any new/removed projects
|
|
160
|
+
projects = await self.project_repository.get_active_projects()
|
|
161
|
+
|
|
162
|
+
project_paths = [project.path for project in projects]
|
|
163
|
+
logger.debug(f"Starting watch cycle for directories: {project_paths}")
|
|
164
|
+
|
|
165
|
+
# Create stop event for this watch cycle
|
|
166
|
+
stop_event = asyncio.Event()
|
|
167
|
+
|
|
168
|
+
# Schedule restart after configured interval to reload projects
|
|
169
|
+
timer_task = asyncio.create_task(self._schedule_restart(stop_event))
|
|
170
|
+
|
|
171
|
+
try:
|
|
172
|
+
await self._watch_projects_cycle(projects, stop_event)
|
|
173
|
+
except Exception as e:
|
|
174
|
+
logger.exception("Watch service error during cycle", error=str(e))
|
|
175
|
+
self.state.record_error(str(e))
|
|
176
|
+
await self.write_status()
|
|
177
|
+
# Continue to next cycle instead of exiting
|
|
178
|
+
await asyncio.sleep(5) # Brief pause before retry
|
|
179
|
+
finally:
|
|
180
|
+
# Cancel timer task if it's still running
|
|
181
|
+
if not timer_task.done():
|
|
182
|
+
timer_task.cancel()
|
|
183
|
+
try:
|
|
184
|
+
await timer_task
|
|
185
|
+
except asyncio.CancelledError:
|
|
186
|
+
pass
|
|
128
187
|
|
|
129
188
|
except Exception as e:
|
|
130
189
|
logger.exception("Watch service error", error=str(e))
|
|
131
|
-
|
|
132
190
|
self.state.record_error(str(e))
|
|
133
191
|
await self.write_status()
|
|
134
192
|
raise
|
|
@@ -175,11 +233,8 @@ class WatchService:
|
|
|
175
233
|
|
|
176
234
|
async def handle_changes(self, project: Project, changes: Set[FileChange]) -> None:
|
|
177
235
|
"""Process a batch of file changes"""
|
|
178
|
-
|
|
179
|
-
from
|
|
180
|
-
|
|
181
|
-
# Lazily initialize sync service for project changes
|
|
182
|
-
from basic_memory.cli.commands.sync import get_sync_service
|
|
236
|
+
# avoid circular imports
|
|
237
|
+
from basic_memory.sync.sync_service import get_sync_service
|
|
183
238
|
|
|
184
239
|
sync_service = await get_sync_service(project)
|
|
185
240
|
file_service = sync_service.file_service
|
|
@@ -197,7 +252,7 @@ class WatchService:
|
|
|
197
252
|
|
|
198
253
|
for change, path in changes:
|
|
199
254
|
# convert to relative path
|
|
200
|
-
relative_path =
|
|
255
|
+
relative_path = Path(path).relative_to(directory).as_posix()
|
|
201
256
|
|
|
202
257
|
# Skip .tmp files - they're temporary and shouldn't be synced
|
|
203
258
|
if relative_path.endswith(".tmp"):
|
|
@@ -284,13 +339,42 @@ class WatchService:
|
|
|
284
339
|
# Process deletes
|
|
285
340
|
for path in deletes:
|
|
286
341
|
if path not in processed:
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
342
|
+
# Check if file still exists on disk (vim atomic write edge case)
|
|
343
|
+
full_path = directory / path
|
|
344
|
+
if full_path.exists() and full_path.is_file():
|
|
345
|
+
# File still exists despite DELETE event - treat as modification
|
|
346
|
+
logger.debug(
|
|
347
|
+
"File exists despite DELETE event, treating as modification", path=path
|
|
348
|
+
)
|
|
349
|
+
entity, checksum = await sync_service.sync_file(path, new=False)
|
|
350
|
+
self.state.add_event(
|
|
351
|
+
path=path, action="modified", status="success", checksum=checksum
|
|
352
|
+
)
|
|
353
|
+
self.console.print(f"[yellow]✎[/yellow] {path} (atomic write)")
|
|
354
|
+
logger.info(f"atomic write detected: {path}")
|
|
355
|
+
processed.add(path)
|
|
356
|
+
modify_count += 1
|
|
357
|
+
else:
|
|
358
|
+
# Check if this was a directory - skip if so
|
|
359
|
+
# (we can't tell if the deleted path was a directory since it no longer exists,
|
|
360
|
+
# so we check if there's an entity in the database for it)
|
|
361
|
+
entity = await sync_service.entity_repository.get_by_file_path(path)
|
|
362
|
+
if entity is None:
|
|
363
|
+
# No entity means this was likely a directory - skip it
|
|
364
|
+
logger.debug(
|
|
365
|
+
f"Skipping deleted path with no entity (likely directory), path={path}"
|
|
366
|
+
)
|
|
367
|
+
processed.add(path)
|
|
368
|
+
continue
|
|
369
|
+
|
|
370
|
+
# File truly deleted
|
|
371
|
+
logger.debug("Processing deleted file", path=path)
|
|
372
|
+
await sync_service.handle_delete(path)
|
|
373
|
+
self.state.add_event(path=path, action="deleted", status="success")
|
|
374
|
+
self.console.print(f"[red]✕[/red] {path}")
|
|
375
|
+
logger.info(f"deleted: {path}")
|
|
376
|
+
processed.add(path)
|
|
377
|
+
delete_count += 1
|
|
294
378
|
|
|
295
379
|
# Process adds
|
|
296
380
|
for path in adds:
|
basic_memory/utils.py
CHANGED
|
@@ -4,11 +4,13 @@ import os
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
import re
|
|
7
|
-
import
|
|
7
|
+
import sys
|
|
8
|
+
from datetime import datetime
|
|
8
9
|
from pathlib import Path
|
|
9
|
-
from typing import Optional, Protocol, Union, runtime_checkable, List
|
|
10
|
+
from typing import Optional, Protocol, Union, runtime_checkable, List
|
|
10
11
|
|
|
11
12
|
from loguru import logger
|
|
13
|
+
from unidecode import unidecode
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
@runtime_checkable
|
|
@@ -26,9 +28,11 @@ FilePath = Union[Path, str]
|
|
|
26
28
|
logging.getLogger("opentelemetry.sdk.metrics._internal.instrument").setLevel(logging.ERROR)
|
|
27
29
|
|
|
28
30
|
|
|
29
|
-
def generate_permalink(file_path: Union[Path, str, Any]) -> str:
|
|
30
|
-
"""
|
|
31
|
-
|
|
31
|
+
def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: bool = True) -> str:
|
|
32
|
+
"""Generate a stable permalink from a file path.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
file_path: Original file path (str, Path, or PathLike)
|
|
32
36
|
|
|
33
37
|
Returns:
|
|
34
38
|
Normalized permalink that matches validation rules. Converts spaces and underscores
|
|
@@ -37,7 +41,7 @@ def generate_permalink(file_path: Union[Path, str, Any]) -> str:
|
|
|
37
41
|
Examples:
|
|
38
42
|
>>> generate_permalink("docs/My Feature.md")
|
|
39
43
|
'docs/my-feature'
|
|
40
|
-
>>> generate_permalink("specs/
|
|
44
|
+
>>> generate_permalink("specs/API (v2).md")
|
|
41
45
|
'specs/api-v2'
|
|
42
46
|
>>> generate_permalink("design/unified_model_refactor.md")
|
|
43
47
|
'design/unified-model-refactor'
|
|
@@ -45,84 +49,99 @@ def generate_permalink(file_path: Union[Path, str, Any]) -> str:
|
|
|
45
49
|
'中文/测试文档'
|
|
46
50
|
"""
|
|
47
51
|
# Convert Path to string if needed
|
|
48
|
-
path_str = str(file_path)
|
|
49
|
-
|
|
50
|
-
# Remove extension
|
|
51
|
-
base = os.path.splitext(path_str)
|
|
52
|
-
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
"ñ": "n", # Handle Niño -> nino
|
|
64
|
-
"ö": "o", # Handle Björk -> bjork
|
|
65
|
-
"ä": "a", # Handle Häagen -> haagen
|
|
66
|
-
# Add more mappings as needed
|
|
67
|
-
}
|
|
52
|
+
path_str = Path(str(file_path)).as_posix()
|
|
53
|
+
|
|
54
|
+
# Remove extension (for now, possibly)
|
|
55
|
+
(base, extension) = os.path.splitext(path_str)
|
|
56
|
+
|
|
57
|
+
# Check if we have CJK characters that should be preserved
|
|
58
|
+
# CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
|
|
59
|
+
# \u3400-\u4dbf (CJK Extension A), \uff00-\uffef (Fullwidth forms)
|
|
60
|
+
has_cjk_chars = any(
|
|
61
|
+
"\u4e00" <= char <= "\u9fff"
|
|
62
|
+
or "\u3000" <= char <= "\u303f"
|
|
63
|
+
or "\u3400" <= char <= "\u4dbf"
|
|
64
|
+
or "\uff00" <= char <= "\uffef"
|
|
65
|
+
for char in base
|
|
66
|
+
)
|
|
68
67
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# If decomposition produced multiple characters and first one is ASCII
|
|
80
|
-
if len(decomposed) > 1 and ord(decomposed[0]) < 128:
|
|
81
|
-
# Keep only the base character
|
|
82
|
-
result += decomposed[0].lower()
|
|
83
|
-
else:
|
|
84
|
-
# For non-Latin scripts like Chinese, preserve the character
|
|
68
|
+
if has_cjk_chars:
|
|
69
|
+
# For text with CJK characters, selectively transliterate only Latin accented chars
|
|
70
|
+
result = ""
|
|
71
|
+
for char in base:
|
|
72
|
+
if (
|
|
73
|
+
"\u4e00" <= char <= "\u9fff"
|
|
74
|
+
or "\u3000" <= char <= "\u303f"
|
|
75
|
+
or "\u3400" <= char <= "\u4dbf"
|
|
76
|
+
):
|
|
77
|
+
# Preserve CJK ideographs and symbols
|
|
85
78
|
result += char
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
79
|
+
elif "\uff00" <= char <= "\uffef":
|
|
80
|
+
# Remove Chinese fullwidth punctuation entirely (like ,!?)
|
|
81
|
+
continue
|
|
82
|
+
else:
|
|
83
|
+
# Transliterate Latin accented characters to ASCII
|
|
84
|
+
result += unidecode(char)
|
|
89
85
|
|
|
90
|
-
|
|
91
|
-
|
|
86
|
+
# Insert hyphens between CJK and Latin character transitions
|
|
87
|
+
# Match: CJK followed by Latin letter/digit, or Latin letter/digit followed by CJK
|
|
88
|
+
result = re.sub(
|
|
89
|
+
r"([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])([a-zA-Z0-9])", r"\1-\2", result
|
|
90
|
+
)
|
|
91
|
+
result = re.sub(
|
|
92
|
+
r"([a-zA-Z0-9])([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])", r"\1-\2", result
|
|
93
|
+
)
|
|
92
94
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
result = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", result)
|
|
95
|
+
# Insert dash between camelCase
|
|
96
|
+
result = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", result)
|
|
96
97
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
result = re.sub(r"([\u4e00-\u9fff])([a-zA-Z])", r"\1-\2", result)
|
|
100
|
-
result = re.sub(r"([a-zA-Z])([\u4e00-\u9fff])", r"\1-\2", result)
|
|
98
|
+
# Convert ASCII letters to lowercase, preserve CJK
|
|
99
|
+
lower_text = "".join(c.lower() if c.isascii() and c.isalpha() else c for c in result)
|
|
101
100
|
|
|
102
|
-
|
|
103
|
-
|
|
101
|
+
# Replace underscores with hyphens
|
|
102
|
+
text_with_hyphens = lower_text.replace("_", "-")
|
|
104
103
|
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
# Remove apostrophes entirely (don't replace with hyphens)
|
|
105
|
+
text_no_apostrophes = text_with_hyphens.replace("'", "")
|
|
107
106
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
107
|
+
# Replace unsafe chars with hyphens, but preserve CJK characters
|
|
108
|
+
clean_text = re.sub(
|
|
109
|
+
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-]", "-", text_no_apostrophes
|
|
110
|
+
)
|
|
111
|
+
else:
|
|
112
|
+
# Original ASCII-only processing for backward compatibility
|
|
113
|
+
# Transliterate unicode to ascii
|
|
114
|
+
ascii_text = unidecode(base)
|
|
115
|
+
|
|
116
|
+
# Insert dash between camelCase
|
|
117
|
+
ascii_text = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", ascii_text)
|
|
118
|
+
|
|
119
|
+
# Convert to lowercase
|
|
120
|
+
lower_text = ascii_text.lower()
|
|
121
|
+
|
|
122
|
+
# replace underscores with hyphens
|
|
123
|
+
text_with_hyphens = lower_text.replace("_", "-")
|
|
124
|
+
|
|
125
|
+
# Remove apostrophes entirely (don't replace with hyphens)
|
|
126
|
+
text_no_apostrophes = text_with_hyphens.replace("'", "")
|
|
127
|
+
|
|
128
|
+
# Replace remaining invalid chars with hyphens
|
|
129
|
+
clean_text = re.sub(r"[^a-z0-9/\-]", "-", text_no_apostrophes)
|
|
113
130
|
|
|
114
131
|
# Collapse multiple hyphens
|
|
115
132
|
clean_text = re.sub(r"-+", "-", clean_text)
|
|
116
133
|
|
|
117
|
-
# Remove hyphens between adjacent Chinese characters only
|
|
118
|
-
# This handles cases like "你好-世界" -> "你好世界"
|
|
119
|
-
clean_text = re.sub(r"([\u4e00-\u9fff])-([\u4e00-\u9fff])", r"\1\2", clean_text)
|
|
120
|
-
|
|
121
134
|
# Clean each path segment
|
|
122
135
|
segments = clean_text.split("/")
|
|
123
136
|
clean_segments = [s.strip("-") for s in segments]
|
|
124
137
|
|
|
125
|
-
|
|
138
|
+
return_val = "/".join(clean_segments)
|
|
139
|
+
|
|
140
|
+
# Append file extension back, if necessary
|
|
141
|
+
if not split_extension and extension:
|
|
142
|
+
return_val += extension
|
|
143
|
+
|
|
144
|
+
return return_val
|
|
126
145
|
|
|
127
146
|
|
|
128
147
|
def setup_logging(
|
|
@@ -143,7 +162,7 @@ def setup_logging(
|
|
|
143
162
|
console: Whether to log to the console
|
|
144
163
|
"""
|
|
145
164
|
# Remove default handler and any existing handlers
|
|
146
|
-
|
|
165
|
+
logger.remove()
|
|
147
166
|
|
|
148
167
|
# Add file handler if we are not running tests and a log file is specified
|
|
149
168
|
if log_file and env != "test":
|
|
@@ -161,8 +180,8 @@ def setup_logging(
|
|
|
161
180
|
)
|
|
162
181
|
|
|
163
182
|
# Add console logger if requested or in test mode
|
|
164
|
-
|
|
165
|
-
|
|
183
|
+
if env == "test" or console:
|
|
184
|
+
logger.add(sys.stderr, level=log_level, backtrace=True, diagnose=True, colorize=True)
|
|
166
185
|
|
|
167
186
|
logger.info(f"ENV: '{env}' Log level: '{log_level}' Logging to {log_file}")
|
|
168
187
|
|
|
@@ -172,8 +191,6 @@ def setup_logging(
|
|
|
172
191
|
"httpx": logging.WARNING,
|
|
173
192
|
# File watching logs
|
|
174
193
|
"watchfiles.main": logging.WARNING,
|
|
175
|
-
# SQLAlchemy deprecation warnings
|
|
176
|
-
"sqlalchemy": logging.WARNING,
|
|
177
194
|
}
|
|
178
195
|
|
|
179
196
|
# Set log levels for noisy loggers
|
|
@@ -181,6 +198,66 @@ def setup_logging(
|
|
|
181
198
|
logging.getLogger(logger_name).setLevel(level)
|
|
182
199
|
|
|
183
200
|
|
|
201
|
+
def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
|
|
202
|
+
"""Parse tags from various input formats into a consistent list.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
tags: Can be a list of strings, a comma-separated string, or None
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
A list of tag strings, or an empty list if no tags
|
|
209
|
+
|
|
210
|
+
Note:
|
|
211
|
+
This function strips leading '#' characters from tags to prevent
|
|
212
|
+
their accumulation when tags are processed multiple times.
|
|
213
|
+
"""
|
|
214
|
+
if tags is None:
|
|
215
|
+
return []
|
|
216
|
+
|
|
217
|
+
# Process list of tags
|
|
218
|
+
if isinstance(tags, list):
|
|
219
|
+
# First strip whitespace, then strip leading '#' characters to prevent accumulation
|
|
220
|
+
return [tag.strip().lstrip("#") for tag in tags if tag and tag.strip()]
|
|
221
|
+
|
|
222
|
+
# Process string input
|
|
223
|
+
if isinstance(tags, str):
|
|
224
|
+
# Check if it's a JSON array string (common issue from AI assistants)
|
|
225
|
+
import json
|
|
226
|
+
|
|
227
|
+
if tags.strip().startswith("[") and tags.strip().endswith("]"):
|
|
228
|
+
try:
|
|
229
|
+
# Try to parse as JSON array
|
|
230
|
+
parsed_json = json.loads(tags)
|
|
231
|
+
if isinstance(parsed_json, list):
|
|
232
|
+
# Recursively parse the JSON array as a list
|
|
233
|
+
return parse_tags(parsed_json)
|
|
234
|
+
except json.JSONDecodeError:
|
|
235
|
+
# Not valid JSON, fall through to comma-separated parsing
|
|
236
|
+
pass
|
|
237
|
+
|
|
238
|
+
# Split by comma, strip whitespace, then strip leading '#' characters
|
|
239
|
+
return [tag.strip().lstrip("#") for tag in tags.split(",") if tag and tag.strip()]
|
|
240
|
+
|
|
241
|
+
# For any other type, try to convert to string and parse
|
|
242
|
+
try: # pragma: no cover
|
|
243
|
+
return parse_tags(str(tags))
|
|
244
|
+
except (ValueError, TypeError): # pragma: no cover
|
|
245
|
+
logger.warning(f"Couldn't parse tags from input of type {type(tags)}: {tags}")
|
|
246
|
+
return []
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def normalize_newlines(multiline: str) -> str:
|
|
250
|
+
"""Replace any \r\n, \r, or \n with the native newline.
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
multiline: String containing any mixture of newlines.
|
|
254
|
+
|
|
255
|
+
Returns:
|
|
256
|
+
A string with normalized newlines native to the platform.
|
|
257
|
+
"""
|
|
258
|
+
return re.sub(r"\r\n?|\n", os.linesep, multiline)
|
|
259
|
+
|
|
260
|
+
|
|
184
261
|
def normalize_file_path_for_comparison(file_path: str) -> str:
|
|
185
262
|
"""Normalize a file path for conflict detection.
|
|
186
263
|
|
|
@@ -254,42 +331,8 @@ def detect_potential_file_conflicts(file_path: str, existing_paths: List[str]) -
|
|
|
254
331
|
return conflicts
|
|
255
332
|
|
|
256
333
|
|
|
257
|
-
def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
|
|
258
|
-
"""
|
|
259
|
-
|
|
260
|
-
Args:
|
|
261
|
-
tags: Can be a list of strings, a comma-separated string, or None
|
|
262
|
-
|
|
263
|
-
Returns:
|
|
264
|
-
A list of tag strings, or an empty list if no tags
|
|
265
|
-
|
|
266
|
-
Note:
|
|
267
|
-
This function strips leading '#' characters from tags to prevent
|
|
268
|
-
their accumulation when tags are processed multiple times.
|
|
269
|
-
"""
|
|
270
|
-
if tags is None:
|
|
271
|
-
return []
|
|
272
|
-
|
|
273
|
-
# Process list of tags
|
|
274
|
-
if isinstance(tags, list):
|
|
275
|
-
# First strip whitespace, then strip leading '#' characters to prevent accumulation
|
|
276
|
-
return [tag.strip().lstrip("#") for tag in tags if tag and tag.strip()]
|
|
277
|
-
|
|
278
|
-
# Process comma-separated string of tags
|
|
279
|
-
if isinstance(tags, str):
|
|
280
|
-
# Split by comma, strip whitespace, then strip leading '#' characters
|
|
281
|
-
return [tag.strip().lstrip("#") for tag in tags.split(",") if tag and tag.strip()]
|
|
282
|
-
|
|
283
|
-
# For any other type, try to convert to string and parse
|
|
284
|
-
try: # pragma: no cover
|
|
285
|
-
return parse_tags(str(tags))
|
|
286
|
-
except (ValueError, TypeError): # pragma: no cover
|
|
287
|
-
logger.warning(f"Couldn't parse tags from input of type {type(tags)}: {tags}")
|
|
288
|
-
return []
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
def validate_project_path(path: str, project_path: Path) -> bool:
|
|
292
|
-
"""Ensure path stays within project boundaries."""
|
|
334
|
+
def valid_project_path_value(path: str):
|
|
335
|
+
"""Ensure project path is valid."""
|
|
293
336
|
# Allow empty strings as they resolve to the project root
|
|
294
337
|
if not path:
|
|
295
338
|
return True
|
|
@@ -310,8 +353,37 @@ def validate_project_path(path: str, project_path: Path) -> bool:
|
|
|
310
353
|
if path.strip() and any(ord(c) < 32 and c not in [" ", "\t"] for c in path):
|
|
311
354
|
return False
|
|
312
355
|
|
|
356
|
+
return True
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def validate_project_path(path: str, project_path: Path) -> bool:
|
|
360
|
+
"""Ensure path is valid and stays within project boundaries."""
|
|
361
|
+
|
|
362
|
+
if not valid_project_path_value(path):
|
|
363
|
+
return False
|
|
364
|
+
|
|
313
365
|
try:
|
|
314
366
|
resolved = (project_path / path).resolve()
|
|
315
367
|
return resolved.is_relative_to(project_path.resolve())
|
|
316
368
|
except (ValueError, OSError):
|
|
317
369
|
return False
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def ensure_timezone_aware(dt: datetime) -> datetime:
|
|
373
|
+
"""Ensure a datetime is timezone-aware using system timezone.
|
|
374
|
+
|
|
375
|
+
If the datetime is naive, convert it to timezone-aware using the system's local timezone.
|
|
376
|
+
If it's already timezone-aware, return it unchanged.
|
|
377
|
+
|
|
378
|
+
Args:
|
|
379
|
+
dt: The datetime to ensure is timezone-aware
|
|
380
|
+
|
|
381
|
+
Returns:
|
|
382
|
+
A timezone-aware datetime
|
|
383
|
+
"""
|
|
384
|
+
if dt.tzinfo is None:
|
|
385
|
+
# Naive datetime - assume it's in local time and add timezone
|
|
386
|
+
return dt.astimezone()
|
|
387
|
+
else:
|
|
388
|
+
# Already timezone-aware
|
|
389
|
+
return dt
|