claude-self-reflect 2.8.4 → 2.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile.async-importer +8 -1
- package/Dockerfile.importer +7 -3
- package/Dockerfile.importer-isolated +8 -1
- package/Dockerfile.importer-isolated.alpine +1 -1
- package/Dockerfile.importer.alpine +1 -1
- package/Dockerfile.mcp-server +7 -2
- package/Dockerfile.mcp-server.alpine +1 -1
- package/Dockerfile.mcp-server.bak +20 -0
- package/Dockerfile.safe-watcher +8 -1
- package/Dockerfile.streaming-importer +10 -4
- package/Dockerfile.streaming-importer.alpine +1 -1
- package/Dockerfile.watcher +9 -5
- package/Dockerfile.watcher.alpine +1 -1
- package/Dockerfile.watcher.bak +50 -0
- package/package.json +1 -1
- package/scripts/import-latest.py +124 -0
- package/scripts/import-old-format.py +171 -0
package/Dockerfile.async-importer
CHANGED

@@ -1,4 +1,11 @@
-FROM python:3.
+FROM python:3.13-slim
+
+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

 # Install system dependencies
 RUN apt-get update && apt-get install -y \
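The PCRE2 step above is intentionally best-effort: it upgrades libpcre2-8-0 only if a 10.46+ build is already published for the base image, and otherwise prints a warning and falls back to the general apt-get upgrade. A minimal post-build verification sketch, assuming it runs inside one of these Debian-based images (the script is illustrative and not part of the package):

#!/usr/bin/env python3
"""Illustrative check: is the installed libpcre2-8-0 new enough for CVE-2025-58050?"""
import subprocess

def installed_pcre2_version() -> str:
    # dpkg-query prints the installed version string, e.g. "10.45-1"
    return subprocess.check_output(
        ["dpkg-query", "-W", "-f=${Version}", "libpcre2-8-0"], text=True
    ).strip()

def is_patched(version: str, minimum: str = "10.46") -> bool:
    # Reuse dpkg's own Debian version-comparison rules
    return subprocess.run(["dpkg", "--compare-versions", version, "ge", minimum]).returncode == 0

if __name__ == "__main__":
    version = installed_pcre2_version()
    status = "patched" if is_patched(version) else "still below 10.46 (mitigation pending)"
    print(f"libpcre2-8-0 {version}: {status}")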
package/Dockerfile.importer
CHANGED

@@ -1,9 +1,13 @@
-FROM python:3.
+FROM python:3.13-slim

 WORKDIR /app

-#
-
+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

 # Install dependencies directly (avoids file path issues with global npm installs)
 RUN pip install --no-cache-dir \
package/Dockerfile.importer-isolated
CHANGED

@@ -1,4 +1,11 @@
-FROM python:3.
+FROM python:3.13-slim
+
+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

 # Update system packages for security and install curl
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
package/Dockerfile.mcp-server
CHANGED

@@ -1,9 +1,14 @@
-FROM python:3.
+FROM python:3.13-slim

 WORKDIR /app

+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
 # Update system packages for security
-
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

 # Copy the MCP server package files
 COPY mcp-server/pyproject.toml ./
package/Dockerfile.mcp-server.bak
ADDED

@@ -0,0 +1,20 @@
+FROM python:3.13-slim
+
+WORKDIR /app
+
+# Update system packages for security
+RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
+
+# Copy the MCP server package files
+COPY mcp-server/pyproject.toml ./
+COPY mcp-server/src ./src
+
+# Install the package in development mode
+RUN pip install --no-cache-dir -e .
+
+# Create a non-root user
+RUN useradd -m -u 1000 mcpuser
+USER mcpuser
+
+# Keep the container running and wait for docker exec commands
+CMD ["tail", "-f", "/dev/null"]
package/Dockerfile.safe-watcher
CHANGED

@@ -1,4 +1,11 @@
-FROM python:3.
+FROM python:3.13-slim
+
+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

 # Install system dependencies
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
package/Dockerfile.streaming-importer
CHANGED

@@ -1,7 +1,13 @@
-FROM python:3.
-
-#
-
+FROM python:3.13-slim
+
+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
+# Attempting explicit upgrade of libpcre2-8-0 (vulnerable: 10.45-1, fixed: 10.46+)
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available, continuing with security updates") && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
     gcc \
     g++ \
     curl \
package/Dockerfile.watcher
CHANGED

@@ -1,10 +1,14 @@
-FROM python:3.
+FROM python:3.13-slim

+# SECURITY: CVE-2025-58050 mitigation - PCRE2 heap buffer overflow
 # Update system packages for security and install build dependencies for psutil
-
-
-
-
+# TODO: Remove explicit PCRE2 upgrade when base image includes patched version
+RUN apt-get update && \
+    (apt-get install -y --only-upgrade libpcre2-8-0 2>/dev/null || \
+     echo "Warning: PCRE2 10.46+ not yet available") && \
+    apt-get upgrade -y && \
+    apt-get install -y gcc python3-dev && \
+    rm -rf /var/lib/apt/lists/*

 # Install Python dependencies
 RUN pip install --no-cache-dir \
package/Dockerfile.watcher.bak
ADDED

@@ -0,0 +1,50 @@
+FROM python:3.13-slim
+
+# Update system packages for security and install build dependencies for psutil
+RUN apt-get update && apt-get upgrade -y && apt-get install -y \
+    gcc \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+RUN pip install --no-cache-dir \
+    psutil==5.9.5 \
+    qdrant-client>=1.7.0 \
+    openai>=1.0.0 \
+    backoff>=2.2.0 \
+    requests>=2.31.0 \
+    tqdm>=4.66.0 \
+    voyageai>=0.2.0 \
+    fastembed>=0.4.0
+
+# Create non-root user
+RUN useradd -m -u 1000 watcher
+
+# Pre-download FastEmbed model to avoid runtime downloads
+RUN mkdir -p /home/watcher/.cache && \
+    FASTEMBED_CACHE_PATH=/home/watcher/.cache/fastembed python -c "from fastembed import TextEmbedding; import os; os.environ['FASTEMBED_CACHE_PATH']='/home/watcher/.cache/fastembed'; TextEmbedding('sentence-transformers/all-MiniLM-L6-v2')" && \
+    chown -R watcher:watcher /home/watcher/.cache
+
+# Create scripts directory and copy required files
+RUN mkdir -p /scripts
+
+# Copy all necessary scripts
+COPY scripts/import-conversations-unified.py /scripts/
+COPY scripts/import-watcher.py /scripts/
+COPY scripts/streaming-importer.py /scripts/
+COPY scripts/utils.py /scripts/
+COPY scripts/trigger-import.py /scripts/
+
+# Copy MCP server directory for utils
+COPY mcp-server/src/utils.py /mcp-server/src/utils.py
+
+RUN chmod +x /scripts/*.py
+
+# Set working directory
+WORKDIR /app
+
+# Switch to non-root user
+USER watcher
+
+# Default command - use streaming importer for low memory usage
+CMD ["python", "/scripts/streaming-importer.py"]
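The model pre-download step above compresses a small Python program into a single `python -c` string; unrolled for readability, it is roughly the following (same cache path and model name as in the RUN line):

# Rough unrolled equivalent of the `python -c` one-liner in the pre-download RUN step.
import os

# Point FastEmbed at the cache directory baked into the image before the model is
# constructed, so the weights are fetched at build time rather than at container start.
os.environ["FASTEMBED_CACHE_PATH"] = "/home/watcher/.cache/fastembed"

from fastembed import TextEmbedding

# Instantiating the embedding model triggers the download into FASTEMBED_CACHE_PATH.
TextEmbedding("sentence-transformers/all-MiniLM-L6-v2")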
package/package.json
CHANGED

package/scripts/import-latest.py
ADDED
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+Quick import script for current project's latest conversations.
+Designed for PreCompact hook integration - targets <10 second imports.
+"""
+
+import os
+import sys
+import json
+import subprocess
+from datetime import datetime, timedelta
+from pathlib import Path
+import logging
+
+# Configuration
+LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
+STATE_FILE = os.getenv("STATE_FILE", os.path.expanduser("~/.claude-self-reflect-state.json"))
+HOURS_BACK = int(os.getenv("IMPORT_HOURS_BACK", "2"))  # Only import last 2 hours by default
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+def load_state():
+    """Load import state from file."""
+    if os.path.exists(STATE_FILE):
+        try:
+            with open(STATE_FILE, 'r') as f:
+                return json.load(f)
+        except:
+            return {}
+    return {}
+
+def save_state(state):
+    """Save import state to file."""
+    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+    with open(STATE_FILE, 'w') as f:
+        json.dump(state, f, indent=2)
+
+def get_project_from_cwd():
+    """Detect project from current working directory."""
+    cwd = os.getcwd()
+    # Convert path to project name format used in logs
+    # Claude logs use format: -Users-username-path-to-project
+    project_name = cwd.replace('/', '-')
+    # Keep the leading dash as that's how Claude stores it
+    if not project_name.startswith('-'):
+        project_name = '-' + project_name
+    return project_name
+
+def get_recent_files(project_path: Path, hours_back: int):
+    """Get JSONL files modified in the last N hours."""
+    cutoff_time = datetime.now() - timedelta(hours=hours_back)
+    recent_files = []
+
+    for jsonl_file in project_path.glob("*.jsonl"):
+        mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
+        if mtime > cutoff_time:
+            recent_files.append(jsonl_file)
+
+    return sorted(recent_files, key=lambda f: f.stat().st_mtime, reverse=True)
+
+def main():
+    """Main quick import function."""
+    start_time = datetime.now()
+
+    # Detect current project
+    project_name = get_project_from_cwd()
+    project_path = Path(LOGS_DIR) / project_name
+
+    if not project_path.exists():
+        logger.warning(f"Project logs not found: {project_path}")
+        logger.info("Make sure you're in a project directory with Claude conversations.")
+        return
+
+    logger.info(f"Quick importing latest conversations for: {project_name}")
+
+    # Get recent files
+    recent_files = get_recent_files(project_path, HOURS_BACK)
+    logger.info(f"Found {len(recent_files)} files modified in last {HOURS_BACK} hours")
+
+    if not recent_files:
+        logger.info("No recent conversations to import")
+        return
+
+    # For now, just call the unified importer with the specific project
+    # This is a temporary solution until we implement incremental imports
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    unified_script = os.path.join(script_dir, "import-conversations-unified.py")
+
+    # Set environment to only process this project
+    env = os.environ.copy()
+    env['LOGS_DIR'] = str(project_path.parent)
+    env['IMPORT_PROJECT'] = project_name
+
+    try:
+        # Run the unified importer for just this project
+        result = subprocess.run(
+            [sys.executable, unified_script],
+            env=env,
+            capture_output=True,
+            text=True,
+            timeout=60  # 60 second timeout
+        )
+
+        if result.returncode == 0:
+            logger.info("Quick import completed successfully")
+        else:
+            logger.error(f"Import failed: {result.stderr}")
+
+    except subprocess.TimeoutExpired:
+        logger.warning("Import timed out after 60 seconds")
+    except Exception as e:
+        logger.error(f"Error during import: {e}")
+
+    # Report timing
+    elapsed = (datetime.now() - start_time).total_seconds()
+    logger.info(f"Quick import completed in {elapsed:.1f} seconds")
+
+if __name__ == "__main__":
+    main()
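import-latest.py derives the log directory from the current working directory by swapping path separators for dashes, matching how Claude Code names its per-project log folders. A small illustrative check of where the script will look (the example path is hypothetical):

# Illustrative: which directory import-latest.py would scan for a given cwd.
import os
from pathlib import Path

cwd = "/Users/me/projects/myapp"        # example working directory
project_name = cwd.replace("/", "-")    # "-Users-me-projects-myapp"
logs_dir = Path(os.path.expanduser("~/.claude/projects"))
print(logs_dir / project_name)          # ~/.claude/projects/-Users-me-projects-myapp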
package/scripts/import-old-format.py
ADDED

@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+Import old format JSONL files from Claude conversations.
+These files have a different structure with type/summary fields instead of messages.
+"""
+
+import json
+import sys
+from pathlib import Path
+import hashlib
+import uuid
+from datetime import datetime
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+from fastembed import TextEmbedding
+import logging
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def import_old_format_project(project_dir: Path, project_path: str = None):
+    """Import old format JSONL files from a project directory."""
+
+    # Initialize
+    client = QdrantClient(url='http://localhost:6333')
+    model = TextEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2', max_length=512)
+
+    # Determine project path from directory name if not provided
+    if not project_path:
+        # Convert -Users-username-projects-projectname back to path
+        dir_name = project_dir.name
+        project_path = '/' + dir_name.strip('-').replace('-', '/')
+
+    # Create collection name
+    project_hash = hashlib.md5(project_path.encode()).hexdigest()[:8]
+    collection_name = f'conv_{project_hash}_local'
+
+    logger.info(f'Project: {project_path}')
+    logger.info(f'Collection: {collection_name}')
+
+    # Create collection if needed
+    try:
+        client.get_collection(collection_name)
+        logger.info('Collection exists')
+    except:
+        client.create_collection(
+            collection_name=collection_name,
+            vectors_config=VectorParams(size=384, distance=Distance.COSINE)
+        )
+        logger.info('Created collection')
+
+    # Process all JSONL files
+    jsonl_files = list(project_dir.glob('*.jsonl'))
+    logger.info(f'Found {len(jsonl_files)} files to import')
+
+    total_points = 0
+    for file_path in jsonl_files:
+        logger.info(f'Processing {file_path.name}...')
+        points_batch = []
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            conversation_text = []
+            file_timestamp = file_path.stat().st_mtime
+
+            for line_num, line in enumerate(f, 1):
+                try:
+                    data = json.loads(line)
+                    msg_type = data.get('type', '')
+
+                    # Extract text content based on type
+                    content = None
+                    if msg_type == 'summary' and data.get('summary'):
+                        content = f"[Conversation Summary] {data['summary']}"
+                    elif msg_type == 'user' and data.get('summary'):
+                        content = f"User: {data['summary']}"
+                    elif msg_type == 'assistant' and data.get('summary'):
+                        content = f"Assistant: {data['summary']}"
+                    elif msg_type in ['user', 'assistant']:
+                        # Try to get content from other fields
+                        if 'content' in data:
+                            content = f"{msg_type.title()}: {data['content']}"
+                        elif 'text' in data:
+                            content = f"{msg_type.title()}: {data['text']}"
+
+                    if content:
+                        conversation_text.append(content)
+
+                    # Create chunks every 5 messages or at end
+                    if len(conversation_text) >= 5:
+                        chunk_text = '\n\n'.join(conversation_text)
+                        if chunk_text.strip():
+                            # Generate embedding
+                            embedding = list(model.embed([chunk_text[:2000]]))[0]  # Limit to 2000 chars
+
+                            point = PointStruct(
+                                id=str(uuid.uuid4()),
+                                vector=embedding.tolist(),
+                                payload={
+                                    'content': chunk_text[:1000],  # Store first 1000 chars
+                                    'full_content': chunk_text[:4000],  # Store more for context
+                                    'project_path': project_path,
+                                    'file_path': str(file_path),
+                                    'file_name': file_path.name,
+                                    'conversation_id': file_path.stem,
+                                    'chunk_index': len(points_batch),
+                                    'timestamp': file_timestamp,
+                                    'type': 'conversation_chunk'
+                                }
+                            )
+                            points_batch.append(point)
+                            conversation_text = []
+
+                except json.JSONDecodeError:
+                    logger.warning(f'Invalid JSON at line {line_num} in {file_path.name}')
+                except Exception as e:
+                    logger.warning(f'Error processing line {line_num}: {e}')
+
+            # Handle remaining text
+            if conversation_text:
+                chunk_text = '\n\n'.join(conversation_text)
+                if chunk_text.strip():
+                    embedding = list(model.embed([chunk_text[:2000]]))[0]
+
+                    point = PointStruct(
+                        id=str(uuid.uuid4()),
+                        vector=embedding.tolist(),
+                        payload={
+                            'content': chunk_text[:1000],
+                            'full_content': chunk_text[:4000],
+                            'project_path': project_path,
+                            'file_path': str(file_path),
+                            'file_name': file_path.name,
+                            'conversation_id': file_path.stem,
+                            'chunk_index': len(points_batch),
+                            'timestamp': file_timestamp,
+                            'type': 'conversation_chunk'
+                        }
+                    )
+                    points_batch.append(point)
+
+        # Upload batch
+        if points_batch:
+            client.upsert(collection_name=collection_name, points=points_batch)
+            logger.info(f'  Uploaded {len(points_batch)} chunks from {file_path.name}')
+            total_points += len(points_batch)
+
+    # Verify
+    info = client.get_collection(collection_name)
+    logger.info(f'\nImport complete!')
+    logger.info(f'Collection {collection_name} now has {info.points_count} points')
+    logger.info(f'Added {total_points} new points in this import')
+
+    return collection_name, total_points
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python import-old-format.py <project-directory> [project-path]")
+        print("Example: python import-old-format.py ~/.claude/projects/-Users-me-projects-myapp /Users/me/projects/myapp")
+        sys.exit(1)
+
+    project_dir = Path(sys.argv[1]).expanduser()
+    project_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+    if not project_dir.exists():
+        print(f"Error: Directory {project_dir} does not exist")
+        sys.exit(1)
+
+    import_old_format_project(project_dir, project_path)
+
+if __name__ == "__main__":
+    main()
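Once a project has been imported this way, its chunks live in a per-project collection named from an MD5 prefix of the project path. A minimal search sketch against such a collection, assuming Qdrant is reachable at the same http://localhost:6333 URL the importer uses; the project path and query text are examples, not values from the package:

# Minimal sketch: query a collection created by import-old-format.py.
import hashlib
from fastembed import TextEmbedding
from qdrant_client import QdrantClient

project_path = "/Users/me/projects/myapp"   # example project path
collection_name = f"conv_{hashlib.md5(project_path.encode()).hexdigest()[:8]}_local"

# Same local embedding model the importer uses (384-dimensional vectors)
model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
query_vector = list(model.embed(["how is the watcher container configured?"]))[0].tolist()

client = QdrantClient(url="http://localhost:6333")
hits = client.search(collection_name=collection_name, query_vector=query_vector, limit=5)
for hit in hits:
    print(f"{hit.score:.3f}  {hit.payload['content'][:80]}")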