learn_bash_from_session_data 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +45 -0
- package/bin/learn-bash.js +328 -0
- package/package.json +23 -0
- package/scripts/__init__.py +34 -0
- package/scripts/analyzer.py +591 -0
- package/scripts/extractor.py +411 -0
- package/scripts/html_generator.py +2029 -0
- package/scripts/knowledge_base.py +1593 -0
- package/scripts/main.py +443 -0
- package/scripts/parser.py +623 -0
- package/scripts/quiz_generator.py +1080 -0
package/scripts/main.py
ADDED
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Main orchestration script for learn_bash_from_session_data.
|
|
4
|
+
|
|
5
|
+
Discovers Claude session files, extracts bash commands, analyzes them,
|
|
6
|
+
generates quizzes, and produces HTML learning materials.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, List, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
# Version check
# Fail fast with a clear message instead of an obscure SyntaxError/AttributeError
# later. NOTE(review): exact 3.8 feature floor is not evident from this file — confirm.
if sys.version_info < (3, 8):
    sys.exit("Error: Python 3.8 or higher is required. Current version: "
             f"{sys.version_info.major}.{sys.version_info.minor}")

# Constants
DEFAULT_OUTPUT_DIR = "./bash-learner-output/"  # default for the -o/--output flag
MAX_UNIQUE_COMMANDS = 500  # hard cap on deduplicated commands fed to analysis/quizzes
SESSIONS_BASE_PATH = Path.home() / ".claude" / "projects"  # root of <project-hash>/sessions/*.jsonl
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_session_metadata(session_path: Path) -> Dict:
    """
    Extract metadata from a session file.

    Args:
        session_path: Path to the session JSONL file

    Returns:
        Dictionary with session metadata (path, filename, project hash,
        size, modification time, and the first JSON record if readable)
    """
    file_stat = session_path.stat()
    modified_at = datetime.fromtimestamp(file_stat.st_mtime)

    # The grandparent directory name carries the project hash
    # (layout: <project-hash>/sessions/<session>.jsonl).
    hash_hint = session_path.parent.parent.name

    # Best-effort peek at the first record; unreadable/malformed files
    # simply leave this as None.
    first_message = None
    try:
        with open(session_path, 'r', encoding='utf-8', errors='replace') as handle:
            head = handle.readline().strip()
            if head:
                first_message = json.loads(head)
    except (json.JSONDecodeError, IOError):
        first_message = None

    return {
        "path": session_path,
        "filename": session_path.name,
        "project_hash": hash_hint,
        "size_bytes": file_stat.st_size,
        "size_human": format_file_size(file_stat.st_size),
        "modified": modified_at,
        "modified_str": modified_at.strftime("%Y-%m-%d %H:%M:%S"),
        "first_message": first_message,
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def format_file_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string.

    Whole bytes are shown without a fractional digit ("512 B"); larger
    units keep one decimal place ("1.5 KB"). Anything past GB falls
    through to TB.

    Args:
        size_bytes: Non-negative size in bytes.

    Returns:
        Human-readable size string, e.g. "512 B", "2.0 KB", "1.0 TB".
    """
    # Work on a float copy instead of mutating/retyping the int argument.
    size = float(size_bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if size < 1024:
            # A fractional byte count is meaningless; only scaled units
            # get a decimal place.
            if unit == 'B':
                return f"{size:.0f} {unit}"
            return f"{size:.1f} {unit}"
        size /= 1024
    return f"{size:.1f} TB"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def discover_sessions(
    project_filter: Optional[str] = None,
    limit: Optional[int] = None
) -> List[Dict]:
    """
    Discover available Claude session files.

    Args:
        project_filter: Optional filter for project path substring
        limit: Maximum number of sessions to return

    Returns:
        List of session metadata dictionaries, sorted by modification time (newest first)
    """
    found: List[Dict] = []

    if not SESSIONS_BASE_PATH.exists():
        return found

    # Pre-lower the filter once; case-insensitive substring match on the full path.
    needle = project_filter.lower() if project_filter else None

    for project_dir in SESSIONS_BASE_PATH.iterdir():
        if not project_dir.is_dir():
            continue

        sessions_dir = project_dir / "sessions"
        if not sessions_dir.exists():
            continue

        for session_file in sessions_dir.glob("*.jsonl"):
            meta = get_session_metadata(session_file)
            if needle is not None and needle not in str(session_file).lower():
                continue
            found.append(meta)

    # Newest first.
    found.sort(key=lambda entry: entry["modified"], reverse=True)

    return found[:limit] if limit else found
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def list_sessions(project_filter: Optional[str] = None) -> None:
    """
    Display available sessions in a formatted table.

    Args:
        project_filter: Optional filter for project path substring
    """
    sessions = discover_sessions(project_filter=project_filter)

    # Guard: nothing to show — explain where sessions are expected to live.
    if not sessions:
        print("\nNo session files found.")
        print(f"\nExpected location: {SESSIONS_BASE_PATH}/<project-hash>/sessions/*.jsonl")
        print("\nMake sure you have Claude session data available.")
        return

    heavy_rule = '=' * 80
    light_rule = '-' * 80

    print(f"\n{heavy_rule}")
    print(f"Available Claude Sessions ({len(sessions)} found)")
    print(heavy_rule)
    print(f"{'#':<4} {'Date':<20} {'Size':<10} {'Filename':<30}")
    print(light_rule)

    for row_num, meta in enumerate(sessions, 1):
        # Filename truncated to keep the table within 80 columns.
        print(f"{row_num:<4} {meta['modified_str']:<20} {meta['size_human']:<10} "
              f"{meta['filename'][:30]:<30}")

    print(light_rule)
    print("\nUse -n <number> to process the N most recent sessions")
    print("Use -f <path> to process a specific session file")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def load_session_file(session_path: Path) -> List[Dict]:
    """
    Load and parse a session JSONL file.

    Blank lines are ignored; malformed JSON lines are reported on stdout
    and skipped rather than aborting the load.

    Args:
        session_path: Path to the session file

    Returns:
        List of parsed JSON objects from the session
    """
    parsed: List[Dict] = []

    with open(session_path, 'r', encoding='utf-8', errors='replace') as handle:
        for line_num, raw_line in enumerate(handle, 1):
            record = raw_line.strip()
            if not record:
                continue
            try:
                parsed.append(json.loads(record))
            except json.JSONDecodeError as e:
                # Tolerate partial corruption: warn and keep going.
                print(f"Warning: Skipping malformed JSON at line {line_num}: {e}")

    return parsed
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def run_extraction_pipeline(
    sessions: List[Dict],
    output_dir: Path
) -> Tuple[bool, str]:
    """
    Run the full extraction and generation pipeline.

    Stages: load session JSONL entries -> extract bash commands -> parse ->
    deduplicate/cap -> analyze -> generate quizzes -> render HTML, then
    write a summary.json describing the run. Progress is printed to stdout.

    Args:
        sessions: List of session metadata dictionaries
        output_dir: Directory for output files

    Returns:
        Tuple of (success: bool, message: str)
    """
    # Import processing modules (lazy import to allow standalone testing)
    try:
        from scripts.extractor import extract_commands
        from scripts.parser import parse_commands
        from scripts.analyzer import analyze_commands
        from scripts.quiz_generator import generate_quizzes
        from scripts.html_generator import generate_html
    except ImportError:
        # Try relative import for when run as script
        try:
            from extractor import extract_commands
            from parser import parse_commands
            from analyzer import analyze_commands
            from quiz_generator import generate_quizzes
            from html_generator import generate_html
        except ImportError as e:
            return False, f"Failed to import processing modules: {e}"

    # Safety check: prevent writing to critical system directories
    # NOTE(review): a plain string-prefix test also rejects siblings such as
    # "/etcetera" — confirm whether a path-component check was intended.
    output_resolved = output_dir.resolve()
    forbidden_prefixes = ['/etc', '/usr', '/bin', '/sbin', '/lib', '/boot', '/root', '/sys', '/proc']
    for prefix in forbidden_prefixes:
        if str(output_resolved).startswith(prefix):
            return False, f"Safety error: Cannot write to system directory: {output_resolved}"

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"\nProcessing {len(sessions)} session(s)...")
    print(f"Output directory: {output_dir.absolute()}")
    print("-" * 60)

    # Step 1: Load all session data (concatenate entries across all sessions)
    all_entries = []
    for session in sessions:
        print(f"Loading: {session['filename']} ({session['size_human']})")
        entries = load_session_file(session['path'])
        all_entries.extend(entries)
        print(f"  -> Loaded {len(entries)} entries")

    if not all_entries:
        return False, "No session entries found in the provided files."

    print(f"\nTotal entries loaded: {len(all_entries)}")

    # Step 2: Extract commands
    print("\nExtracting bash commands...")
    raw_commands = extract_commands(all_entries)
    print(f"  -> Found {len(raw_commands)} raw commands")

    if not raw_commands:
        return False, ("No bash commands found in the session data. "
                       "Try analyzing more sessions with -n <number>.")

    # Step 3: Parse commands
    print("\nParsing commands...")
    parsed_commands = parse_commands(raw_commands)
    print(f"  -> Parsed {len(parsed_commands)} commands")

    # Step 4: Deduplicate and cap (keeps the analysis/quiz stages bounded)
    unique_commands = deduplicate_commands(parsed_commands)
    if len(unique_commands) > MAX_UNIQUE_COMMANDS:
        print(f"\nCapping at {MAX_UNIQUE_COMMANDS} unique commands "
              f"(found {len(unique_commands)})")
        unique_commands = unique_commands[:MAX_UNIQUE_COMMANDS]
    else:
        print(f"\n{len(unique_commands)} unique commands")

    # Step 5: Analyze commands
    print("\nAnalyzing commands...")
    analysis = analyze_commands(unique_commands)
    print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")

    # Step 6: Generate quizzes
    print("\nGenerating quizzes...")
    quizzes = generate_quizzes(unique_commands, analysis)
    # The quiz generator may return a mapping of category -> questions or a
    # flat sequence; count accordingly.
    quiz_count = sum(len(q) for q in quizzes.values()) if isinstance(quizzes, dict) else len(quizzes)
    print(f"  -> Generated {quiz_count} quiz questions")

    # Step 7: Generate HTML
    print("\nGenerating HTML output...")
    html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
    print(f"  -> Created {len(html_files)} HTML files")

    # Write summary JSON describing what was produced in this run
    summary = {
        "generated_at": datetime.now().isoformat(),
        "sessions_processed": len(sessions),
        "total_entries": len(all_entries),
        "raw_commands": len(raw_commands),
        "unique_commands": len(unique_commands),
        "categories": list(analysis.get('categories', {}).keys()),
        "quiz_count": quiz_count,
        "html_files": [str(f) for f in html_files],
    }

    summary_path = output_dir / "summary.json"
    with open(summary_path, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)

    print(f"\nSummary written to: {summary_path}")

    return True, f"Successfully generated learning materials in {output_dir}"
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def deduplicate_commands(commands: List[Dict]) -> List[Dict]:
    """
    Remove duplicate commands while preserving order.

    Args:
        commands: List of parsed command dictionaries

    Returns:
        Deduplicated list of commands
    """
    keys_seen = set()
    deduped: List[Dict] = []

    for entry in commands:
        # Key on the parsed command string, falling back to the raw text.
        # Entries with neither are dropped entirely.
        dedup_key = entry.get('command', '') or entry.get('raw', '')
        if not dedup_key or dedup_key in keys_seen:
            continue
        keys_seen.add(dedup_key)
        deduped.append(entry)

    return deduped
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def parse_arguments() -> argparse.Namespace:
    """Parse command-line arguments."""
    cli = argparse.ArgumentParser(
        description="Learn Bash from Claude session data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --list List available sessions
  %(prog)s -n 5 Process 5 most recent sessions
  %(prog)s -f /path/to/session.jsonl Process specific session file
  %(prog)s -n 10 -o ./my-output/ Process 10 sessions to custom directory
  %(prog)s -l -p myproject List sessions matching 'myproject'
"""
    )

    # Argument order matters: it determines the order shown in --help.
    cli.add_argument('-n', '--sessions', type=int, default=1,
                     help='Number of recent sessions to process (default: 1)')
    cli.add_argument('-f', '--file', type=str,
                     help='Specific session file path to process')
    cli.add_argument('-o', '--output', type=str, default=DEFAULT_OUTPUT_DIR,
                     help=f'Output directory (default: {DEFAULT_OUTPUT_DIR})')
    cli.add_argument('-l', '--list', action='store_true',
                     help='List available sessions')
    cli.add_argument('-p', '--project', type=str,
                     help='Filter sessions by project path substring')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='Enable verbose output')

    return cli.parse_args()
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def main() -> int:
    """
    Main entry point.

    Returns:
        Exit code (0 for success, non-zero for errors)
    """
    args = parse_arguments()

    # --list short-circuits everything else.
    if args.list:
        list_sessions(project_filter=args.project)
        return 0

    # Build the work list: either one explicit file, or discovered sessions.
    if args.file:
        target = Path(args.file)
        if not target.exists():
            print(f"Error: Session file not found: {args.file}")
            return 1
        if target.suffix != '.jsonl':
            # Non-fatal: the file may still parse line-by-line.
            print(f"Warning: Expected .jsonl file, got: {target.suffix}")
        sessions_to_process = [get_session_metadata(target)]
    else:
        discovered = discover_sessions(
            project_filter=args.project,
            limit=args.sessions,
        )
        if not discovered:
            print("\nNo session files found.")
            print(f"\nExpected location: {SESSIONS_BASE_PATH}/<project-hash>/sessions/*.jsonl")
            print("\nTo create session data, use Claude Code and your sessions will be stored automatically.")
            print("\nUse --list to see available sessions once you have some.")
            return 1
        sessions_to_process = discovered

    # Run the pipeline.
    output_dir = Path(args.output)
    success, message = run_extraction_pipeline(sessions_to_process, output_dir)

    if not success:
        print(f"\nError: {message}")
        return 1

    banner = '=' * 60
    print(f"\n{banner}")
    print("SUCCESS!")
    print(message)
    print(banner)

    # Point the user at the generated entry page, if it exists.
    index_file = output_dir / "index.html"
    if index_file.exists():
        print(f"\nOpen {index_file.absolute()} in your browser to start learning!")

    return 0
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
if __name__ == "__main__":
    # Propagate main()'s integer exit code to the shell.
    sys.exit(main())
|