learn_bash_from_session_data 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -1
- package/scripts/html_generator.py +87 -6
- package/scripts/knowledge_base.py +24 -0
- package/scripts/main.py +78 -34
- package/scripts/quiz_generator.py +69 -37
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "learn_bash_from_session_data",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "Learn bash from your Claude Code sessions - extracts commands and generates interactive HTML lessons with 400+ commands, quizzes, and comprehensive coverage",
|
|
5
5
|
"main": "bin/learn-bash.js",
|
|
6
6
|
"bin": {
|
|
@@ -27,6 +27,13 @@
|
|
|
27
27
|
"engines": {
|
|
28
28
|
"node": ">=14.0.0"
|
|
29
29
|
},
|
|
30
|
+
"files": [
|
|
31
|
+
"bin/",
|
|
32
|
+
"scripts/*.py",
|
|
33
|
+
"scripts/__init__.py",
|
|
34
|
+
"README.md",
|
|
35
|
+
"LICENSE"
|
|
36
|
+
],
|
|
30
37
|
"repository": {
|
|
31
38
|
"type": "git",
|
|
32
39
|
"url": "git+https://github.com/bjpl/learn_bash_from_session_data.git"
|
package/scripts/html_generator.py
CHANGED
|
@@ -433,7 +433,7 @@ def render_commands_tab(commands: list[dict]) -> str:
|
|
|
433
433
|
<span class="category-badge">{category}</span>
|
|
434
434
|
</div>
|
|
435
435
|
<div class="command-meta">
|
|
436
|
-
<span class="cmd-preview">{description[:60]}{'...' if len(description) > 60 else ''}</span>
|
|
436
|
+
<span class="cmd-preview">{' '.join(description.split())[:60]}{'...' if len(' '.join(description.split())) > 60 else ''}</span>
|
|
437
437
|
<span class="expand-icon">▼</span>
|
|
438
438
|
</div>
|
|
439
439
|
</div>
|
|
@@ -2185,6 +2185,28 @@ def generate_html_files(
|
|
|
2185
2185
|
base_cmd = cmd.get('base_command', cmd_str.split()[0] if cmd_str else '')
|
|
2186
2186
|
complexity_score = cmd.get('complexity', 1)
|
|
2187
2187
|
|
|
2188
|
+
# Filter out non-bash entries (Python/JS code fragments, single chars, status text)
|
|
2189
|
+
if not base_cmd or len(base_cmd) < 2:
|
|
2190
|
+
continue
|
|
2191
|
+
# Skip entries that look like code fragments (contain parens, equals, dots as methods)
|
|
2192
|
+
if any(c in base_cmd for c in ('(', ')', '=', '{', '}')) and not base_cmd.startswith('.'):
|
|
2193
|
+
continue
|
|
2194
|
+
# Skip entries with backslashes, quotes, or HTML entities (JSONL text fragments)
|
|
2195
|
+
if any(c in base_cmd for c in ('\\', '"', "'")) or '&' in base_cmd:
|
|
2196
|
+
continue
|
|
2197
|
+
# Skip entries that are clearly not commands (capitalized status words, text fragments)
|
|
2198
|
+
if base_cmd[0].isupper() and base_cmd.isalpha() and base_cmd not in ('PATH', 'HOME'):
|
|
2199
|
+
continue
|
|
2200
|
+
# Skip common text fragments that get misidentified as commands
|
|
2201
|
+
junk_tokens = {'version', 'total', 'package', 'success', 'error', 'reading',
|
|
2202
|
+
'editing', 'done', 'warning', 'info', 'note', 'output',
|
|
2203
|
+
'task', 'goal', 'purpose', 'what', 'description'}
|
|
2204
|
+
if base_cmd.lower() in junk_tokens:
|
|
2205
|
+
continue
|
|
2206
|
+
|
|
2207
|
+
# Tokenize the command for subcommand/description generation
|
|
2208
|
+
cmd_tokens = cmd_str.split() if cmd_str else []
|
|
2209
|
+
|
|
2188
2210
|
# Look up COMMAND_DB info for this command
|
|
2189
2211
|
cmd_info = COMMAND_DB.get(base_cmd, {})
|
|
2190
2212
|
kb_flags = get_flags_for_command(base_cmd)
|
|
@@ -2261,20 +2283,79 @@ def generate_html_files(
|
|
|
2261
2283
|
'-g': 'Global scope',
|
|
2262
2284
|
'-p': 'Preserve attributes or port',
|
|
2263
2285
|
'-o': 'Output file',
|
|
2264
|
-
'-P': 'No dereference (physical path)',
|
|
2265
2286
|
}
|
|
2266
2287
|
flag_desc = common_flags.get(f, '')
|
|
2267
2288
|
formatted_flags.append({'flag': f, 'description': flag_desc})
|
|
2268
2289
|
|
|
2269
|
-
#
|
|
2290
|
+
# Generate a contextual description that differentiates commands with the same base
|
|
2270
2291
|
session_desc = cmd.get('description', '')
|
|
2271
2292
|
kb_desc = cmd_info.get('description', '')
|
|
2272
|
-
|
|
2293
|
+
|
|
2294
|
+
# Build a specific description from the actual command content
|
|
2295
|
+
args_list = cmd.get('args', [])
|
|
2296
|
+
flag_list = [fl.get('flag', '') if isinstance(fl, dict) else str(fl) for fl in formatted_flags]
|
|
2297
|
+
contextual_desc = ''
|
|
2298
|
+
|
|
2299
|
+
# For inline code execution (python -c, bash -c), summarize the code snippet
|
|
2300
|
+
if base_cmd in ('python', 'python3', 'bash', 'sh', 'node') and '-c' in flag_list:
|
|
2301
|
+
# Extract the inline code from the full command after -c
|
|
2302
|
+
c_idx = cmd_str.find('-c')
|
|
2303
|
+
if c_idx >= 0:
|
|
2304
|
+
raw_code = cmd_str[c_idx + 2:].strip().strip('"').strip("'")
|
|
2305
|
+
# Split on actual newlines before collapsing
|
|
2306
|
+
code_lines = [l.strip() for l in raw_code.splitlines() if l.strip()]
|
|
2307
|
+
# Find first non-import line for a distinctive preview
|
|
2308
|
+
action_lines = [l for l in code_lines if not l.startswith(('import ', 'from ', '#'))]
|
|
2309
|
+
if action_lines:
|
|
2310
|
+
code_part = ' '.join(action_lines[0].split())[:60]
|
|
2311
|
+
elif code_lines:
|
|
2312
|
+
# All imports - show what's being imported
|
|
2313
|
+
code_part = ' '.join(code_lines[0].split())[:60]
|
|
2314
|
+
else:
|
|
2315
|
+
code_part = ''
|
|
2316
|
+
if code_part:
|
|
2317
|
+
contextual_desc = f"{base_cmd} -c: {code_part}{'...' if len(code_part) >= 60 else ''}"
|
|
2318
|
+
|
|
2319
|
+
# For commands with subcommands (git, npm, docker, etc.), use subcommand context
|
|
2320
|
+
if not contextual_desc and cmd_tokens and len(cmd_tokens) > 1:
|
|
2321
|
+
subcmd_token = next((t for t in cmd_tokens[1:] if not t.startswith('-') and not t.startswith('"') and not t.startswith("'")), '')
|
|
2322
|
+
if subcmd_token and subcmd_token != base_cmd:
|
|
2323
|
+
subcmd_info = cmd_info.get('subcommands', {}).get(subcmd_token, '')
|
|
2324
|
+
if subcmd_info:
|
|
2325
|
+
contextual_desc = f"{base_cmd} {subcmd_token}: {subcmd_info}"
|
|
2326
|
+
else:
|
|
2327
|
+
contextual_desc = f"{base_cmd} {subcmd_token}"
|
|
2328
|
+
# Add meaningful args (skip very long ones, quotes, code)
|
|
2329
|
+
short_args = [a for a in args_list if len(str(a)) < 40 and a != subcmd_token and not a.startswith('"')]
|
|
2330
|
+
if short_args:
|
|
2331
|
+
contextual_desc += f" ({', '.join(short_args[:3])})"
|
|
2332
|
+
|
|
2333
|
+
# For commands with flags but no subcommand, describe with flags
|
|
2334
|
+
if not contextual_desc and flag_list:
|
|
2335
|
+
flag_summary = ', '.join(flag_list[:3])
|
|
2336
|
+
short_args = [a for a in args_list if len(str(a)) < 40]
|
|
2337
|
+
if short_args:
|
|
2338
|
+
contextual_desc = f"{base_cmd} {flag_summary} on {', '.join(short_args[:2])}"
|
|
2339
|
+
else:
|
|
2340
|
+
contextual_desc = f"{base_cmd} with {flag_summary}"
|
|
2341
|
+
|
|
2342
|
+
# For simple commands with just args
|
|
2343
|
+
if not contextual_desc and args_list:
|
|
2344
|
+
short_args = [a for a in args_list if len(str(a)) < 40]
|
|
2345
|
+
if short_args:
|
|
2346
|
+
contextual_desc = f"{base_cmd} {' '.join(short_args[:3])}"
|
|
2347
|
+
|
|
2348
|
+
# Priority: contextual > knowledge base > generic fallback
|
|
2349
|
+
# Session descriptions (from JSONL) describe Claude's task, NOT the command
|
|
2350
|
+
if contextual_desc:
|
|
2351
|
+
description = contextual_desc
|
|
2352
|
+
elif kb_desc:
|
|
2353
|
+
description = kb_desc
|
|
2354
|
+
else:
|
|
2355
|
+
description = f"Run {base_cmd} command"
|
|
2273
2356
|
|
|
2274
2357
|
# Get subcommand info (for commands like git, docker, npm)
|
|
2275
2358
|
subcommands = cmd_info.get('subcommands', {})
|
|
2276
|
-
# Try to identify the subcommand from the full command
|
|
2277
|
-
cmd_tokens = cmd_str.split() if cmd_str else []
|
|
2278
2359
|
subcommand_desc = ''
|
|
2279
2360
|
if subcommands and len(cmd_tokens) > 1:
|
|
2280
2361
|
for token in cmd_tokens[1:]:
|
package/scripts/knowledge_base.py
CHANGED
|
@@ -91,6 +91,8 @@ CATEGORY_MAPPINGS: Dict[str, Set[str]] = {
|
|
|
91
91
|
"history", "fc", "true", "false", "test", "[", "[[", "exit",
|
|
92
92
|
"return", "break", "continue", "shift", "getopts", "trap",
|
|
93
93
|
"ulimit", "times", "let", ":", "compgen", "complete", "compopt",
|
|
94
|
+
"cmd.exe", "cmd", "start", "where", "type",
|
|
95
|
+
"session-slides", "learn-bash", "bash-learner", "claude",
|
|
94
96
|
},
|
|
95
97
|
}
|
|
96
98
|
|
|
@@ -465,11 +467,16 @@ COMMAND_DB: Dict[str, Dict[str, Any]] = {
|
|
|
465
467
|
"-n": "Show line numbers",
|
|
466
468
|
"-c": "Count matching lines",
|
|
467
469
|
"-w": "Match whole words only",
|
|
470
|
+
"-P": "Use Perl-compatible regular expressions (PCRE)",
|
|
468
471
|
"-E": "Extended regex (same as egrep)",
|
|
472
|
+
"-F": "Fixed string matching (no regex interpretation)",
|
|
469
473
|
"-o": "Show only matching part of line",
|
|
470
474
|
"-A": "Show N lines after match",
|
|
471
475
|
"-B": "Show N lines before match",
|
|
472
476
|
"-C": "Show N lines of context (before and after)",
|
|
477
|
+
"-h": "Suppress filename prefix in output",
|
|
478
|
+
"-H": "Always show filename prefix",
|
|
479
|
+
"-q": "Quiet mode, only return exit status",
|
|
473
480
|
"--include": "Search only files matching pattern",
|
|
474
481
|
"--exclude": "Skip files matching pattern",
|
|
475
482
|
},
|
|
@@ -799,6 +806,23 @@ COMMAND_DB: Dict[str, Dict[str, Any]] = {
|
|
|
799
806
|
"flags": {
|
|
800
807
|
"--version": "Print git version",
|
|
801
808
|
"-C": "Run as if started in specified directory",
|
|
809
|
+
"--force": "Override safety checks and force the operation",
|
|
810
|
+
"-f": "Force operation (shorthand for --force)",
|
|
811
|
+
"--no-verify": "Skip pre-commit and commit-msg hooks",
|
|
812
|
+
"--amend": "Replace the tip of the current branch with a new commit",
|
|
813
|
+
"-m": "Specify commit message inline",
|
|
814
|
+
"-a": "Automatically stage modified and deleted files",
|
|
815
|
+
"-b": "Create and switch to a new branch",
|
|
816
|
+
"-d": "Delete a branch",
|
|
817
|
+
"-D": "Force delete a branch even if not fully merged",
|
|
818
|
+
"--all": "Apply to all branches or remotes",
|
|
819
|
+
"--oneline": "Compact one-line log format",
|
|
820
|
+
"--graph": "Show ASCII graph of branch and merge history",
|
|
821
|
+
"-u": "Set upstream tracking branch",
|
|
822
|
+
"--hard": "Reset working tree and index to match target",
|
|
823
|
+
"--soft": "Reset only HEAD, keep staged changes",
|
|
824
|
+
"-p": "Interactively choose hunks to stage",
|
|
825
|
+
"--stat": "Show diffstat summary of changes",
|
|
802
826
|
},
|
|
803
827
|
"subcommands": {
|
|
804
828
|
"init": "Create empty repository",
|
package/scripts/main.py
CHANGED
|
@@ -22,6 +22,7 @@ if sys.version_info < (3, 8):
|
|
|
22
22
|
# Constants
|
|
23
23
|
DEFAULT_OUTPUT_BASE = "./bash-learner-output"
|
|
24
24
|
MAX_UNIQUE_COMMANDS = 500
|
|
25
|
+
VERSION = "1.0.10"
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
|
|
@@ -51,7 +52,8 @@ def get_sessions_base_path() -> Path:
|
|
|
51
52
|
is_wsl = False
|
|
52
53
|
try:
|
|
53
54
|
with open("/proc/version", "r") as f:
|
|
54
|
-
|
|
55
|
+
proc_version = f.read().lower()
|
|
56
|
+
is_wsl = "microsoft" in proc_version or "wsl" in proc_version
|
|
55
57
|
except (FileNotFoundError, PermissionError):
|
|
56
58
|
pass
|
|
57
59
|
|
|
@@ -374,7 +376,7 @@ def run_extraction_pipeline(
|
|
|
374
376
|
# Step 5: Re-parse expanded commands to get proper base_command for each
|
|
375
377
|
parsed_expanded = parse_commands(expanded_commands)
|
|
376
378
|
|
|
377
|
-
# Step 6: Count frequencies BEFORE deduplication
|
|
379
|
+
# Step 6: Count frequencies BEFORE deduplication (for accurate usage stats)
|
|
378
380
|
cmd_frequency = Counter()
|
|
379
381
|
base_cmd_frequency = Counter()
|
|
380
382
|
|
|
@@ -386,7 +388,7 @@ def run_extraction_pipeline(
|
|
|
386
388
|
if base_cmd:
|
|
387
389
|
base_cmd_frequency[base_cmd] += 1
|
|
388
390
|
|
|
389
|
-
# Step 7: Deduplicate and
|
|
391
|
+
# Step 7: Deduplicate and attach frequency data
|
|
390
392
|
unique_commands = deduplicate_commands(parsed_expanded)
|
|
391
393
|
|
|
392
394
|
# Add frequency to each unique command
|
|
@@ -403,7 +405,7 @@ def run_extraction_pipeline(
|
|
|
403
405
|
else:
|
|
404
406
|
print(f"\n{len(unique_commands)} unique commands")
|
|
405
407
|
|
|
406
|
-
# Step
|
|
408
|
+
# Step 8: Analyze commands
|
|
407
409
|
print("\nAnalyzing commands...")
|
|
408
410
|
analysis = analyze_commands(unique_commands)
|
|
409
411
|
|
|
@@ -415,13 +417,13 @@ def run_extraction_pipeline(
|
|
|
415
417
|
analysis['operators_used'] = dict(operator_frequency)
|
|
416
418
|
print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
|
|
417
419
|
|
|
418
|
-
# Step
|
|
420
|
+
# Step 9: Generate quizzes
|
|
419
421
|
print("\nGenerating quizzes...")
|
|
420
422
|
quizzes = generate_quizzes(unique_commands, analysis)
|
|
421
423
|
quiz_count = sum(len(q) for q in quizzes.values()) if isinstance(quizzes, dict) else len(quizzes)
|
|
422
424
|
print(f" -> Generated {quiz_count} quiz questions")
|
|
423
425
|
|
|
424
|
-
# Step
|
|
426
|
+
# Step 10: Generate HTML
|
|
425
427
|
print("\nGenerating HTML output...")
|
|
426
428
|
html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
|
|
427
429
|
print(f" -> Created {len(html_files)} HTML files")
|
|
@@ -431,7 +433,7 @@ def run_extraction_pipeline(
|
|
|
431
433
|
"metadata": {
|
|
432
434
|
"generated_at": datetime.now().isoformat(),
|
|
433
435
|
"run_id": output_dir.name,
|
|
434
|
-
"version":
|
|
436
|
+
"version": VERSION,
|
|
435
437
|
},
|
|
436
438
|
"input": {
|
|
437
439
|
"sessions_processed": len(sessions),
|
|
@@ -477,8 +479,8 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
477
479
|
"""
|
|
478
480
|
Extract individual sub-commands from a compound command.
|
|
479
481
|
|
|
480
|
-
Splits commands by ||, &&, |, and ; while
|
|
481
|
-
|
|
482
|
+
Splits commands by ||, &&, |, and ; while respecting quoting
|
|
483
|
+
and skipping inline code commands (python -c, node -e, bash -c).
|
|
482
484
|
|
|
483
485
|
Args:
|
|
484
486
|
cmd_str: The compound command string
|
|
@@ -488,35 +490,77 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
488
490
|
"""
|
|
489
491
|
import re
|
|
490
492
|
|
|
491
|
-
|
|
492
|
-
|
|
493
|
+
if not cmd_str or not cmd_str.strip():
|
|
494
|
+
return []
|
|
493
495
|
|
|
494
|
-
#
|
|
495
|
-
#
|
|
496
|
-
|
|
496
|
+
# Don't split commands that contain inline code - the ; and | inside
|
|
497
|
+
# quoted code would produce garbage fragments
|
|
498
|
+
inline_patterns = [' -c "', " -c '", ' -c $', ' -e "', " -e '", ' -e $',
|
|
499
|
+
' -c\n', ' -c\r']
|
|
500
|
+
first_token = cmd_str.split()[0] if cmd_str.split() else ''
|
|
501
|
+
if first_token in ('python', 'python3', 'node', 'bash', 'sh', 'ruby', 'perl'):
|
|
502
|
+
for pat in inline_patterns:
|
|
503
|
+
if pat in cmd_str:
|
|
504
|
+
return [cmd_str.strip()]
|
|
497
505
|
|
|
506
|
+
# Quote-aware splitting: track quote depth to avoid splitting inside quotes
|
|
498
507
|
sub_commands = []
|
|
508
|
+
current = []
|
|
509
|
+
in_single = False
|
|
510
|
+
in_double = False
|
|
511
|
+
i = 0
|
|
512
|
+
chars = cmd_str
|
|
513
|
+
|
|
514
|
+
while i < len(chars):
|
|
515
|
+
c = chars[i]
|
|
516
|
+
|
|
517
|
+
# Track quoting state
|
|
518
|
+
if c == "'" and not in_double:
|
|
519
|
+
in_single = not in_single
|
|
520
|
+
current.append(c)
|
|
521
|
+
i += 1
|
|
522
|
+
elif c == '"' and not in_single:
|
|
523
|
+
in_double = not in_double
|
|
524
|
+
current.append(c)
|
|
525
|
+
i += 1
|
|
526
|
+
elif not in_single and not in_double:
|
|
527
|
+
# Check for compound operators outside quotes
|
|
528
|
+
remaining = chars[i:]
|
|
529
|
+
if remaining.startswith('&&'):
|
|
530
|
+
cmd = ''.join(current).strip()
|
|
531
|
+
if cmd:
|
|
532
|
+
sub_commands.append(cmd)
|
|
533
|
+
current = []
|
|
534
|
+
i += 2
|
|
535
|
+
elif remaining.startswith('||'):
|
|
536
|
+
cmd = ''.join(current).strip()
|
|
537
|
+
if cmd:
|
|
538
|
+
sub_commands.append(cmd)
|
|
539
|
+
current = []
|
|
540
|
+
i += 2
|
|
541
|
+
elif c == ';':
|
|
542
|
+
cmd = ''.join(current).strip()
|
|
543
|
+
if cmd:
|
|
544
|
+
sub_commands.append(cmd)
|
|
545
|
+
current = []
|
|
546
|
+
i += 1
|
|
547
|
+
elif c == '|' and not remaining.startswith('||'):
|
|
548
|
+
cmd = ''.join(current).strip()
|
|
549
|
+
if cmd:
|
|
550
|
+
sub_commands.append(cmd)
|
|
551
|
+
current = []
|
|
552
|
+
i += 1
|
|
553
|
+
else:
|
|
554
|
+
current.append(c)
|
|
555
|
+
i += 1
|
|
556
|
+
else:
|
|
557
|
+
current.append(c)
|
|
558
|
+
i += 1
|
|
499
559
|
|
|
500
|
-
#
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
# Split each part by &&
|
|
505
|
-
and_parts = re.split(r'\s*&&\s*', or_part)
|
|
506
|
-
|
|
507
|
-
for and_part in and_parts:
|
|
508
|
-
# Split each part by ; (sequential)
|
|
509
|
-
seq_parts = re.split(r'\s*;\s*', and_part)
|
|
510
|
-
|
|
511
|
-
for seq_part in seq_parts:
|
|
512
|
-
# Split by single pipe |
|
|
513
|
-
# Use negative lookbehind/lookahead to avoid ||
|
|
514
|
-
pipe_parts = re.split(r'(?<!\|)\|(?!\|)', seq_part)
|
|
515
|
-
|
|
516
|
-
for pipe_part in pipe_parts:
|
|
517
|
-
cleaned = pipe_part.strip()
|
|
518
|
-
if cleaned:
|
|
519
|
-
sub_commands.append(cleaned)
|
|
560
|
+
# Add final segment
|
|
561
|
+
cmd = ''.join(current).strip()
|
|
562
|
+
if cmd:
|
|
563
|
+
sub_commands.append(cmd)
|
|
520
564
|
|
|
521
565
|
return sub_commands
|
|
522
566
|
|
package/scripts/quiz_generator.py
CHANGED
|
@@ -784,29 +784,25 @@ def generate_what_does_quiz(
|
|
|
784
784
|
QuizQuestion instance
|
|
785
785
|
"""
|
|
786
786
|
cmd_string = command.get("command", "")
|
|
787
|
-
description = command.get("description", "")
|
|
788
787
|
complexity = command.get("complexity", 2)
|
|
789
788
|
|
|
790
789
|
parsed = _parse_command(cmd_string)
|
|
791
790
|
base_cmd = parsed["base"]
|
|
792
791
|
|
|
793
|
-
#
|
|
794
|
-
correct_desc =
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
correct_desc =
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
808
|
-
if flag_descs:
|
|
809
|
-
correct_desc += " using " + ", ".join(flag_descs)
|
|
792
|
+
# Always use the educational bash description generator (not session descriptions)
|
|
793
|
+
correct_desc = _generate_bash_description(cmd_string)
|
|
794
|
+
# Capitalize first letter for consistent formatting
|
|
795
|
+
if correct_desc:
|
|
796
|
+
correct_desc = correct_desc[0].upper() + correct_desc[1:]
|
|
797
|
+
|
|
798
|
+
# Add flag details if available
|
|
799
|
+
flag_descs = []
|
|
800
|
+
for flag in parsed["flags"]:
|
|
801
|
+
fd = _get_flag_description(base_cmd, flag)
|
|
802
|
+
if fd:
|
|
803
|
+
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
804
|
+
if flag_descs:
|
|
805
|
+
correct_desc += " using " + ", ".join(flag_descs)
|
|
810
806
|
|
|
811
807
|
# Generate distractors
|
|
812
808
|
distractor_descriptions = _generate_distractor_descriptions(correct_desc, 3)
|
|
@@ -965,8 +961,6 @@ def generate_build_command_quiz(
|
|
|
965
961
|
QuizQuestion instance
|
|
966
962
|
"""
|
|
967
963
|
cmd_string = command.get("command", "")
|
|
968
|
-
description = command.get("description", "")
|
|
969
|
-
intent = command.get("intent", description)
|
|
970
964
|
|
|
971
965
|
parsed = _parse_command(cmd_string)
|
|
972
966
|
base_cmd = parsed["base"]
|
|
@@ -1051,14 +1045,8 @@ def generate_build_command_quiz(
|
|
|
1051
1045
|
|
|
1052
1046
|
question_id = _generate_id(f"build_{cmd_string}")
|
|
1053
1047
|
|
|
1054
|
-
#
|
|
1055
|
-
|
|
1056
|
-
task_description = intent
|
|
1057
|
-
elif description:
|
|
1058
|
-
task_description = description
|
|
1059
|
-
else:
|
|
1060
|
-
# Generate educational description from the command
|
|
1061
|
-
task_description = _generate_bash_description(cmd_string)
|
|
1048
|
+
# Always generate description from the command itself (not session descriptions)
|
|
1049
|
+
task_description = _generate_bash_description(cmd_string)
|
|
1062
1050
|
|
|
1063
1051
|
return QuizQuestion(
|
|
1064
1052
|
id=question_id,
|
|
@@ -1113,14 +1101,19 @@ def generate_spot_difference_quiz(
|
|
|
1113
1101
|
|
|
1114
1102
|
# Build the correct explanation of difference
|
|
1115
1103
|
differences = []
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
differences.append(f"Command 1 has `{flag}` ({desc or 'unknown'})")
|
|
1120
|
-
if only_in_2:
|
|
1121
|
-
for flag in only_in_2:
|
|
1104
|
+
has_unknown = False
|
|
1105
|
+
for flag_set, label in [(only_in_1, "Command 1"), (only_in_2, "Command 2")]:
|
|
1106
|
+
for flag in flag_set:
|
|
1122
1107
|
desc = _get_flag_description(base_cmd, flag)
|
|
1123
|
-
|
|
1108
|
+
# Handle numeric flags like -3 (shorthand for -n 3)
|
|
1109
|
+
if not desc and re.match(r'^-\d+$', flag):
|
|
1110
|
+
desc = f"Specify count ({flag[1:]})"
|
|
1111
|
+
if not desc:
|
|
1112
|
+
has_unknown = True
|
|
1113
|
+
differences.append(f"{label} has `{flag}` ({desc or 'specifies an option'})")
|
|
1114
|
+
# Skip questions where we can't explain the flags well
|
|
1115
|
+
if has_unknown:
|
|
1116
|
+
return None
|
|
1124
1117
|
if parsed1["args"] != parsed2["args"]:
|
|
1125
1118
|
differences.append(f"Different arguments: '{' '.join(parsed1['args'])}' vs '{' '.join(parsed2['args'])}'")
|
|
1126
1119
|
|
|
@@ -1236,14 +1229,33 @@ def generate_quiz_set(
|
|
|
1236
1229
|
"""
|
|
1237
1230
|
questions: list[QuizQuestion] = []
|
|
1238
1231
|
|
|
1232
|
+
# Filter out non-bash entries (Python code fragments, junk tokens, single chars)
|
|
1233
|
+
junk_tokens = {'version', 'total', 'package', 'success', 'error', 'reading',
|
|
1234
|
+
'editing', 'done', 'warning', 'info', 'note', 'output',
|
|
1235
|
+
'task', 'goal', 'purpose', 'what', 'description'}
|
|
1236
|
+
clean_commands = []
|
|
1237
|
+
for cmd in analyzed_commands:
|
|
1238
|
+
base = cmd.get("base_command", "")
|
|
1239
|
+
if not base or len(base) < 2:
|
|
1240
|
+
continue
|
|
1241
|
+
if any(c in base for c in ('(', ')', '=', '{', '}')):
|
|
1242
|
+
continue
|
|
1243
|
+
if any(c in base for c in ('\\', '"', "'")) or '&' in base:
|
|
1244
|
+
continue
|
|
1245
|
+
if base[0].isupper() and base.isalpha() and base not in ('PATH', 'HOME'):
|
|
1246
|
+
continue
|
|
1247
|
+
if base.lower() in junk_tokens:
|
|
1248
|
+
continue
|
|
1249
|
+
clean_commands.append(cmd)
|
|
1250
|
+
|
|
1239
1251
|
# Filter commands by complexity >= 2
|
|
1240
1252
|
eligible_commands = [
|
|
1241
|
-
cmd for cmd in
|
|
1253
|
+
cmd for cmd in clean_commands
|
|
1242
1254
|
if cmd.get("complexity", 0) >= 2
|
|
1243
1255
|
]
|
|
1244
1256
|
|
|
1245
1257
|
if not eligible_commands:
|
|
1246
|
-
eligible_commands = analyzed_commands
|
|
1258
|
+
eligible_commands = clean_commands if clean_commands else analyzed_commands
|
|
1247
1259
|
|
|
1248
1260
|
# Weight toward high-frequency commands
|
|
1249
1261
|
weighted_commands = []
|
|
@@ -1270,12 +1282,17 @@ def generate_quiz_set(
|
|
|
1270
1282
|
QuizType.SPOT_DIFFERENCE: set(),
|
|
1271
1283
|
}
|
|
1272
1284
|
|
|
1285
|
+
# Max command length for readable quiz questions
|
|
1286
|
+
MAX_QUIZ_CMD_LEN = 200
|
|
1287
|
+
|
|
1273
1288
|
# Generate "What does this do?" questions
|
|
1274
1289
|
random.shuffle(weighted_commands)
|
|
1275
1290
|
for cmd in weighted_commands:
|
|
1276
1291
|
if len([q for q in questions if q.quiz_type == QuizType.WHAT_DOES]) >= target_what_does:
|
|
1277
1292
|
break
|
|
1278
1293
|
cmd_id = cmd.get("command", "")
|
|
1294
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1295
|
+
continue
|
|
1279
1296
|
if cmd_id not in used_per_type[QuizType.WHAT_DOES]:
|
|
1280
1297
|
q = generate_what_does_quiz(cmd)
|
|
1281
1298
|
questions.append(q)
|
|
@@ -1299,6 +1316,8 @@ def generate_quiz_set(
|
|
|
1299
1316
|
if len([q for q in questions if q.quiz_type == QuizType.BUILD_COMMAND]) >= target_build:
|
|
1300
1317
|
break
|
|
1301
1318
|
cmd_id = cmd.get("command", "")
|
|
1319
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1320
|
+
continue
|
|
1302
1321
|
if cmd_id not in used_per_type[QuizType.BUILD_COMMAND]:
|
|
1303
1322
|
q = generate_build_command_quiz(cmd)
|
|
1304
1323
|
questions.append(q)
|
|
@@ -1310,6 +1329,8 @@ def generate_quiz_set(
|
|
|
1310
1329
|
if len([q for q in questions if q.quiz_type == QuizType.SPOT_DIFFERENCE]) >= target_spot_diff:
|
|
1311
1330
|
break
|
|
1312
1331
|
cmd_id = cmd.get("command", "")
|
|
1332
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1333
|
+
continue
|
|
1313
1334
|
if cmd_id not in used_per_type[QuizType.SPOT_DIFFERENCE]:
|
|
1314
1335
|
variant = _create_similar_command_variant(cmd)
|
|
1315
1336
|
if variant:
|
|
@@ -1318,6 +1339,17 @@ def generate_quiz_set(
|
|
|
1318
1339
|
questions.append(q)
|
|
1319
1340
|
used_per_type[QuizType.SPOT_DIFFERENCE].add(cmd_id)
|
|
1320
1341
|
|
|
1342
|
+
# Deduplicate by question text (same question can come from different commands)
|
|
1343
|
+
seen_texts = set()
|
|
1344
|
+
deduped = []
|
|
1345
|
+
for q in questions:
|
|
1346
|
+
# Normalize: take first 80 chars of question text
|
|
1347
|
+
q_key = q.question_text[:80]
|
|
1348
|
+
if q_key not in seen_texts:
|
|
1349
|
+
deduped.append(q)
|
|
1350
|
+
seen_texts.add(q_key)
|
|
1351
|
+
questions = deduped
|
|
1352
|
+
|
|
1321
1353
|
# Shuffle final questions
|
|
1322
1354
|
random.shuffle(questions)
|
|
1323
1355
|
|