learn_bash_from_session_data 1.0.9 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -1
- package/scripts/html_generator.py +10 -6
- package/scripts/knowledge_base.py +22 -0
- package/scripts/main.py +78 -34
- package/scripts/quiz_generator.py +69 -37
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "learn_bash_from_session_data",
|
|
3
|
-
"version": "1.0.9",
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "Learn bash from your Claude Code sessions - extracts commands and generates interactive HTML lessons with 400+ commands, quizzes, and comprehensive coverage",
|
|
5
5
|
"main": "bin/learn-bash.js",
|
|
6
6
|
"bin": {
|
|
@@ -27,6 +27,13 @@
|
|
|
27
27
|
"engines": {
|
|
28
28
|
"node": ">=14.0.0"
|
|
29
29
|
},
|
|
30
|
+
"files": [
|
|
31
|
+
"bin/",
|
|
32
|
+
"scripts/*.py",
|
|
33
|
+
"scripts/__init__.py",
|
|
34
|
+
"README.md",
|
|
35
|
+
"LICENSE"
|
|
36
|
+
],
|
|
30
37
|
"repository": {
|
|
31
38
|
"type": "git",
|
|
32
39
|
"url": "git+https://github.com/bjpl/learn_bash_from_session_data.git"
|
package/scripts/html_generator.py
CHANGED
|
@@ -2191,12 +2191,16 @@ def generate_html_files(
|
|
|
2191
2191
|
# Skip entries that look like code fragments (contain parens, equals, dots as methods)
|
|
2192
2192
|
if any(c in base_cmd for c in ('(', ')', '=', '{', '}')) and not base_cmd.startswith('.'):
|
|
2193
2193
|
continue
|
|
2194
|
+
# Skip entries with backslashes, quotes, or HTML entities (JSONL text fragments)
|
|
2195
|
+
if any(c in base_cmd for c in ('\\', '"', "'")) or '&' in base_cmd:
|
|
2196
|
+
continue
|
|
2194
2197
|
# Skip entries that are clearly not commands (capitalized status words, text fragments)
|
|
2195
2198
|
if base_cmd[0].isupper() and base_cmd.isalpha() and base_cmd not in ('PATH', 'HOME'):
|
|
2196
2199
|
continue
|
|
2197
2200
|
# Skip common text fragments that get misidentified as commands
|
|
2198
2201
|
junk_tokens = {'version', 'total', 'package', 'success', 'error', 'reading',
|
|
2199
|
-
'editing', 'done', 'warning', 'info', 'note', 'output'}
|
|
2202
|
+
'editing', 'done', 'warning', 'info', 'note', 'output',
|
|
2203
|
+
'task', 'goal', 'purpose', 'what', 'description'}
|
|
2200
2204
|
if base_cmd.lower() in junk_tokens:
|
|
2201
2205
|
continue
|
|
2202
2206
|
|
|
@@ -2279,7 +2283,6 @@ def generate_html_files(
|
|
|
2279
2283
|
'-g': 'Global scope',
|
|
2280
2284
|
'-p': 'Preserve attributes or port',
|
|
2281
2285
|
'-o': 'Output file',
|
|
2282
|
-
'-P': 'No dereference (physical path)',
|
|
2283
2286
|
}
|
|
2284
2287
|
flag_desc = common_flags.get(f, '')
|
|
2285
2288
|
formatted_flags.append({'flag': f, 'description': flag_desc})
|
|
@@ -2342,13 +2345,14 @@ def generate_html_files(
|
|
|
2342
2345
|
if short_args:
|
|
2343
2346
|
contextual_desc = f"{base_cmd} {' '.join(short_args[:3])}"
|
|
2344
2347
|
|
|
2345
|
-
# Priority: contextual >
|
|
2348
|
+
# Priority: contextual > knowledge base > generic fallback
|
|
2349
|
+
# Session descriptions (from JSONL) describe Claude's task, NOT the command
|
|
2346
2350
|
if contextual_desc:
|
|
2347
2351
|
description = contextual_desc
|
|
2348
|
-
elif
|
|
2349
|
-
description =
|
|
2352
|
+
elif kb_desc:
|
|
2353
|
+
description = kb_desc
|
|
2350
2354
|
else:
|
|
2351
|
-
description =
|
|
2355
|
+
description = f"Run {base_cmd} command"
|
|
2352
2356
|
|
|
2353
2357
|
# Get subcommand info (for commands like git, docker, npm)
|
|
2354
2358
|
subcommands = cmd_info.get('subcommands', {})
|
package/scripts/knowledge_base.py
CHANGED
|
@@ -467,11 +467,16 @@ COMMAND_DB: Dict[str, Dict[str, Any]] = {
|
|
|
467
467
|
"-n": "Show line numbers",
|
|
468
468
|
"-c": "Count matching lines",
|
|
469
469
|
"-w": "Match whole words only",
|
|
470
|
+
"-P": "Use Perl-compatible regular expressions (PCRE)",
|
|
470
471
|
"-E": "Extended regex (same as egrep)",
|
|
472
|
+
"-F": "Fixed string matching (no regex interpretation)",
|
|
471
473
|
"-o": "Show only matching part of line",
|
|
472
474
|
"-A": "Show N lines after match",
|
|
473
475
|
"-B": "Show N lines before match",
|
|
474
476
|
"-C": "Show N lines of context (before and after)",
|
|
477
|
+
"-h": "Suppress filename prefix in output",
|
|
478
|
+
"-H": "Always show filename prefix",
|
|
479
|
+
"-q": "Quiet mode, only return exit status",
|
|
475
480
|
"--include": "Search only files matching pattern",
|
|
476
481
|
"--exclude": "Skip files matching pattern",
|
|
477
482
|
},
|
|
@@ -801,6 +806,23 @@ COMMAND_DB: Dict[str, Dict[str, Any]] = {
|
|
|
801
806
|
"flags": {
|
|
802
807
|
"--version": "Print git version",
|
|
803
808
|
"-C": "Run as if started in specified directory",
|
|
809
|
+
"--force": "Override safety checks and force the operation",
|
|
810
|
+
"-f": "Force operation (shorthand for --force)",
|
|
811
|
+
"--no-verify": "Skip pre-commit and commit-msg hooks",
|
|
812
|
+
"--amend": "Replace the tip of the current branch with a new commit",
|
|
813
|
+
"-m": "Specify commit message inline",
|
|
814
|
+
"-a": "Automatically stage modified and deleted files",
|
|
815
|
+
"-b": "Create and switch to a new branch",
|
|
816
|
+
"-d": "Delete a branch",
|
|
817
|
+
"-D": "Force delete a branch even if not fully merged",
|
|
818
|
+
"--all": "Apply to all branches or remotes",
|
|
819
|
+
"--oneline": "Compact one-line log format",
|
|
820
|
+
"--graph": "Show ASCII graph of branch and merge history",
|
|
821
|
+
"-u": "Set upstream tracking branch",
|
|
822
|
+
"--hard": "Reset working tree and index to match target",
|
|
823
|
+
"--soft": "Reset only HEAD, keep staged changes",
|
|
824
|
+
"-p": "Interactively choose hunks to stage",
|
|
825
|
+
"--stat": "Show diffstat summary of changes",
|
|
804
826
|
},
|
|
805
827
|
"subcommands": {
|
|
806
828
|
"init": "Create empty repository",
|
package/scripts/main.py
CHANGED
|
@@ -22,6 +22,7 @@ if sys.version_info < (3, 8):
|
|
|
22
22
|
# Constants
|
|
23
23
|
DEFAULT_OUTPUT_BASE = "./bash-learner-output"
|
|
24
24
|
MAX_UNIQUE_COMMANDS = 500
|
|
25
|
+
VERSION = "1.0.10"
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
|
|
@@ -51,7 +52,8 @@ def get_sessions_base_path() -> Path:
|
|
|
51
52
|
is_wsl = False
|
|
52
53
|
try:
|
|
53
54
|
with open("/proc/version", "r") as f:
|
|
54
|
-
|
|
55
|
+
proc_version = f.read().lower()
|
|
56
|
+
is_wsl = "microsoft" in proc_version or "wsl" in proc_version
|
|
55
57
|
except (FileNotFoundError, PermissionError):
|
|
56
58
|
pass
|
|
57
59
|
|
|
@@ -374,7 +376,7 @@ def run_extraction_pipeline(
|
|
|
374
376
|
# Step 5: Re-parse expanded commands to get proper base_command for each
|
|
375
377
|
parsed_expanded = parse_commands(expanded_commands)
|
|
376
378
|
|
|
377
|
-
# Step 6: Count frequencies BEFORE deduplication
|
|
379
|
+
# Step 6: Count frequencies BEFORE deduplication (for accurate usage stats)
|
|
378
380
|
cmd_frequency = Counter()
|
|
379
381
|
base_cmd_frequency = Counter()
|
|
380
382
|
|
|
@@ -386,7 +388,7 @@ def run_extraction_pipeline(
|
|
|
386
388
|
if base_cmd:
|
|
387
389
|
base_cmd_frequency[base_cmd] += 1
|
|
388
390
|
|
|
389
|
-
# Step 7: Deduplicate and
|
|
391
|
+
# Step 7: Deduplicate and attach frequency data
|
|
390
392
|
unique_commands = deduplicate_commands(parsed_expanded)
|
|
391
393
|
|
|
392
394
|
# Add frequency to each unique command
|
|
@@ -403,7 +405,7 @@ def run_extraction_pipeline(
|
|
|
403
405
|
else:
|
|
404
406
|
print(f"\n{len(unique_commands)} unique commands")
|
|
405
407
|
|
|
406
|
-
# Step
|
|
408
|
+
# Step 8: Analyze commands
|
|
407
409
|
print("\nAnalyzing commands...")
|
|
408
410
|
analysis = analyze_commands(unique_commands)
|
|
409
411
|
|
|
@@ -415,13 +417,13 @@ def run_extraction_pipeline(
|
|
|
415
417
|
analysis['operators_used'] = dict(operator_frequency)
|
|
416
418
|
print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
|
|
417
419
|
|
|
418
|
-
# Step
|
|
420
|
+
# Step 9: Generate quizzes
|
|
419
421
|
print("\nGenerating quizzes...")
|
|
420
422
|
quizzes = generate_quizzes(unique_commands, analysis)
|
|
421
423
|
quiz_count = sum(len(q) for q in quizzes.values()) if isinstance(quizzes, dict) else len(quizzes)
|
|
422
424
|
print(f" -> Generated {quiz_count} quiz questions")
|
|
423
425
|
|
|
424
|
-
# Step
|
|
426
|
+
# Step 10: Generate HTML
|
|
425
427
|
print("\nGenerating HTML output...")
|
|
426
428
|
html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
|
|
427
429
|
print(f" -> Created {len(html_files)} HTML files")
|
|
@@ -431,7 +433,7 @@ def run_extraction_pipeline(
|
|
|
431
433
|
"metadata": {
|
|
432
434
|
"generated_at": datetime.now().isoformat(),
|
|
433
435
|
"run_id": output_dir.name,
|
|
434
|
-
"version":
|
|
436
|
+
"version": VERSION,
|
|
435
437
|
},
|
|
436
438
|
"input": {
|
|
437
439
|
"sessions_processed": len(sessions),
|
|
@@ -477,8 +479,8 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
477
479
|
"""
|
|
478
480
|
Extract individual sub-commands from a compound command.
|
|
479
481
|
|
|
480
|
-
Splits commands by ||, &&, |, and ; while
|
|
481
|
-
|
|
482
|
+
Splits commands by ||, &&, |, and ; while respecting quoting
|
|
483
|
+
and skipping inline code commands (python -c, node -e, bash -c).
|
|
482
484
|
|
|
483
485
|
Args:
|
|
484
486
|
cmd_str: The compound command string
|
|
@@ -488,35 +490,77 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
488
490
|
"""
|
|
489
491
|
import re
|
|
490
492
|
|
|
491
|
-
|
|
492
|
-
|
|
493
|
+
if not cmd_str or not cmd_str.strip():
|
|
494
|
+
return []
|
|
493
495
|
|
|
494
|
-
#
|
|
495
|
-
#
|
|
496
|
-
|
|
496
|
+
# Don't split commands that contain inline code - the ; and | inside
|
|
497
|
+
# quoted code would produce garbage fragments
|
|
498
|
+
inline_patterns = [' -c "', " -c '", ' -c $', ' -e "', " -e '", ' -e $',
|
|
499
|
+
' -c\n', ' -c\r']
|
|
500
|
+
first_token = cmd_str.split()[0] if cmd_str.split() else ''
|
|
501
|
+
if first_token in ('python', 'python3', 'node', 'bash', 'sh', 'ruby', 'perl'):
|
|
502
|
+
for pat in inline_patterns:
|
|
503
|
+
if pat in cmd_str:
|
|
504
|
+
return [cmd_str.strip()]
|
|
497
505
|
|
|
506
|
+
# Quote-aware splitting: track quote depth to avoid splitting inside quotes
|
|
498
507
|
sub_commands = []
|
|
508
|
+
current = []
|
|
509
|
+
in_single = False
|
|
510
|
+
in_double = False
|
|
511
|
+
i = 0
|
|
512
|
+
chars = cmd_str
|
|
513
|
+
|
|
514
|
+
while i < len(chars):
|
|
515
|
+
c = chars[i]
|
|
516
|
+
|
|
517
|
+
# Track quoting state
|
|
518
|
+
if c == "'" and not in_double:
|
|
519
|
+
in_single = not in_single
|
|
520
|
+
current.append(c)
|
|
521
|
+
i += 1
|
|
522
|
+
elif c == '"' and not in_single:
|
|
523
|
+
in_double = not in_double
|
|
524
|
+
current.append(c)
|
|
525
|
+
i += 1
|
|
526
|
+
elif not in_single and not in_double:
|
|
527
|
+
# Check for compound operators outside quotes
|
|
528
|
+
remaining = chars[i:]
|
|
529
|
+
if remaining.startswith('&&'):
|
|
530
|
+
cmd = ''.join(current).strip()
|
|
531
|
+
if cmd:
|
|
532
|
+
sub_commands.append(cmd)
|
|
533
|
+
current = []
|
|
534
|
+
i += 2
|
|
535
|
+
elif remaining.startswith('||'):
|
|
536
|
+
cmd = ''.join(current).strip()
|
|
537
|
+
if cmd:
|
|
538
|
+
sub_commands.append(cmd)
|
|
539
|
+
current = []
|
|
540
|
+
i += 2
|
|
541
|
+
elif c == ';':
|
|
542
|
+
cmd = ''.join(current).strip()
|
|
543
|
+
if cmd:
|
|
544
|
+
sub_commands.append(cmd)
|
|
545
|
+
current = []
|
|
546
|
+
i += 1
|
|
547
|
+
elif c == '|' and not remaining.startswith('||'):
|
|
548
|
+
cmd = ''.join(current).strip()
|
|
549
|
+
if cmd:
|
|
550
|
+
sub_commands.append(cmd)
|
|
551
|
+
current = []
|
|
552
|
+
i += 1
|
|
553
|
+
else:
|
|
554
|
+
current.append(c)
|
|
555
|
+
i += 1
|
|
556
|
+
else:
|
|
557
|
+
current.append(c)
|
|
558
|
+
i += 1
|
|
499
559
|
|
|
500
|
-
#
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
# Split each part by &&
|
|
505
|
-
and_parts = re.split(r'\s*&&\s*', or_part)
|
|
506
|
-
|
|
507
|
-
for and_part in and_parts:
|
|
508
|
-
# Split each part by ; (sequential)
|
|
509
|
-
seq_parts = re.split(r'\s*;\s*', and_part)
|
|
510
|
-
|
|
511
|
-
for seq_part in seq_parts:
|
|
512
|
-
# Split by single pipe |
|
|
513
|
-
# Use negative lookbehind/lookahead to avoid ||
|
|
514
|
-
pipe_parts = re.split(r'(?<!\|)\|(?!\|)', seq_part)
|
|
515
|
-
|
|
516
|
-
for pipe_part in pipe_parts:
|
|
517
|
-
cleaned = pipe_part.strip()
|
|
518
|
-
if cleaned:
|
|
519
|
-
sub_commands.append(cleaned)
|
|
560
|
+
# Add final segment
|
|
561
|
+
cmd = ''.join(current).strip()
|
|
562
|
+
if cmd:
|
|
563
|
+
sub_commands.append(cmd)
|
|
520
564
|
|
|
521
565
|
return sub_commands
|
|
522
566
|
|
package/scripts/quiz_generator.py
CHANGED
|
@@ -784,29 +784,25 @@ def generate_what_does_quiz(
|
|
|
784
784
|
QuizQuestion instance
|
|
785
785
|
"""
|
|
786
786
|
cmd_string = command.get("command", "")
|
|
787
|
-
description = command.get("description", "")
|
|
788
787
|
complexity = command.get("complexity", 2)
|
|
789
788
|
|
|
790
789
|
parsed = _parse_command(cmd_string)
|
|
791
790
|
base_cmd = parsed["base"]
|
|
792
791
|
|
|
793
|
-
#
|
|
794
|
-
correct_desc =
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
correct_desc =
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
808
|
-
if flag_descs:
|
|
809
|
-
correct_desc += " using " + ", ".join(flag_descs)
|
|
792
|
+
# Always use the educational bash description generator (not session descriptions)
|
|
793
|
+
correct_desc = _generate_bash_description(cmd_string)
|
|
794
|
+
# Capitalize first letter for consistent formatting
|
|
795
|
+
if correct_desc:
|
|
796
|
+
correct_desc = correct_desc[0].upper() + correct_desc[1:]
|
|
797
|
+
|
|
798
|
+
# Add flag details if available
|
|
799
|
+
flag_descs = []
|
|
800
|
+
for flag in parsed["flags"]:
|
|
801
|
+
fd = _get_flag_description(base_cmd, flag)
|
|
802
|
+
if fd:
|
|
803
|
+
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
804
|
+
if flag_descs:
|
|
805
|
+
correct_desc += " using " + ", ".join(flag_descs)
|
|
810
806
|
|
|
811
807
|
# Generate distractors
|
|
812
808
|
distractor_descriptions = _generate_distractor_descriptions(correct_desc, 3)
|
|
@@ -965,8 +961,6 @@ def generate_build_command_quiz(
|
|
|
965
961
|
QuizQuestion instance
|
|
966
962
|
"""
|
|
967
963
|
cmd_string = command.get("command", "")
|
|
968
|
-
description = command.get("description", "")
|
|
969
|
-
intent = command.get("intent", description)
|
|
970
964
|
|
|
971
965
|
parsed = _parse_command(cmd_string)
|
|
972
966
|
base_cmd = parsed["base"]
|
|
@@ -1051,14 +1045,8 @@ def generate_build_command_quiz(
|
|
|
1051
1045
|
|
|
1052
1046
|
question_id = _generate_id(f"build_{cmd_string}")
|
|
1053
1047
|
|
|
1054
|
-
#
|
|
1055
|
-
|
|
1056
|
-
task_description = intent
|
|
1057
|
-
elif description:
|
|
1058
|
-
task_description = description
|
|
1059
|
-
else:
|
|
1060
|
-
# Generate educational description from the command
|
|
1061
|
-
task_description = _generate_bash_description(cmd_string)
|
|
1048
|
+
# Always generate description from the command itself (not session descriptions)
|
|
1049
|
+
task_description = _generate_bash_description(cmd_string)
|
|
1062
1050
|
|
|
1063
1051
|
return QuizQuestion(
|
|
1064
1052
|
id=question_id,
|
|
@@ -1113,14 +1101,19 @@ def generate_spot_difference_quiz(
|
|
|
1113
1101
|
|
|
1114
1102
|
# Build the correct explanation of difference
|
|
1115
1103
|
differences = []
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
differences.append(f"Command 1 has `{flag}` ({desc or 'unknown'})")
|
|
1120
|
-
if only_in_2:
|
|
1121
|
-
for flag in only_in_2:
|
|
1104
|
+
has_unknown = False
|
|
1105
|
+
for flag_set, label in [(only_in_1, "Command 1"), (only_in_2, "Command 2")]:
|
|
1106
|
+
for flag in flag_set:
|
|
1122
1107
|
desc = _get_flag_description(base_cmd, flag)
|
|
1123
|
-
|
|
1108
|
+
# Handle numeric flags like -3 (shorthand for -n 3)
|
|
1109
|
+
if not desc and re.match(r'^-\d+$', flag):
|
|
1110
|
+
desc = f"Specify count ({flag[1:]})"
|
|
1111
|
+
if not desc:
|
|
1112
|
+
has_unknown = True
|
|
1113
|
+
differences.append(f"{label} has `{flag}` ({desc or 'specifies an option'})")
|
|
1114
|
+
# Skip questions where we can't explain the flags well
|
|
1115
|
+
if has_unknown:
|
|
1116
|
+
return None
|
|
1124
1117
|
if parsed1["args"] != parsed2["args"]:
|
|
1125
1118
|
differences.append(f"Different arguments: '{' '.join(parsed1['args'])}' vs '{' '.join(parsed2['args'])}'")
|
|
1126
1119
|
|
|
@@ -1236,14 +1229,33 @@ def generate_quiz_set(
|
|
|
1236
1229
|
"""
|
|
1237
1230
|
questions: list[QuizQuestion] = []
|
|
1238
1231
|
|
|
1232
|
+
# Filter out non-bash entries (Python code fragments, junk tokens, single chars)
|
|
1233
|
+
junk_tokens = {'version', 'total', 'package', 'success', 'error', 'reading',
|
|
1234
|
+
'editing', 'done', 'warning', 'info', 'note', 'output',
|
|
1235
|
+
'task', 'goal', 'purpose', 'what', 'description'}
|
|
1236
|
+
clean_commands = []
|
|
1237
|
+
for cmd in analyzed_commands:
|
|
1238
|
+
base = cmd.get("base_command", "")
|
|
1239
|
+
if not base or len(base) < 2:
|
|
1240
|
+
continue
|
|
1241
|
+
if any(c in base for c in ('(', ')', '=', '{', '}')):
|
|
1242
|
+
continue
|
|
1243
|
+
if any(c in base for c in ('\\', '"', "'")) or '&' in base:
|
|
1244
|
+
continue
|
|
1245
|
+
if base[0].isupper() and base.isalpha() and base not in ('PATH', 'HOME'):
|
|
1246
|
+
continue
|
|
1247
|
+
if base.lower() in junk_tokens:
|
|
1248
|
+
continue
|
|
1249
|
+
clean_commands.append(cmd)
|
|
1250
|
+
|
|
1239
1251
|
# Filter commands by complexity >= 2
|
|
1240
1252
|
eligible_commands = [
|
|
1241
|
-
cmd for cmd in
|
|
1253
|
+
cmd for cmd in clean_commands
|
|
1242
1254
|
if cmd.get("complexity", 0) >= 2
|
|
1243
1255
|
]
|
|
1244
1256
|
|
|
1245
1257
|
if not eligible_commands:
|
|
1246
|
-
eligible_commands = analyzed_commands
|
|
1258
|
+
eligible_commands = clean_commands if clean_commands else analyzed_commands
|
|
1247
1259
|
|
|
1248
1260
|
# Weight toward high-frequency commands
|
|
1249
1261
|
weighted_commands = []
|
|
@@ -1270,12 +1282,17 @@ def generate_quiz_set(
|
|
|
1270
1282
|
QuizType.SPOT_DIFFERENCE: set(),
|
|
1271
1283
|
}
|
|
1272
1284
|
|
|
1285
|
+
# Max command length for readable quiz questions
|
|
1286
|
+
MAX_QUIZ_CMD_LEN = 200
|
|
1287
|
+
|
|
1273
1288
|
# Generate "What does this do?" questions
|
|
1274
1289
|
random.shuffle(weighted_commands)
|
|
1275
1290
|
for cmd in weighted_commands:
|
|
1276
1291
|
if len([q for q in questions if q.quiz_type == QuizType.WHAT_DOES]) >= target_what_does:
|
|
1277
1292
|
break
|
|
1278
1293
|
cmd_id = cmd.get("command", "")
|
|
1294
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1295
|
+
continue
|
|
1279
1296
|
if cmd_id not in used_per_type[QuizType.WHAT_DOES]:
|
|
1280
1297
|
q = generate_what_does_quiz(cmd)
|
|
1281
1298
|
questions.append(q)
|
|
@@ -1299,6 +1316,8 @@ def generate_quiz_set(
|
|
|
1299
1316
|
if len([q for q in questions if q.quiz_type == QuizType.BUILD_COMMAND]) >= target_build:
|
|
1300
1317
|
break
|
|
1301
1318
|
cmd_id = cmd.get("command", "")
|
|
1319
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1320
|
+
continue
|
|
1302
1321
|
if cmd_id not in used_per_type[QuizType.BUILD_COMMAND]:
|
|
1303
1322
|
q = generate_build_command_quiz(cmd)
|
|
1304
1323
|
questions.append(q)
|
|
@@ -1310,6 +1329,8 @@ def generate_quiz_set(
|
|
|
1310
1329
|
if len([q for q in questions if q.quiz_type == QuizType.SPOT_DIFFERENCE]) >= target_spot_diff:
|
|
1311
1330
|
break
|
|
1312
1331
|
cmd_id = cmd.get("command", "")
|
|
1332
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1333
|
+
continue
|
|
1313
1334
|
if cmd_id not in used_per_type[QuizType.SPOT_DIFFERENCE]:
|
|
1314
1335
|
variant = _create_similar_command_variant(cmd)
|
|
1315
1336
|
if variant:
|
|
@@ -1318,6 +1339,17 @@ def generate_quiz_set(
|
|
|
1318
1339
|
questions.append(q)
|
|
1319
1340
|
used_per_type[QuizType.SPOT_DIFFERENCE].add(cmd_id)
|
|
1320
1341
|
|
|
1342
|
+
# Deduplicate by question text (same question can come from different commands)
|
|
1343
|
+
seen_texts = set()
|
|
1344
|
+
deduped = []
|
|
1345
|
+
for q in questions:
|
|
1346
|
+
# Normalize: take first 80 chars of question text
|
|
1347
|
+
q_key = q.question_text[:80]
|
|
1348
|
+
if q_key not in seen_texts:
|
|
1349
|
+
deduped.append(q)
|
|
1350
|
+
seen_texts.add(q_key)
|
|
1351
|
+
questions = deduped
|
|
1352
|
+
|
|
1321
1353
|
# Shuffle final questions
|
|
1322
1354
|
random.shuffle(questions)
|
|
1323
1355
|
|