learn_bash_from_session_data 1.0.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -1
- package/scripts/enrichment_builtins.py +1266 -0
- package/scripts/enrichment_coreutils.py +1499 -0
- package/scripts/enrichment_netproc.py +2270 -0
- package/scripts/enrichment_netsys.py +1601 -0
- package/scripts/enrichment_pkgcomp.py +2185 -0
- package/scripts/enrichment_textdev.py +2016 -0
- package/scripts/html_generator.py +154 -7
- package/scripts/knowledge_base.py +11521 -5626
- package/scripts/main.py +78 -34
- package/scripts/merge_enrichment.py +272 -0
- package/scripts/quiz_generator.py +69 -37
package/scripts/main.py
CHANGED
|
@@ -22,6 +22,7 @@ if sys.version_info < (3, 8):
|
|
|
22
22
|
# Constants
|
|
23
23
|
DEFAULT_OUTPUT_BASE = "./bash-learner-output"
|
|
24
24
|
MAX_UNIQUE_COMMANDS = 500
|
|
25
|
+
# Tool version recorded in the generated run metadata ("version" field).
# Kept in step with the published package version (1.1.0 for this release).
VERSION = "1.1.0"
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
|
|
@@ -51,7 +52,8 @@ def get_sessions_base_path() -> Path:
|
|
|
51
52
|
is_wsl = False
|
|
52
53
|
try:
|
|
53
54
|
with open("/proc/version", "r") as f:
|
|
54
|
-
|
|
55
|
+
proc_version = f.read().lower()
|
|
56
|
+
is_wsl = "microsoft" in proc_version or "wsl" in proc_version
|
|
55
57
|
except (FileNotFoundError, PermissionError):
|
|
56
58
|
pass
|
|
57
59
|
|
|
@@ -374,7 +376,7 @@ def run_extraction_pipeline(
|
|
|
374
376
|
# Step 5: Re-parse expanded commands to get proper base_command for each
|
|
375
377
|
parsed_expanded = parse_commands(expanded_commands)
|
|
376
378
|
|
|
377
|
-
# Step 6: Count frequencies BEFORE deduplication
|
|
379
|
+
# Step 6: Count frequencies BEFORE deduplication (for accurate usage stats)
|
|
378
380
|
cmd_frequency = Counter()
|
|
379
381
|
base_cmd_frequency = Counter()
|
|
380
382
|
|
|
@@ -386,7 +388,7 @@ def run_extraction_pipeline(
|
|
|
386
388
|
if base_cmd:
|
|
387
389
|
base_cmd_frequency[base_cmd] += 1
|
|
388
390
|
|
|
389
|
-
# Step 7: Deduplicate and
|
|
391
|
+
# Step 7: Deduplicate and attach frequency data
|
|
390
392
|
unique_commands = deduplicate_commands(parsed_expanded)
|
|
391
393
|
|
|
392
394
|
# Add frequency to each unique command
|
|
@@ -403,7 +405,7 @@ def run_extraction_pipeline(
|
|
|
403
405
|
else:
|
|
404
406
|
print(f"\n{len(unique_commands)} unique commands")
|
|
405
407
|
|
|
406
|
-
# Step
|
|
408
|
+
# Step 8: Analyze commands
|
|
407
409
|
print("\nAnalyzing commands...")
|
|
408
410
|
analysis = analyze_commands(unique_commands)
|
|
409
411
|
|
|
@@ -415,13 +417,13 @@ def run_extraction_pipeline(
|
|
|
415
417
|
analysis['operators_used'] = dict(operator_frequency)
|
|
416
418
|
print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
|
|
417
419
|
|
|
418
|
-
# Step
|
|
420
|
+
# Step 9: Generate quizzes
|
|
419
421
|
print("\nGenerating quizzes...")
|
|
420
422
|
quizzes = generate_quizzes(unique_commands, analysis)
|
|
421
423
|
quiz_count = sum(len(q) for q in quizzes.values()) if isinstance(quizzes, dict) else len(quizzes)
|
|
422
424
|
print(f" -> Generated {quiz_count} quiz questions")
|
|
423
425
|
|
|
424
|
-
# Step
|
|
426
|
+
# Step 10: Generate HTML
|
|
425
427
|
print("\nGenerating HTML output...")
|
|
426
428
|
html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
|
|
427
429
|
print(f" -> Created {len(html_files)} HTML files")
|
|
@@ -431,7 +433,7 @@ def run_extraction_pipeline(
|
|
|
431
433
|
"metadata": {
|
|
432
434
|
"generated_at": datetime.now().isoformat(),
|
|
433
435
|
"run_id": output_dir.name,
|
|
434
|
-
"version":
|
|
436
|
+
"version": VERSION,
|
|
435
437
|
},
|
|
436
438
|
"input": {
|
|
437
439
|
"sessions_processed": len(sessions),
|
|
@@ -477,8 +479,8 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
477
479
|
"""
|
|
478
480
|
Extract individual sub-commands from a compound command.
|
|
479
481
|
|
|
480
|
-
Splits commands by ||, &&, |, and ; while
|
|
481
|
-
|
|
482
|
+
Splits commands by ||, &&, |, and ; while respecting quoting
|
|
483
|
+
and skipping inline code commands (python -c, node -e, bash -c).
|
|
482
484
|
|
|
483
485
|
Args:
|
|
484
486
|
cmd_str: The compound command string
|
|
@@ -488,35 +490,77 @@ def extract_sub_commands(cmd_str: str) -> List[str]:
|
|
|
488
490
|
"""
|
|
489
491
|
import re
|
|
490
492
|
|
|
491
|
-
|
|
492
|
-
|
|
493
|
+
if not cmd_str or not cmd_str.strip():
|
|
494
|
+
return []
|
|
493
495
|
|
|
494
|
-
#
|
|
495
|
-
#
|
|
496
|
-
|
|
496
|
+
# Don't split commands that contain inline code - the ; and | inside
|
|
497
|
+
# quoted code would produce garbage fragments
|
|
498
|
+
inline_patterns = [' -c "', " -c '", ' -c $', ' -e "', " -e '", ' -e $',
|
|
499
|
+
' -c\n', ' -c\r']
|
|
500
|
+
first_token = cmd_str.split()[0] if cmd_str.split() else ''
|
|
501
|
+
if first_token in ('python', 'python3', 'node', 'bash', 'sh', 'ruby', 'perl'):
|
|
502
|
+
for pat in inline_patterns:
|
|
503
|
+
if pat in cmd_str:
|
|
504
|
+
return [cmd_str.strip()]
|
|
497
505
|
|
|
506
|
+
# Quote-aware splitting: track quote depth to avoid splitting inside quotes
|
|
498
507
|
sub_commands = []
|
|
508
|
+
current = []
|
|
509
|
+
in_single = False
|
|
510
|
+
in_double = False
|
|
511
|
+
i = 0
|
|
512
|
+
chars = cmd_str
|
|
513
|
+
|
|
514
|
+
while i < len(chars):
|
|
515
|
+
c = chars[i]
|
|
516
|
+
|
|
517
|
+
# Track quoting state
|
|
518
|
+
if c == "'" and not in_double:
|
|
519
|
+
in_single = not in_single
|
|
520
|
+
current.append(c)
|
|
521
|
+
i += 1
|
|
522
|
+
elif c == '"' and not in_single:
|
|
523
|
+
in_double = not in_double
|
|
524
|
+
current.append(c)
|
|
525
|
+
i += 1
|
|
526
|
+
elif not in_single and not in_double:
|
|
527
|
+
# Check for compound operators outside quotes
|
|
528
|
+
remaining = chars[i:]
|
|
529
|
+
if remaining.startswith('&&'):
|
|
530
|
+
cmd = ''.join(current).strip()
|
|
531
|
+
if cmd:
|
|
532
|
+
sub_commands.append(cmd)
|
|
533
|
+
current = []
|
|
534
|
+
i += 2
|
|
535
|
+
elif remaining.startswith('||'):
|
|
536
|
+
cmd = ''.join(current).strip()
|
|
537
|
+
if cmd:
|
|
538
|
+
sub_commands.append(cmd)
|
|
539
|
+
current = []
|
|
540
|
+
i += 2
|
|
541
|
+
elif c == ';':
|
|
542
|
+
cmd = ''.join(current).strip()
|
|
543
|
+
if cmd:
|
|
544
|
+
sub_commands.append(cmd)
|
|
545
|
+
current = []
|
|
546
|
+
i += 1
|
|
547
|
+
elif c == '|' and not remaining.startswith('||'):
|
|
548
|
+
cmd = ''.join(current).strip()
|
|
549
|
+
if cmd:
|
|
550
|
+
sub_commands.append(cmd)
|
|
551
|
+
current = []
|
|
552
|
+
i += 1
|
|
553
|
+
else:
|
|
554
|
+
current.append(c)
|
|
555
|
+
i += 1
|
|
556
|
+
else:
|
|
557
|
+
current.append(c)
|
|
558
|
+
i += 1
|
|
499
559
|
|
|
500
|
-
#
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
# Split each part by &&
|
|
505
|
-
and_parts = re.split(r'\s*&&\s*', or_part)
|
|
506
|
-
|
|
507
|
-
for and_part in and_parts:
|
|
508
|
-
# Split each part by ; (sequential)
|
|
509
|
-
seq_parts = re.split(r'\s*;\s*', and_part)
|
|
510
|
-
|
|
511
|
-
for seq_part in seq_parts:
|
|
512
|
-
# Split by single pipe |
|
|
513
|
-
# Use negative lookbehind/lookahead to avoid ||
|
|
514
|
-
pipe_parts = re.split(r'(?<!\|)\|(?!\|)', seq_part)
|
|
515
|
-
|
|
516
|
-
for pipe_part in pipe_parts:
|
|
517
|
-
cleaned = pipe_part.strip()
|
|
518
|
-
if cleaned:
|
|
519
|
-
sub_commands.append(cleaned)
|
|
560
|
+
# Add final segment
|
|
561
|
+
cmd = ''.join(current).strip()
|
|
562
|
+
if cmd:
|
|
563
|
+
sub_commands.append(cmd)
|
|
520
564
|
|
|
521
565
|
return sub_commands
|
|
522
566
|
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Merge enrichment data into knowledge_base.py COMMAND_DB.
|
|
4
|
+
|
|
5
|
+
Reads enrichment data from enrichment_*.py files and merges them into
|
|
6
|
+
the existing COMMAND_DB entries in knowledge_base.py. Adds missing fields
|
|
7
|
+
(use_cases, gotchas, man_url, related, difficulty) and supplements
|
|
8
|
+
existing flag definitions with extra_flags.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python scripts/merge_enrichment.py [--dry-run]
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
import re
|
|
16
|
+
import importlib
|
|
17
|
+
import importlib.util
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Dict, Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_enrichment_module(filepath: Path) -> Dict[str, Any]:
    """Load the ``ENRICHMENT_DATA`` mapping defined in a Python source file.

    The file is executed as a standalone module named ``"enrichment"``; it is
    not registered in ``sys.modules``.

    Args:
        filepath: Path of the Python file to execute.

    Returns:
        The module-level ``ENRICHMENT_DATA`` object, or an empty dict when the
        module does not define that name.

    Raises:
        ImportError: If no import spec/loader can be built for ``filepath``
            (for example, the file does not have a Python source suffix).
    """
    spec = importlib.util.spec_from_file_location("enrichment", filepath)
    # spec_from_file_location returns None when no loader handles the path;
    # fail with a message naming the file instead of an AttributeError below.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load enrichment module from {filepath}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return getattr(module, 'ENRICHMENT_DATA', {})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def collect_all_enrichments(scripts_dir: Path) -> Dict[str, Any]:
    """Collect enrichment data from all ``enrichment_*.py`` files.

    Files are processed in sorted name order. The first file that mentions a
    command establishes its entry; later files may only fill in keys that are
    missing or falsy in that entry and never overwrite a populated value.

    Args:
        scripts_dir: Directory that is globbed for ``enrichment_*.py``.

    Returns:
        Mapping of command name to its merged enrichment dict. Empty when no
        enrichment files are found.
    """
    merged: Dict[str, Any] = {}
    for enrichment_file in sorted(scripts_dir.glob("enrichment_*.py")):
        print(f" Loading: {enrichment_file.name}")
        data = load_enrichment_module(enrichment_file)
        print(f" -> {len(data)} commands")
        for cmd_name, cmd_data in data.items():
            existing = merged.get(cmd_name)
            if existing is None:
                # Shallow-copy so that supplementing this entry from later
                # files does not mutate the dict owned by the loaded module.
                merged[cmd_name] = dict(cmd_data)
                continue
            # Merge: later files can supplement but not overwrite
            for key, value in cmd_data.items():
                if not existing.get(key):
                    existing[key] = value
    return merged
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _quote_py_string(text) -> str:
    """Return ``text`` as a double-quoted Python string literal."""
    escaped = (
        str(text)
        .replace('\\', '\\\\')  # backslashes first so later escapes survive
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
        .replace('\t', '\\t')
    )
    return f'"{escaped}"'


def _find_closing_brace(text: str, open_pos: int) -> int:
    """Return the index of the ``}`` matching the ``{`` at ``open_pos``, or -1.

    Braces inside single- or double-quoted strings are ignored; backslash
    escapes inside those strings are honoured.
    """
    depth = 0
    pos = open_pos
    end = len(text)
    while pos < end:
        char = text[pos]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return pos
        elif char in ('"', "'"):
            # Skip string contents to avoid counting braces in strings
            pos += 1
            while pos < end and text[pos] != char:
                if text[pos] == '\\':
                    pos += 1  # Skip escaped char
                pos += 1
        pos += 1
    return -1


def _insert_before_brace(text: str, close_pos: int, new_lines: list) -> str:
    """Insert ``new_lines`` into a dict literal just before its closing brace.

    If the last item before the brace has no trailing comma, one is added so
    the inserted items do not produce invalid syntax.
    """
    head = text[:close_pos]
    stripped = head.rstrip()
    if stripped and stripped[-1] not in ',{':
        head = stripped + ',' + head[len(stripped):]
    return head + '\n' + '\n'.join(new_lines) + '\n ' + text[close_pos:]


def _render_field_lines(field: str, value) -> list:
    """Render one enrichment field as source lines; empty list if unsupported.

    Supported values are a string or a list of strings. Lists whose inline
    rendering is shorter than 80 characters stay on one line.
    """
    if isinstance(value, str):
        return [f' "{field}": {_quote_py_string(value)},']
    if isinstance(value, list) and all(isinstance(v, str) for v in value):
        quoted = [_quote_py_string(v) for v in value]
        items = ', '.join(quoted)
        if len(items) < 80:
            return [f' "{field}": [{items}],']
        lines = [f' "{field}": [']
        lines.extend(f' {item},' for item in quoted)
        lines.append(' ],')
        return lines
    return []


def merge_into_knowledge_base(kb_path: Path, enrichments: Dict[str, Any], dry_run: bool = False) -> int:
    """
    Merge enrichment data into knowledge_base.py by modifying COMMAND_DB entries.

    Strategy: For each command in enrichments, find its entry in the file text
    (``"cmd_name": {`` ... matching ``}``) and rewrite that entry in place:

    * ``extra_flags`` not already present are appended to the entry's
      ``"flags"`` dict (only when the entry has one).
    * ``improved_description`` replaces the entry's ``"description"`` string
      when that key appears before any nested dict closes.
    * ``man_url``, ``use_cases``, ``gotchas``, ``related`` and ``difficulty``
      are inserted before the entry's closing brace when the entry does not
      already contain that key and the enrichment value is truthy.

    All inserted strings are written as escaped Python string literals.

    Args:
        kb_path: Path of the knowledge base source file to update.
        enrichments: Mapping of command name to its enrichment dict.
        dry_run: When true, report what would change without writing.

    Returns:
        Number of command entries whose text was (or would be) changed.
    """
    content = kb_path.read_text(encoding='utf-8')
    original_content = content
    enriched_count = 0
    fields_to_add = ['man_url', 'use_cases', 'gotchas', 'related', 'difficulty']

    for cmd_name, enrichment in enrichments.items():
        # Find the start of this command's dict entry.
        # re.escape handles special names like "."
        escaped_name = re.escape(cmd_name)
        entry_pattern = rf' "{escaped_name}": \{{'
        match = re.search(entry_pattern, content)
        if not match:
            print(f" WARNING: Command '{cmd_name}' not found in COMMAND_DB, skipping")
            continue

        entry_start = match.start()
        # match.end() - 1 is the entry's opening brace.
        entry_end = _find_closing_brace(content, match.end() - 1)
        if entry_end == -1:
            print(f" WARNING: Could not find end of entry for '{cmd_name}', skipping")
            continue

        # All presence checks use the entry as it was before this command's
        # edits; the edits themselves are applied to a working copy.
        entry_content = content[entry_start:entry_end + 1]
        new_entry = entry_content

        # Handle extra_flags: merge into existing flags dict
        extra_flags = enrichment.get('extra_flags', {})
        if extra_flags and '"flags"' in entry_content:
            flag_lines = []
            for flag, desc in extra_flags.items():
                flag_literal = _quote_py_string(flag)
                if flag_literal not in entry_content:
                    flag_lines.append(f' {flag_literal}: {_quote_py_string(desc)},')
            flags_match = re.search(r'"flags":\s*\{', new_entry) if flag_lines else None
            if flags_match:
                flags_close = _find_closing_brace(new_entry, flags_match.end() - 1)
                if flags_close != -1:
                    new_entry = _insert_before_brace(new_entry, flags_close, flag_lines)

        # Handle improved_description: replace existing description
        improved_desc = enrichment.get('improved_description')
        if improved_desc and '"description"' in entry_content:
            # The string part accepts escaped characters (including \") so the
            # whole existing literal is replaced, not just up to the first \".
            desc_pattern = rf'( "{escaped_name}": \{{[^}}]*?"description":\s*)"(?:[^"\\]|\\.)*"'
            desc_match = re.match(desc_pattern, new_entry)
            if desc_match:
                # Spliced by position rather than via a re.sub template, so
                # backslashes in the new text are never read as regex escapes.
                new_entry = (
                    new_entry[:desc_match.end(1)]
                    + _quote_py_string(improved_desc)
                    + new_entry[desc_match.end():]
                )

        # Check which fields are missing and build the insertion text
        insertion_lines = []
        for field in fields_to_add:
            if f'"{field}"' in entry_content:
                continue
            value = enrichment.get(field)
            if not value:
                continue
            rendered = _render_field_lines(field, value)
            if not rendered:
                print(f" WARNING: Unsupported value type for '{field}' on '{cmd_name}', skipping field")
            insertion_lines.extend(rendered)

        if insertion_lines:
            # The working copy always ends with the entry's closing brace.
            new_entry = _insert_before_brace(new_entry, len(new_entry) - 1, insertion_lines)

        if new_entry != entry_content:
            content = content[:entry_start] + new_entry + content[entry_end + 1:]
            enriched_count += 1

    if content != original_content:
        if dry_run:
            print(f"\n DRY RUN: Would enrich {enriched_count} commands")
            # Show a diff summary
            added_lines = len(content.splitlines()) - len(original_content.splitlines())
            print(f" Would add ~{added_lines} lines")
        else:
            kb_path.write_text(content, encoding='utf-8')
            print(f"\n Enriched {enriched_count} commands in {kb_path.name}")

    return enriched_count
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def main():
    """Merge all enrichment files into ``knowledge_base.py`` next to this script.

    Pass ``--dry-run`` on the command line to report what would change without
    writing. After a real merge the updated file is compiled to check that it
    is still valid Python; if it is not, the previous content is restored.

    Returns:
        Process exit code: 0 on success, 1 when the knowledge base is missing,
        no enrichment data is found, or the merged file fails to compile.
    """
    dry_run = '--dry-run' in sys.argv

    scripts_dir = Path(__file__).parent
    kb_path = scripts_dir / 'knowledge_base.py'

    if not kb_path.exists():
        print(f"Error: {kb_path} not found")
        return 1

    print("Collecting enrichment data...")
    enrichments = collect_all_enrichments(scripts_dir)

    if not enrichments:
        print("No enrichment data found. Run the research agents first.")
        return 1

    print(f"\nTotal enrichments: {len(enrichments)} commands")
    print(f"\nMerging into {kb_path.name}{' (DRY RUN)' if dry_run else ''}...")

    # Keep the pre-merge text so a broken merge can actually be reverted.
    backup = kb_path.read_text(encoding='utf-8')
    count = merge_into_knowledge_base(kb_path, enrichments, dry_run=dry_run)

    if not dry_run:
        updated = kb_path.read_text(encoding='utf-8')
        # Verify whenever the file changed, not only when the reported count
        # is positive, so no written change goes unchecked.
        if count > 0 or updated != backup:
            # Verify the file is still valid Python
            print("\nVerifying syntax...")
            try:
                compile(updated, str(kb_path), 'exec')
            except SyntaxError as e:
                print(f" SYNTAX ERROR: {e}")
                print(" Reverting changes...")
                kb_path.write_text(backup, encoding='utf-8')
                return 1
            else:
                print(" Syntax OK")

    print("\nDone.")
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
|
@@ -784,29 +784,25 @@ def generate_what_does_quiz(
|
|
|
784
784
|
QuizQuestion instance
|
|
785
785
|
"""
|
|
786
786
|
cmd_string = command.get("command", "")
|
|
787
|
-
description = command.get("description", "")
|
|
788
787
|
complexity = command.get("complexity", 2)
|
|
789
788
|
|
|
790
789
|
parsed = _parse_command(cmd_string)
|
|
791
790
|
base_cmd = parsed["base"]
|
|
792
791
|
|
|
793
|
-
#
|
|
794
|
-
correct_desc =
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
correct_desc =
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
808
|
-
if flag_descs:
|
|
809
|
-
correct_desc += " using " + ", ".join(flag_descs)
|
|
792
|
+
# Always use the educational bash description generator (not session descriptions)
|
|
793
|
+
correct_desc = _generate_bash_description(cmd_string)
|
|
794
|
+
# Capitalize first letter for consistent formatting
|
|
795
|
+
if correct_desc:
|
|
796
|
+
correct_desc = correct_desc[0].upper() + correct_desc[1:]
|
|
797
|
+
|
|
798
|
+
# Add flag details if available
|
|
799
|
+
flag_descs = []
|
|
800
|
+
for flag in parsed["flags"]:
|
|
801
|
+
fd = _get_flag_description(base_cmd, flag)
|
|
802
|
+
if fd:
|
|
803
|
+
flag_descs.append(f"{flag} ({fd.lower()})")
|
|
804
|
+
if flag_descs:
|
|
805
|
+
correct_desc += " using " + ", ".join(flag_descs)
|
|
810
806
|
|
|
811
807
|
# Generate distractors
|
|
812
808
|
distractor_descriptions = _generate_distractor_descriptions(correct_desc, 3)
|
|
@@ -965,8 +961,6 @@ def generate_build_command_quiz(
|
|
|
965
961
|
QuizQuestion instance
|
|
966
962
|
"""
|
|
967
963
|
cmd_string = command.get("command", "")
|
|
968
|
-
description = command.get("description", "")
|
|
969
|
-
intent = command.get("intent", description)
|
|
970
964
|
|
|
971
965
|
parsed = _parse_command(cmd_string)
|
|
972
966
|
base_cmd = parsed["base"]
|
|
@@ -1051,14 +1045,8 @@ def generate_build_command_quiz(
|
|
|
1051
1045
|
|
|
1052
1046
|
question_id = _generate_id(f"build_{cmd_string}")
|
|
1053
1047
|
|
|
1054
|
-
#
|
|
1055
|
-
|
|
1056
|
-
task_description = intent
|
|
1057
|
-
elif description:
|
|
1058
|
-
task_description = description
|
|
1059
|
-
else:
|
|
1060
|
-
# Generate educational description from the command
|
|
1061
|
-
task_description = _generate_bash_description(cmd_string)
|
|
1048
|
+
# Always generate description from the command itself (not session descriptions)
|
|
1049
|
+
task_description = _generate_bash_description(cmd_string)
|
|
1062
1050
|
|
|
1063
1051
|
return QuizQuestion(
|
|
1064
1052
|
id=question_id,
|
|
@@ -1113,14 +1101,19 @@ def generate_spot_difference_quiz(
|
|
|
1113
1101
|
|
|
1114
1102
|
# Build the correct explanation of difference
|
|
1115
1103
|
differences = []
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
differences.append(f"Command 1 has `{flag}` ({desc or 'unknown'})")
|
|
1120
|
-
if only_in_2:
|
|
1121
|
-
for flag in only_in_2:
|
|
1104
|
+
has_unknown = False
|
|
1105
|
+
for flag_set, label in [(only_in_1, "Command 1"), (only_in_2, "Command 2")]:
|
|
1106
|
+
for flag in flag_set:
|
|
1122
1107
|
desc = _get_flag_description(base_cmd, flag)
|
|
1123
|
-
|
|
1108
|
+
# Handle numeric flags like -3 (shorthand for -n 3)
|
|
1109
|
+
if not desc and re.match(r'^-\d+$', flag):
|
|
1110
|
+
desc = f"Specify count ({flag[1:]})"
|
|
1111
|
+
if not desc:
|
|
1112
|
+
has_unknown = True
|
|
1113
|
+
differences.append(f"{label} has `{flag}` ({desc or 'specifies an option'})")
|
|
1114
|
+
# Skip questions where we can't explain the flags well
|
|
1115
|
+
if has_unknown:
|
|
1116
|
+
return None
|
|
1124
1117
|
if parsed1["args"] != parsed2["args"]:
|
|
1125
1118
|
differences.append(f"Different arguments: '{' '.join(parsed1['args'])}' vs '{' '.join(parsed2['args'])}'")
|
|
1126
1119
|
|
|
@@ -1236,14 +1229,33 @@ def generate_quiz_set(
|
|
|
1236
1229
|
"""
|
|
1237
1230
|
questions: list[QuizQuestion] = []
|
|
1238
1231
|
|
|
1232
|
+
# Filter out non-bash entries (Python code fragments, junk tokens, single chars)
|
|
1233
|
+
junk_tokens = {'version', 'total', 'package', 'success', 'error', 'reading',
|
|
1234
|
+
'editing', 'done', 'warning', 'info', 'note', 'output',
|
|
1235
|
+
'task', 'goal', 'purpose', 'what', 'description'}
|
|
1236
|
+
clean_commands = []
|
|
1237
|
+
for cmd in analyzed_commands:
|
|
1238
|
+
base = cmd.get("base_command", "")
|
|
1239
|
+
if not base or len(base) < 2:
|
|
1240
|
+
continue
|
|
1241
|
+
if any(c in base for c in ('(', ')', '=', '{', '}')):
|
|
1242
|
+
continue
|
|
1243
|
+
if any(c in base for c in ('\\', '"', "'")) or '&' in base:
|
|
1244
|
+
continue
|
|
1245
|
+
if base[0].isupper() and base.isalpha() and base not in ('PATH', 'HOME'):
|
|
1246
|
+
continue
|
|
1247
|
+
if base.lower() in junk_tokens:
|
|
1248
|
+
continue
|
|
1249
|
+
clean_commands.append(cmd)
|
|
1250
|
+
|
|
1239
1251
|
# Filter commands by complexity >= 2
|
|
1240
1252
|
eligible_commands = [
|
|
1241
|
-
cmd for cmd in
|
|
1253
|
+
cmd for cmd in clean_commands
|
|
1242
1254
|
if cmd.get("complexity", 0) >= 2
|
|
1243
1255
|
]
|
|
1244
1256
|
|
|
1245
1257
|
if not eligible_commands:
|
|
1246
|
-
eligible_commands = analyzed_commands
|
|
1258
|
+
eligible_commands = clean_commands if clean_commands else analyzed_commands
|
|
1247
1259
|
|
|
1248
1260
|
# Weight toward high-frequency commands
|
|
1249
1261
|
weighted_commands = []
|
|
@@ -1270,12 +1282,17 @@ def generate_quiz_set(
|
|
|
1270
1282
|
QuizType.SPOT_DIFFERENCE: set(),
|
|
1271
1283
|
}
|
|
1272
1284
|
|
|
1285
|
+
# Max command length for readable quiz questions
|
|
1286
|
+
MAX_QUIZ_CMD_LEN = 200
|
|
1287
|
+
|
|
1273
1288
|
# Generate "What does this do?" questions
|
|
1274
1289
|
random.shuffle(weighted_commands)
|
|
1275
1290
|
for cmd in weighted_commands:
|
|
1276
1291
|
if len([q for q in questions if q.quiz_type == QuizType.WHAT_DOES]) >= target_what_does:
|
|
1277
1292
|
break
|
|
1278
1293
|
cmd_id = cmd.get("command", "")
|
|
1294
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1295
|
+
continue
|
|
1279
1296
|
if cmd_id not in used_per_type[QuizType.WHAT_DOES]:
|
|
1280
1297
|
q = generate_what_does_quiz(cmd)
|
|
1281
1298
|
questions.append(q)
|
|
@@ -1299,6 +1316,8 @@ def generate_quiz_set(
|
|
|
1299
1316
|
if len([q for q in questions if q.quiz_type == QuizType.BUILD_COMMAND]) >= target_build:
|
|
1300
1317
|
break
|
|
1301
1318
|
cmd_id = cmd.get("command", "")
|
|
1319
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1320
|
+
continue
|
|
1302
1321
|
if cmd_id not in used_per_type[QuizType.BUILD_COMMAND]:
|
|
1303
1322
|
q = generate_build_command_quiz(cmd)
|
|
1304
1323
|
questions.append(q)
|
|
@@ -1310,6 +1329,8 @@ def generate_quiz_set(
|
|
|
1310
1329
|
if len([q for q in questions if q.quiz_type == QuizType.SPOT_DIFFERENCE]) >= target_spot_diff:
|
|
1311
1330
|
break
|
|
1312
1331
|
cmd_id = cmd.get("command", "")
|
|
1332
|
+
if len(cmd_id) > MAX_QUIZ_CMD_LEN:
|
|
1333
|
+
continue
|
|
1313
1334
|
if cmd_id not in used_per_type[QuizType.SPOT_DIFFERENCE]:
|
|
1314
1335
|
variant = _create_similar_command_variant(cmd)
|
|
1315
1336
|
if variant:
|
|
@@ -1318,6 +1339,17 @@ def generate_quiz_set(
|
|
|
1318
1339
|
questions.append(q)
|
|
1319
1340
|
used_per_type[QuizType.SPOT_DIFFERENCE].add(cmd_id)
|
|
1320
1341
|
|
|
1342
|
+
# Deduplicate by question text (same question can come from different commands)
|
|
1343
|
+
seen_texts = set()
|
|
1344
|
+
deduped = []
|
|
1345
|
+
for q in questions:
|
|
1346
|
+
# Normalize: take first 80 chars of question text
|
|
1347
|
+
q_key = q.question_text[:80]
|
|
1348
|
+
if q_key not in seen_texts:
|
|
1349
|
+
deduped.append(q)
|
|
1350
|
+
seen_texts.add(q_key)
|
|
1351
|
+
questions = deduped
|
|
1352
|
+
|
|
1321
1353
|
# Shuffle final questions
|
|
1322
1354
|
random.shuffle(questions)
|
|
1323
1355
|
|