npm - learn_bash_from_session_data - Versions diffs - 1.0.4 → 1.0.6 - Mend

learn_bash_from_session_data 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/bash-learner-output/run-2026-02-05-154214/index.html +3848 -0
package/bash-learner-output/run-2026-02-05-154214/summary.json +148 -0
package/bash-learner-output/run-2026-02-05-155427/index.html +3900 -0
package/bash-learner-output/run-2026-02-05-155427/summary.json +157 -0
package/bash-learner-output/run-2026-02-05-155949/index.html +4514 -0
package/bash-learner-output/run-2026-02-05-155949/summary.json +163 -0
package/package.json +7 -3
package/scripts/html_generator.py +135 -58
package/scripts/knowledge_base.py +5624 -1593
package/scripts/main.py +110 -5
package/scripts/quiz_generator.py +163 -48
package/vectors.db +0 -0

package/scripts/main.py CHANGED Viewed

@@ -319,11 +319,66 @@ def run_extraction_pipeline(
     parsed_commands = parse_commands(raw_commands)
     print(f"  -> Parsed {len(parsed_commands)} commands")
-    # Step 4: Count frequencies BEFORE deduplication
+    # Step 4: Expand compound commands into individual sub-commands
+    # Also count operators for tracking
     from collections import Counter
+    import re
+    operator_frequency = Counter()
+    expanded_commands = []
+    # Operator patterns to detect
+    operator_patterns = {
+        '||': r'\|\|',
+        '&&': r'&&',
+        '|': r'(?<!\|)\|(?!\|)',  # Single pipe, not ||
+        '2>&1': r'2>&1',
+        '2>/dev/null': r'2>/dev/null',
+        '>': r'(?<![2&])>(?!>|&)',  # Single >, not >> or 2> or >&
+        '>>': r'>>',
+        '<': r'<(?!<)',
+    }
+    for cmd in parsed_commands:
+        cmd_str = cmd.get('command', '') or cmd.get('raw', '')
+        if not cmd_str:
+            continue
+        # Count operators in this command
+        for op_name, op_pattern in operator_patterns.items():
+            matches = re.findall(op_pattern, cmd_str)
+            if matches:
+                operator_frequency[op_name] += len(matches)
+        # Check if this is a compound command
+        is_compound = any(op in cmd_str for op in ['||', '&&', ' | ', ';'])
+        if is_compound:
+            # Extract individual sub-commands from compound statement
+            sub_commands = extract_sub_commands(cmd_str)
+            for sub_cmd in sub_commands:
+                if sub_cmd.strip():
+                    expanded_commands.append({
+                        'command': sub_cmd.strip(),
+                        'raw': sub_cmd.strip(),
+                        'original_compound': cmd_str,
+                        'description': cmd.get('description', ''),
+                        'output': cmd.get('output', ''),
+                    })
+        else:
+            # Simple command - add as-is
+            expanded_commands.append(cmd)
+    print(f"  -> Expanded to {len(expanded_commands)} individual commands")
+    # Step 5: Re-parse expanded commands to get proper base_command for each
+    parsed_expanded = parse_commands(expanded_commands)
+    # Step 6: Count frequencies BEFORE deduplication
     cmd_frequency = Counter()
     base_cmd_frequency = Counter()
-    for cmd in parsed_commands:
+    for cmd in parsed_expanded:
         cmd_str = cmd.get('command', '') or cmd.get('raw', '')
         base_cmd = cmd.get('base_command', '')
         if cmd_str:
@@ -331,8 +386,8 @@ def run_extraction_pipeline(
         if base_cmd:
             base_cmd_frequency[base_cmd] += 1
-    # Step 5: Deduplicate and add frequency data
-    unique_commands = deduplicate_commands(parsed_commands)
+    # Step 7: Deduplicate and add frequency data
+    unique_commands = deduplicate_commands(parsed_expanded)
     # Add frequency to each unique command
     for cmd in unique_commands:
@@ -357,6 +412,7 @@ def run_extraction_pipeline(
     analysis['base_command_frequency'] = dict(base_cmd_frequency)
     analysis['top_commands'] = cmd_frequency.most_common(20)
     analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
+    analysis['operators_used'] = dict(operator_frequency)
     print(f"  -> Generated analysis with {len(analysis.get('categories', {}))} categories")
     # Step 6: Generate quizzes
@@ -375,7 +431,7 @@ def run_extraction_pipeline(
         "metadata": {
             "generated_at": datetime.now().isoformat(),
             "run_id": output_dir.name,
-            "version": "1.0.4",
+            "version": "1.0.5",
         },
         "input": {
             "sessions_processed": len(sessions),
@@ -399,6 +455,7 @@ def run_extraction_pipeline(
                 {"command": cmd, "count": count}
                 for cmd, count in list(base_cmd_frequency.most_common(10))
             ],
+            "operators_used": dict(operator_frequency),
             "complexity_distribution": dict(analysis.get('complexity_distribution', {})),
         },
         "output": {
@@ -416,6 +473,54 @@ def run_extraction_pipeline(
     return True, f"Successfully generated learning materials in {output_dir}"
+def extract_sub_commands(cmd_str: str) -> List[str]:
+    """
+    Extract individual sub-commands from a compound command.
+    Splits on the control operators ||, &&, |, |& and ; in a single
+    left-to-right scan. Redirections such as 2>/dev/null or 2>&1 are not
+    operators and stay attached to their command. Operators that appear
+    inside single or double quotes, or that are backslash-escaped (such as
+    the escaped semicolon that ends "find -exec"), are treated as literal
+    text, so 'grep "a|b" f | wc -l' yields two sub-commands, not three.
+    Command substitutions and subshells are not parsed; operators inside
+    them are still split on.
+    Args:
+        cmd_str: The compound command string
+    Returns:
+        List of non-empty, whitespace-stripped sub-command strings in their
+        original order
+    """
+    sub_commands: List[str] = []
+    current: List[str] = []
+    quote = ''  # active quote character, or '' when outside quotes
+    def flush() -> None:
+        # Emit the text gathered so far as one sub-command (if non-blank)
+        piece = ''.join(current).strip()
+        if piece:
+            sub_commands.append(piece)
+        current.clear()
+    i, n = 0, len(cmd_str)
+    while i < n:
+        ch = cmd_str[i]
+        if ch == '\\' and quote != "'" and i + 1 < n:
+            # Backslash escapes the next character, except inside single quotes
+            current.append(cmd_str[i:i + 2])
+            i += 2
+        elif quote:
+            # Inside quotes everything is literal until the closing quote
+            current.append(ch)
+            if ch == quote:
+                quote = ''
+            i += 1
+        elif ch in ('"', "'"):
+            quote = ch
+            current.append(ch)
+            i += 1
+        elif cmd_str[i:i + 2] in ('||', '&&', '|&'):
+            # Two-character operators are checked before the single pipe
+            flush()
+            i += 2
+        elif ch in '|;':
+            flush()
+            i += 1
+        else:
+            current.append(ch)
+            i += 1
+    flush()
+    return sub_commands
 def deduplicate_commands(commands: List[Dict]) -> List[Dict]:
     """
     Remove duplicate commands while preserving order.

package/scripts/quiz_generator.py CHANGED Viewed

@@ -18,6 +18,45 @@ import random
 import re
 import hashlib
+try:
+    from scripts.knowledge_base import COMMAND_DB, get_command_info, get_flags_for_command
+except ImportError:
+    try:
+        from knowledge_base import COMMAND_DB, get_command_info, get_flags_for_command
+    except ImportError:
+        COMMAND_DB = {}
+        def get_command_info(name): return None
+        def get_flags_for_command(command): return {}
+def _get_flags_for_cmd(cmd: str) -> dict[str, str]:
+    """Return the flag -> description mapping known for ``cmd``.
+    Entries reported by knowledge_base (via ``get_flags_for_command``) take
+    precedence; the local FLAG_DATABASE only contributes flags that the
+    knowledge base did not already supply.
+    """
+    # Start from a copy of the knowledge_base entries (may be empty or None)
+    merged = dict(get_flags_for_command(cmd) or {})
+    # Fill the gaps from FLAG_DATABASE without overriding existing entries
+    for flag, desc in FLAG_DATABASE.get(cmd, {}).items():
+        merged.setdefault(flag, desc)
+    return merged
+def _get_all_flagged_commands() -> set[str]:
+    """Collect every command name for which some flag data is available."""
+    # Knowledge-base entries count only when their "flags" value is non-empty
+    flagged = {name for name, entry in COMMAND_DB.items() if entry.get("flags")}
+    # Every key of the local FLAG_DATABASE is included unconditionally
+    flagged |= set(FLAG_DATABASE)
+    return flagged
 class QuizType(Enum):
     """Types of quiz questions."""
@@ -397,37 +436,98 @@ def _generate_bash_description(cmd_string: str) -> str:
     Generate an educational description focusing on bash concepts.
     Explains what each part of the command does from a bash perspective.
+    Handles: &&, ||, |, 2>&1, 2>/dev/null, and combinations.
     """
     if not cmd_string:
         return "Runs a command"
+    # Clean up redirections for description (note them but don't clutter)
+    has_stderr_to_stdout = '2>&1' in cmd_string
+    has_stderr_to_null = '2>/dev/null' in cmd_string
+    has_stdout_redirect = re.search(r'>\s*\S+', cmd_string) and '2>' not in cmd_string
+    # Remove redirections for parsing (we'll note them separately)
+    clean_cmd = re.sub(r'\s*2>&1\s*', ' ', cmd_string)
+    clean_cmd = re.sub(r'\s*2>/dev/null\s*', ' ', clean_cmd)
+    clean_cmd = re.sub(r'\s*>\s*\S+\s*', ' ', clean_cmd)
+    clean_cmd = ' '.join(clean_cmd.split())  # normalize whitespace
     parts = []
-    # Check for command chaining
-    if ' && ' in cmd_string:
-        commands = cmd_string.split(' && ')
+    # Handle && (run if previous succeeds)
+    if ' && ' in clean_cmd:
+        commands = clean_cmd.split(' && ')
         for i, cmd in enumerate(commands):
-            base = cmd.strip().split()[0] if cmd.strip() else ''
-            if i == 0:
-                parts.append(_describe_single_command(cmd.strip()))
+            cmd = cmd.strip()
+            if not cmd:
+                continue
+            # Handle nested || or | within && segments
+            if ' || ' in cmd:
+                parts.append(_describe_or_chain(cmd))
+            elif ' | ' in cmd:
+                parts.append(_describe_pipe_chain(cmd))
+            elif i == 0:
+                parts.append(_describe_single_command(cmd))
             else:
-                parts.append(f"then {_describe_single_command(cmd.strip())}")
-        return ', '.join(parts)
+                parts.append(f"then {_describe_single_command(cmd)}")
+    # Handle || (run if previous fails)
+    elif ' || ' in clean_cmd:
+        parts.append(_describe_or_chain(clean_cmd))
+    # Handle | (pipe)
+    elif ' | ' in clean_cmd:
+        parts.append(_describe_pipe_chain(clean_cmd))
+    else:
+        parts.append(_describe_single_command(clean_cmd))
+    result = ', '.join(parts)
+    # Add redirection notes
+    if has_stderr_to_null:
+        result += " (suppressing errors)"
+    elif has_stderr_to_stdout:
+        result += " (capturing all output)"
-    if ' || ' in cmd_string:
-        commands = cmd_string.split(' || ')
-        parts.append(_describe_single_command(commands[0].strip()))
-        parts.append(f"or if that fails, {_describe_single_command(commands[1].strip())}")
-        return ', '.join(parts)
+    return result
-    if ' | ' in cmd_string:
-        commands = cmd_string.split(' | ')
-        parts.append(_describe_single_command(commands[0].strip()))
-        for cmd in commands[1:]:
-            parts.append(f"pipes output to {_describe_single_command(cmd.strip())}")
-        return ', '.join(parts)
-    return _describe_single_command(cmd_string)
+def _describe_or_chain(cmd_string: str) -> str:
+    """Describe a chain of commands joined by ' || ' (fallback pattern)."""
+    described = []
+    for position, segment in enumerate(cmd_string.split(' || ')):
+        segment = segment.strip()
+        if not segment:
+            continue
+        # A segment may itself be a pipeline; pick the matching describer
+        describe = _describe_pipe_chain if ' | ' in segment else _describe_single_command
+        text = describe(segment)
+        # Only the segment at index 0 is reported without the fallback prefix
+        described.append(text if position == 0 else f"or if that fails, {text}")
+    return ', '.join(described)
+def _describe_pipe_chain(cmd_string: str) -> str:
+    """Describe a pipeline whose stages are separated by ' | '."""
+    described = []
+    for position, stage in enumerate(cmd_string.split(' | ')):
+        stage = stage.strip()
+        if not stage:
+            continue
+        text = _describe_single_command(stage)
+        # Stages after index 0 are introduced as receiving piped input
+        described.append(f"pipes to {text}" if position else text)
+    return ', '.join(described)
 def _describe_single_command(cmd: str) -> str:
@@ -438,6 +538,19 @@ def _describe_single_command(cmd: str) -> str:
     tokens = cmd.split()
     base_cmd = tokens[0] if tokens else ''
+    # Get args (skip flags) for knowledge_base fallback
+    args = [t for t in tokens[1:] if not t.startswith('-')]
+    # Check knowledge_base COMMAND_DB for rich description
+    if base_cmd and base_cmd in COMMAND_DB:
+        cmd_info = COMMAND_DB[base_cmd]
+        kb_desc = cmd_info.get('description', '')
+        if kb_desc:
+            # Use knowledge base description but make it contextual with args
+            if args:
+                return f"{kb_desc.lower()} ({' '.join(args[:2])})"
+            return kb_desc.lower()
     # Common command descriptions with bash focus
     descriptions = {
         'cd': lambda args: f"changes directory to {args[0] if args else 'specified path'}",
@@ -576,35 +689,35 @@ def _parse_command(cmd_string: str) -> dict:
 def _get_flag_description(cmd: str, flag: str) -> Optional[str]:
-    """Get description for a flag of a command."""
-    if cmd in FLAG_DATABASE:
-        # Handle flags like -la (combined short flags)
-        if flag in FLAG_DATABASE[cmd]:
-            return FLAG_DATABASE[cmd][flag]
-        # Try individual characters for combined flags
-        if len(flag) > 2 and flag.startswith("-") and not flag.startswith("--"):
-            for char in flag[1:]:
-                single_flag = f"-{char}"
-                if single_flag in FLAG_DATABASE[cmd]:
-                    return FLAG_DATABASE[cmd][single_flag]
+    """Get description for a flag of a command from merged sources.
+    Looks the flag up in the mapping returned by _get_flags_for_cmd(cmd).
+    If there is no exact match and the flag starts with a single "-" and is
+    longer than two characters, each character after the dash is tried as
+    its own short flag and the first description found is returned.
+    Returns:
+        The matching description, or None when nothing matches.
+    """
+    merged = _get_flags_for_cmd(cmd)
+    if flag in merged:
+        return merged[flag]
+    # Try individual characters for combined flags (e.g., -la -> -l, -a);
+    # only the first character with a known description is reported
+    if len(flag) > 2 and flag.startswith("-") and not flag.startswith("--"):
+        for char in flag[1:]:
+            single_flag = f"-{char}"
+            if single_flag in merged:
+                return merged[single_flag]
     return None
 def _generate_distractor_flags(cmd: str, correct_flag: str, count: int = 3) -> list[str]:
-    """Generate plausible distractor flags."""
+    """Generate plausible distractor flags from merged knowledge sources."""
     distractors = []
-    # Get other flags from the same command
-    if cmd in FLAG_DATABASE:
-        other_flags = [f for f in FLAG_DATABASE[cmd].keys() if f != correct_flag]
+    # Get other flags from the same command (merged sources)
+    cmd_flags = _get_flags_for_cmd(cmd)
+    if cmd_flags:
+        other_flags = [f for f in cmd_flags.keys() if f != correct_flag]
         random.shuffle(other_flags)
         distractors.extend(other_flags[:count])
     # If we need more, get common flags from other commands
     if len(distractors) < count:
-        for other_cmd, flags in FLAG_DATABASE.items():
+        for other_cmd in _get_all_flagged_commands():
             if other_cmd != cmd:
-                for flag in flags:
+                for flag in _get_flags_for_cmd(other_cmd):
                     if flag not in distractors and flag != correct_flag:
                         distractors.append(flag)
                         if len(distractors) >= count:
@@ -619,10 +732,10 @@ def _generate_distractor_descriptions(correct_desc: str, count: int = 3) -> list
     """Generate plausible wrong descriptions."""
     distractors = []
-    # Collect all descriptions from FLAG_DATABASE
+    # Collect all descriptions from merged sources
     all_descriptions = []
-    for cmd_flags in FLAG_DATABASE.values():
-        all_descriptions.extend(cmd_flags.values())
+    for cmd in _get_all_flagged_commands():
+        all_descriptions.extend(_get_flags_for_cmd(cmd).values())
     # Remove duplicates and the correct answer
     all_descriptions = list(set(all_descriptions))
@@ -736,16 +849,17 @@ def generate_which_flag_quiz(
     parsed = _parse_command(cmd_string)
     base_cmd = parsed["base"]
-    if base_cmd not in FLAG_DATABASE or not parsed["flags"]:
+    cmd_flags = _get_flags_for_cmd(base_cmd)
+    if not cmd_flags or not parsed["flags"]:
         return None
     # Pick a flag to quiz on
-    available_flags = [f for f in parsed["flags"] if f in FLAG_DATABASE.get(base_cmd, {})]
+    available_flags = [f for f in parsed["flags"] if f in cmd_flags]
     if not available_flags:
         return None
     target_flag = random.choice(available_flags)
-    flag_desc = FLAG_DATABASE[base_cmd][target_flag]
+    flag_desc = cmd_flags[target_flag]
     # Generate distractor flags
     distractor_flags = _generate_distractor_flags(base_cmd, target_flag, 3)
@@ -770,13 +884,13 @@ def generate_which_flag_quiz(
             correct_id = opt_id
         # Get description for option explanation
-        flag_explanation = FLAG_DATABASE.get(base_cmd, {}).get(flag, "Unknown flag")
+        flag_explanation = cmd_flags.get(flag, "Unknown flag")
         options.append(QuizOption(
             id=opt_id,
             text=flag,
             is_correct=is_correct,
-            explanation=f"{flag}: {flag_explanation}" if flag in FLAG_DATABASE.get(base_cmd, {}) else f"{flag}: Not a standard flag for {base_cmd}"
+            explanation=f"{flag}: {flag_explanation}" if flag in cmd_flags else f"{flag}: Not a standard flag for {base_cmd}"
         ))
     question_id = _generate_id(f"which_flag_{base_cmd}_{target_flag}")
@@ -835,7 +949,7 @@ def generate_build_command_quiz(
         distractors.append(" ".join(missing_flag))
     # Distractor 3: Wrong flag
-    if parsed["flags"] and base_cmd in FLAG_DATABASE:
+    if parsed["flags"] and _get_flags_for_cmd(base_cmd):
         wrong_flags = _generate_distractor_flags(base_cmd, parsed["flags"][0], 1)
         if wrong_flags:
             wrong_flag_cmd = [base_cmd] + [wrong_flags[0]] + parsed["flags"][1:] + parsed["args"]
@@ -1004,14 +1118,15 @@ def _create_similar_command_variant(command: dict) -> Optional[dict]:
     parsed = _parse_command(cmd_string)
     base_cmd = parsed["base"]
-    if base_cmd not in FLAG_DATABASE:
+    variant_flags = _get_flags_for_cmd(base_cmd)
+    if not variant_flags:
         return None
     # Strategy: add, remove, or change a flag
     strategies = []
     # Can add a flag
-    available_flags = [f for f in FLAG_DATABASE[base_cmd].keys() if f not in parsed["flags"]]
+    available_flags = [f for f in variant_flags.keys() if f not in parsed["flags"]]
     if available_flags:
         strategies.append("add")

package/vectors.db ADDED Viewed

Binary file