learn_bash_from_session_data 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/analyzer.py +64 -0
- package/scripts/extractor.py +37 -0
- package/scripts/html_generator.py +123 -2
- package/scripts/parser.py +42 -5
- package/scripts/quiz_generator.py +31 -0
- package/test-output/index.html +3954 -0
- package/test-output/summary.json +19 -0
package/package.json
CHANGED
package/scripts/analyzer.py
CHANGED
|
@@ -565,6 +565,70 @@ def quick_analyze(commands: List[str], verbose: bool = False) -> Dict[str, Any]:
|
|
|
565
565
|
return summary
|
|
566
566
|
|
|
567
567
|
|
|
568
|
+
def analyze_commands(commands: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze a list of command dictionaries for the pipeline.

    This is the interface expected by main.py.

    The command text of each entry is read from its 'command' key, falling
    back to 'raw'. Entries where both are missing or empty are ignored.

    Args:
        commands: List of command dictionaries with a 'command' (or 'raw')
            key. Optional 'description', 'output' and 'success' keys are
            copied onto the analyzed entries ('success' defaults to True).

    Returns:
        Dictionary with 'categories', 'commands', 'statistics',
        'category_breakdown', 'complexity_distribution' and 'top_commands'
        keys. The same keys are present when there is nothing to analyze,
        so callers can index the result unconditionally.
    """
    # Resolve each entry's command text once, so the session-level analysis
    # and the per-command records are built from exactly the same entries.
    usable = []
    for cmd_dict in commands:
        cmd_str = cmd_dict.get('command', '') or cmd_dict.get('raw', '')
        if cmd_str:
            usable.append((cmd_dict, cmd_str))

    if not usable:
        return {
            'categories': {},
            'commands': [],
            'statistics': {},
            # NOTE(review): empty placeholders so the empty result has the
            # same keys as the populated one; container types are assumed
            # (mapping / mapping / sequence) - confirm against the result
            # type returned by analyze_session.
            'category_breakdown': {},
            'complexity_distribution': {},
            'top_commands': [],
        }

    result = analyze_session([cmd_str for _, cmd_str in usable])

    # Build the analyzed command list and group it by category in one pass.
    analyzed_commands = []
    categories = {}
    for cmd_dict, cmd_str in usable:
        parsed = parse_command(cmd_str)
        entry = {
            'command': cmd_str,
            'description': cmd_dict.get('description', ''),
            'output': cmd_dict.get('output', ''),
            'base_command': parsed.base_command,
            'flags': parsed.flags,
            'args': parsed.args,
            'complexity': score_complexity(parsed),
            'category': assign_category(parsed),
            'success': cmd_dict.get('success', True),
        }
        analyzed_commands.append(entry)
        # The same entry object is shared between 'commands' and 'categories'.
        categories.setdefault(entry['category'], []).append(entry)

    return {
        'categories': categories,
        'commands': analyzed_commands,
        'statistics': result.statistics,
        'category_breakdown': result.category_breakdown,
        'complexity_distribution': result.complexity_distribution,
        'top_commands': result.top_commands,
    }
|
|
630
|
+
|
|
631
|
+
|
|
568
632
|
if __name__ == "__main__":
|
|
569
633
|
# Example usage and testing
|
|
570
634
|
test_commands = [
|
package/scripts/extractor.py
CHANGED
|
@@ -348,6 +348,43 @@ class JSONLExtractor:
|
|
|
348
348
|
return all_commands
|
|
349
349
|
|
|
350
350
|
|
|
351
|
+
def extract_commands(entries: list[dict]) -> list[dict]:
    """
    Extract bash commands from a list of session entries.

    This is the interface expected by main.py for pipeline processing.

    Every entry is passed to a fresh JSONLExtractor together with two shared
    accumulator dicts and the entry's zero-based position in ``entries``;
    the accumulated data is then turned into commands by the extractor.

    Args:
        entries: List of parsed JSON entries from session files

    Returns:
        List of command dictionaries with 'command', 'description', 'output',
        'timestamp', 'success' and 'exit_code' keys
    """
    jsonl_extractor = JSONLExtractor()
    pending_uses: dict[str, dict] = {}
    pending_results: dict[str, dict] = {}

    # The position of the entry in the input doubles as its sequence number.
    for position, entry in enumerate(entries):
        jsonl_extractor._process_entry(entry, pending_uses, pending_results, position)

    correlated = jsonl_extractor._correlate_commands(pending_uses, pending_results)

    # Flatten the ExtractedCommand objects into plain dicts for the pipeline.
    records = []
    for item in correlated:
        records.append({
            'command': item.command,
            'description': item.description,
            'output': item.output,
            'timestamp': item.timestamp,
            'success': item.success,
            'exit_code': item.exit_code,
        })
    return records
|
|
386
|
+
|
|
387
|
+
|
|
351
388
|
def extract_commands_from_jsonl(file_path: str | Path) -> list[ExtractedCommand]:
|
|
352
389
|
"""
|
|
353
390
|
Convenience function to extract commands from a single JSONL file.
|
|
package/scripts/html_generator.py
CHANGED
|
@@ -6,13 +6,14 @@ Generates a single self-contained HTML file with all CSS and JS inline.
|
|
|
6
6
|
No external dependencies - pure Python standard library.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any, List
|
|
10
10
|
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
11
12
|
import html
|
|
12
13
|
import json
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
def
|
|
16
|
+
def _generate_html_impl(analysis_result: dict[str, Any], quizzes: list[dict[str, Any]]) -> str:
|
|
16
17
|
"""
|
|
17
18
|
Generate complete HTML report from analysis results and quizzes.
|
|
18
19
|
|
|
@@ -1957,6 +1958,126 @@ def get_inline_js(quizzes: list[dict]) -> str:
|
|
|
1957
1958
|
'''
|
|
1958
1959
|
|
|
1959
1960
|
|
|
1961
|
+
def generate_html_files(
    commands: List[dict],
    analysis: dict,
    quizzes: list,
    output_dir: Path
) -> List[Path]:
    """
    Generate HTML files from commands, analysis and quizzes.

    This is the interface expected by main.py for the pipeline.

    The inputs are reshaped into the structures passed to
    _generate_html_impl, and the resulting page is written to
    ``<output_dir>/index.html`` (the directory is created if needed).

    Args:
        commands: List of command dictionaries; used when ``analysis`` has
            no 'commands' key and as the fallback for the command counts
        analysis: Analysis dictionary from analyze_commands
        quizzes: List of quiz dictionaries
        output_dir: Output directory path

    Returns:
        List of generated file paths (currently only index.html)
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    stats = analysis.get('statistics', {})
    categories = analysis.get('categories', {})
    analyzed_commands = analysis.get('commands', commands)

    analysis_result = {
        'stats': {
            'total_commands': stats.get('total_commands', len(commands)),
            'unique_commands': stats.get('unique_commands', len(commands)),
            'total_categories': len(categories),
            'complexity_avg': stats.get('average_complexity', 2),
        },
        'commands': [_html_command_entry(cmd) for cmd in analyzed_commands],
        # The report only needs the command text per category.
        'categories': {
            cat: [c.get('command', '') for c in cmds]
            for cat, cmds in categories.items()
        },
    }
    formatted_quizzes = [_html_quiz_entry(quiz) for quiz in quizzes]

    html_content = _generate_html_impl(analysis_result, formatted_quizzes)

    index_file = output_dir / "index.html"
    index_file.write_text(html_content, encoding='utf-8')

    return [index_file]


def _html_flag_entries(raw_flags: Any) -> List[dict]:
    """Convert flags to dicts with 'flag' and 'description'; drop other types."""
    entries = []
    # A missing or None 'flags' value is treated as "no flags".
    for flag in raw_flags or []:
        if isinstance(flag, dict):
            entries.append(flag)
        elif isinstance(flag, str):
            entries.append({'flag': flag, 'description': ''})
    return entries


def _html_command_entry(cmd: dict) -> dict:
    """Reshape one analyzed command into the dict used for the report."""
    # Fall back to the first whitespace-separated token of the command.
    # A whitespace-only command has no tokens, so guard the index access
    # instead of raising IndexError.
    tokens = (cmd.get('command') or '').split()
    first_token = tokens[0] if tokens else ''
    return {
        'base_command': cmd.get('base_command', first_token),
        'full_command': cmd.get('command', ''),
        'category': cmd.get('category', 'Other'),
        'complexity': cmd.get('complexity', 1),
        'frequency': cmd.get('frequency', 1),
        'description': cmd.get('description', ''),
        'flags': _html_flag_entries(cmd.get('flags', [])),
        'is_new': False,
    }


def _html_quiz_entry(quiz: dict) -> dict:
    """Reshape one quiz: options become strings, correct answer an int index."""
    option_texts = []
    # Index 0 is used when no option is marked correct; when several are
    # marked, the last one wins.
    correct_idx = 0
    for idx, opt in enumerate(quiz.get('options') or []):
        if isinstance(opt, dict):
            option_texts.append(opt.get('text', ''))
            if opt.get('is_correct', False):
                correct_idx = idx
        else:
            option_texts.append(str(opt))
    return {
        'question': quiz.get('question', ''),
        'options': option_texts,
        'correct_answer': correct_idx,
        'explanation': quiz.get('explanation', ''),
    }
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
def generate_html(
    commands_or_analysis: Any,
    analysis_or_quizzes: Any = None,
    quizzes: Any = None,
    output_dir: Any = None
) -> Any:
    """
    Wrapper that handles both original 2-param and main.py 4-param signatures.

    Original: generate_html(analysis_result, quizzes) -> str
    Pipeline: generate_html(commands, analysis, quizzes, output_dir) -> List[Path]

    Dispatch rules, checked in order:
      * ``output_dir`` given: pipeline call, files are written there.
      * ``quizzes`` not given: original call, the second argument holds
        the quizzes and the HTML string is returned.
      * ``quizzes`` given but no second argument: original call with the
        quizzes passed by keyword, i.e.
        ``generate_html(analysis_result, quizzes=...)``; the HTML string
        is returned.
      * otherwise: 3-argument pipeline call, files are written to
        ``./output``.
    """
    if output_dir is not None:
        # Called with 4 params from main.py pipeline
        return generate_html_files(commands_or_analysis, analysis_or_quizzes, quizzes, output_dir)

    if quizzes is None:
        # Original 2-param call: generate_html(analysis_result, quizzes)
        return _generate_html_impl(commands_or_analysis, analysis_or_quizzes)

    if analysis_or_quizzes is None:
        # Original call with quizzes passed by keyword. Routing this to the
        # file pipeline would hand it analysis=None and fail there, after
        # already having created ./output.
        return _generate_html_impl(commands_or_analysis, quizzes)

    # Called with 3 params (shouldn't happen but handle it)
    return generate_html_files(commands_or_analysis, analysis_or_quizzes, quizzes, Path('./output'))
|
|
2079
|
+
|
|
2080
|
+
|
|
1960
2081
|
if __name__ == "__main__":
|
|
1961
2082
|
# Test with sample data
|
|
1962
2083
|
sample_analysis = {
|
package/scripts/parser.py
CHANGED
|
@@ -574,19 +574,56 @@ def parse_command(
|
|
|
574
574
|
|
|
575
575
|
|
|
576
576
|
def parse_commands(
    commands: list
) -> list[dict]:
    """
    Convenience function to parse multiple bash commands.

    Args:
        commands: List of (command, description, output) tuples OR
            List of dicts with 'command', 'description', 'output' keys.
            Bare strings are also accepted and treated as a command with
            empty description and output. Items of any other shape,
            including sequences with fewer than three elements, are skipped.

    Returns:
        List of parsed command dictionaries
    """
    def as_triple(entry):
        # Map one input item to (command, description, output); None = skip.
        if isinstance(entry, dict):
            return (
                entry.get('command', ''),
                entry.get('description', ''),
                entry.get('output', ''),
            )
        if isinstance(entry, (tuple, list)):
            return (entry[0], entry[1], entry[2]) if len(entry) >= 3 else None
        if isinstance(entry, str):
            return (entry, '', '')
        return None

    def as_record(parsed):
        # Flatten one ParsedCommand object into a plain dict for the pipeline.
        bases = parsed.base_commands
        return {
            'command': parsed.raw,
            'raw': parsed.raw,
            'base_command': bases[0] if bases else '',
            'base_commands': parsed.base_commands,
            'flags': parsed.flags,
            'args': parsed.arguments,
            'pipes': parsed.pipes,
            'redirects': parsed.redirects,
            'category': parsed.category.value if parsed.category else 'unknown',
            'complexity': parsed.complexity_score,
            'description': parsed.description,
            'output': parsed.output,
            'is_compound': len(parsed.base_commands) > 1 or len(parsed.pipes) > 0,
        }

    parser = BashParser()

    triples = []
    for entry in commands:
        triple = as_triple(entry)
        if triple is not None:
            triples.append(triple)

    records = []
    for parsed in parser.parse_batch(triples):
        records.append(as_record(parsed))
    return records
|
|
590
627
|
|
|
591
628
|
|
|
592
629
|
if __name__ == "__main__":
|
|
package/scripts/quiz_generator.py
CHANGED
|
@@ -993,6 +993,37 @@ def create_quiz(
|
|
|
993
993
|
)
|
|
994
994
|
|
|
995
995
|
|
|
996
|
+
def generate_quizzes(
    commands: list[dict],
    analysis: dict,
    question_count: int = 20
) -> list[dict]:
    """
    Generate quizzes from commands and analysis for the pipeline.

    This is the interface expected by main.py.

    Args:
        commands: List of command dictionaries; only used when ``analysis``
            has no 'commands' key
        analysis: Analysis dictionary from analyze_commands
        question_count: Target number of questions

    Returns:
        List of quiz question dictionaries; empty when there are no
        commands to build questions from
    """
    # Prefer the analyzed commands; the raw list is only the default for a
    # missing 'commands' key (an empty analyzed list is used as-is).
    source_commands = analysis.get('commands', commands)

    quiz_dicts = []
    if source_commands:
        # Each generated question serializes itself through to_dict().
        for question in generate_quiz_set(source_commands, question_count):
            quiz_dicts.append(question.to_dict())
    return quiz_dicts
|
|
1025
|
+
|
|
1026
|
+
|
|
996
1027
|
# =============================================================================
|
|
997
1028
|
# Main entry point for testing
|
|
998
1029
|
# =============================================================================
|