learn_bash_from_session_data 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bash-learner-output/run-2026-02-05-154214/index.html +3848 -0
- package/bash-learner-output/run-2026-02-05-154214/summary.json +148 -0
- package/bash-learner-output/run-2026-02-05-155427/index.html +3900 -0
- package/bash-learner-output/run-2026-02-05-155427/summary.json +157 -0
- package/bash-learner-output/run-2026-02-05-155949/index.html +4514 -0
- package/bash-learner-output/run-2026-02-05-155949/summary.json +163 -0
- package/package.json +1 -1
- package/scripts/html_generator.py +116 -33
- package/scripts/main.py +110 -5
- package/scripts/quiz_generator.py +84 -23
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"generated_at": "2026-02-05T15:59:49.814630",
|
|
4
|
+
"run_id": "run-2026-02-05-155949",
|
|
5
|
+
"version": "1.0.5"
|
|
6
|
+
},
|
|
7
|
+
"input": {
|
|
8
|
+
"sessions_processed": 10,
|
|
9
|
+
"session_files": [
|
|
10
|
+
{
|
|
11
|
+
"filename": "081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
|
|
12
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-session-with-chris/081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
|
|
13
|
+
"size": "759.1 KB",
|
|
14
|
+
"modified": "2026-02-05 11:01:08"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"filename": "2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
|
|
18
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
|
|
19
|
+
"size": "1.8 KB",
|
|
20
|
+
"modified": "2026-02-05 10:22:23"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"filename": "ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
|
|
24
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
|
|
25
|
+
"size": "1.8 KB",
|
|
26
|
+
"modified": "2026-01-23 14:32:05"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"filename": "6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
|
|
30
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
|
|
31
|
+
"size": "3.7 KB",
|
|
32
|
+
"modified": "2026-01-19 22:44:52"
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"filename": "9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
|
|
36
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
|
|
37
|
+
"size": "916.0 B",
|
|
38
|
+
"modified": "2026-01-19 09:25:47"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"filename": "dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
|
|
42
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-en-consulta/dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
|
|
43
|
+
"size": "1.0 KB",
|
|
44
|
+
"modified": "2026-01-16 06:42:34"
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"filename": "b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
|
|
48
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
|
|
49
|
+
"size": "116.0 B",
|
|
50
|
+
"modified": "2026-01-08 21:01:17"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"filename": "846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
|
|
54
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
|
|
55
|
+
"size": "1.8 KB",
|
|
56
|
+
"modified": "2026-01-08 21:00:57"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"filename": "agent-a979d7c.jsonl",
|
|
60
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a979d7c.jsonl",
|
|
61
|
+
"size": "2.0 KB",
|
|
62
|
+
"modified": "2026-01-06 20:47:47"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"filename": "agent-a1fb460.jsonl",
|
|
66
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a1fb460.jsonl",
|
|
67
|
+
"size": "1.9 KB",
|
|
68
|
+
"modified": "2026-01-06 20:47:44"
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"total_entries": 438
|
|
72
|
+
},
|
|
73
|
+
"analysis": {
|
|
74
|
+
"raw_commands_found": 37,
|
|
75
|
+
"unique_commands": 53,
|
|
76
|
+
"categories": [
|
|
77
|
+
"Unknown",
|
|
78
|
+
"Package Management",
|
|
79
|
+
"Text Processing",
|
|
80
|
+
"File System",
|
|
81
|
+
"Shell Builtins",
|
|
82
|
+
"Development",
|
|
83
|
+
"Git",
|
|
84
|
+
"Search & Navigation",
|
|
85
|
+
"Process & System"
|
|
86
|
+
],
|
|
87
|
+
"category_counts": {
|
|
88
|
+
"Unknown": 5,
|
|
89
|
+
"Package Management": 4,
|
|
90
|
+
"Text Processing": 3,
|
|
91
|
+
"File System": 7,
|
|
92
|
+
"Shell Builtins": 3,
|
|
93
|
+
"Development": 5,
|
|
94
|
+
"Git": 24,
|
|
95
|
+
"Search & Navigation": 1,
|
|
96
|
+
"Process & System": 1
|
|
97
|
+
},
|
|
98
|
+
"top_base_commands": [
|
|
99
|
+
{
|
|
100
|
+
"command": "cd",
|
|
101
|
+
"count": 18
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"command": "git",
|
|
105
|
+
"count": 17
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"command": "gh",
|
|
109
|
+
"count": 9
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"command": "python",
|
|
113
|
+
"count": 5
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"command": "mkdir",
|
|
117
|
+
"count": 3
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"command": "ls",
|
|
121
|
+
"count": 3
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
"command": "echo",
|
|
125
|
+
"count": 3
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"command": "session-slides",
|
|
129
|
+
"count": 3
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"command": "pip",
|
|
133
|
+
"count": 2
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"command": "head",
|
|
137
|
+
"count": 2
|
|
138
|
+
}
|
|
139
|
+
],
|
|
140
|
+
"operators_used": {
|
|
141
|
+
"||": 4,
|
|
142
|
+
"|": 3,
|
|
143
|
+
"2>&1": 8,
|
|
144
|
+
"2>/dev/null": 5,
|
|
145
|
+
"&&": 29,
|
|
146
|
+
">": 5,
|
|
147
|
+
"<": 13
|
|
148
|
+
},
|
|
149
|
+
"complexity_distribution": {
|
|
150
|
+
"1": 10,
|
|
151
|
+
"2": 23,
|
|
152
|
+
"3": 15,
|
|
153
|
+
"4": 3,
|
|
154
|
+
"5": 2
|
|
155
|
+
}
|
|
156
|
+
},
|
|
157
|
+
"output": {
|
|
158
|
+
"quiz_questions": 20,
|
|
159
|
+
"html_files": [
|
|
160
|
+
"bash-learner-output/run-2026-02-05-155949/index.html"
|
|
161
|
+
]
|
|
162
|
+
}
|
|
163
|
+
}
|
package/package.json
CHANGED
|
@@ -114,19 +114,52 @@ def _generate_html_impl(analysis_result: dict[str, Any], quizzes: list[dict[str,
|
|
|
114
114
|
</html>'''
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
def _generate_operators_html(operators_used: dict, operator_descriptions: dict) -> str:
|
|
118
|
+
"""Generate HTML for the operators used section."""
|
|
119
|
+
if not operators_used:
|
|
120
|
+
return '<p class="empty-state">No bash operators detected in these commands</p>'
|
|
121
|
+
|
|
122
|
+
operators_html = ""
|
|
123
|
+
# Sort by count descending
|
|
124
|
+
sorted_ops = sorted(operators_used.items(), key=lambda x: -x[1])
|
|
125
|
+
max_count = sorted_ops[0][1] if sorted_ops else 1
|
|
126
|
+
|
|
127
|
+
for op, count in sorted_ops:
|
|
128
|
+
name, desc = operator_descriptions.get(op, (op, 'Bash operator'))
|
|
129
|
+
bar_width = (count / max_count) * 100
|
|
130
|
+
operators_html += f'''
|
|
131
|
+
<div class="operator-item">
|
|
132
|
+
<div class="operator-symbol"><code>{html.escape(op)}</code></div>
|
|
133
|
+
<div class="operator-info">
|
|
134
|
+
<div class="operator-name">{html.escape(name)}</div>
|
|
135
|
+
<div class="operator-desc">{html.escape(desc)}</div>
|
|
136
|
+
</div>
|
|
137
|
+
<div class="operator-bar-container">
|
|
138
|
+
<div class="operator-bar" style="width: {bar_width}%"></div>
|
|
139
|
+
</div>
|
|
140
|
+
<div class="operator-count">{count}</div>
|
|
141
|
+
</div>'''
|
|
142
|
+
return operators_html
|
|
143
|
+
|
|
144
|
+
|
|
117
145
|
def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories: dict) -> str:
|
|
118
146
|
"""Render the overview/dashboard tab content."""
|
|
119
147
|
total_commands = stats.get("total_commands", 0)
|
|
120
148
|
unique_commands = stats.get("unique_commands", 0)
|
|
121
149
|
unique_utilities = stats.get("unique_utilities", 0)
|
|
122
150
|
date_range = stats.get("date_range", {"start": "N/A", "end": "N/A"})
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
151
|
+
# Get operators data for the "Bash Operators Used" section
|
|
152
|
+
operators_used = stats.get("operators_used", {})
|
|
153
|
+
operator_descriptions = {
|
|
154
|
+
'|': ('Pipe', 'Sends output of one command to input of another'),
|
|
155
|
+
'||': ('OR operator', 'Run next command if previous failed'),
|
|
156
|
+
'&&': ('AND operator', 'Run next command if previous succeeded'),
|
|
157
|
+
'2>&1': ('Redirect stderr', 'Combines error output with standard output'),
|
|
158
|
+
'2>/dev/null': ('Suppress errors', 'Discards error messages'),
|
|
159
|
+
'>': ('Redirect output', 'Writes output to a file (overwrites)'),
|
|
160
|
+
'>>': ('Append output', 'Appends output to a file'),
|
|
161
|
+
'<': ('Redirect input', 'Reads input from a file'),
|
|
162
|
+
}
|
|
130
163
|
|
|
131
164
|
# Top 10 commands by frequency - use pre-computed data if available
|
|
132
165
|
top_commands_data = stats.get("top_commands", [])
|
|
@@ -237,29 +270,9 @@ def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories:
|
|
|
237
270
|
|
|
238
271
|
<div class="charts-row">
|
|
239
272
|
<div class="chart-card">
|
|
240
|
-
<h3>
|
|
241
|
-
<div class="
|
|
242
|
-
|
|
243
|
-
<span class="complexity-label simple">Simple</span>
|
|
244
|
-
<div class="complexity-bar-bg">
|
|
245
|
-
<div class="complexity-bar simple" style="width: {simple_pct}%"></div>
|
|
246
|
-
</div>
|
|
247
|
-
<span class="complexity-count">{complexity_dist.get("simple", 0)}</span>
|
|
248
|
-
</div>
|
|
249
|
-
<div class="complexity-row">
|
|
250
|
-
<span class="complexity-label intermediate">Intermediate</span>
|
|
251
|
-
<div class="complexity-bar-bg">
|
|
252
|
-
<div class="complexity-bar intermediate" style="width: {intermediate_pct}%"></div>
|
|
253
|
-
</div>
|
|
254
|
-
<span class="complexity-count">{complexity_dist.get("intermediate", 0)}</span>
|
|
255
|
-
</div>
|
|
256
|
-
<div class="complexity-row">
|
|
257
|
-
<span class="complexity-label advanced">Advanced</span>
|
|
258
|
-
<div class="complexity-bar-bg">
|
|
259
|
-
<div class="complexity-bar advanced" style="width: {advanced_pct}%"></div>
|
|
260
|
-
</div>
|
|
261
|
-
<span class="complexity-count">{complexity_dist.get("advanced", 0)}</span>
|
|
262
|
-
</div>
|
|
273
|
+
<h3>Bash Operators Used</h3>
|
|
274
|
+
<div class="operators-list">
|
|
275
|
+
{_generate_operators_html(operators_used, operator_descriptions)}
|
|
263
276
|
</div>
|
|
264
277
|
</div>
|
|
265
278
|
|
|
@@ -385,11 +398,10 @@ def render_commands_tab(commands: list[dict]) -> str:
|
|
|
385
398
|
</div>'''
|
|
386
399
|
|
|
387
400
|
commands_html += f'''
|
|
388
|
-
<div class="command-card" data-category="{category}" data-
|
|
401
|
+
<div class="command-card" data-category="{category}" data-frequency="{frequency}" data-name="{base_cmd}">
|
|
389
402
|
<div class="command-header" onclick="toggleCommand('{cmd_id}')">
|
|
390
403
|
<div class="command-main">
|
|
391
404
|
<code class="cmd">{base_cmd}</code>
|
|
392
|
-
<span class="complexity-badge {complexity}">{complexity}</span>
|
|
393
405
|
<span class="category-badge">{category}</span>
|
|
394
406
|
</div>
|
|
395
407
|
<div class="command-meta">
|
|
@@ -504,7 +516,6 @@ def render_lessons_tab(categories: dict, commands: list[dict]) -> str:
|
|
|
504
516
|
<div class="lesson-command">
|
|
505
517
|
<div class="lesson-command-header">
|
|
506
518
|
<code class="cmd">{base_cmd}</code>
|
|
507
|
-
<span class="complexity-badge {complexity}">{complexity}</span>
|
|
508
519
|
</div>
|
|
509
520
|
<pre class="syntax-highlighted">{highlighted}</pre>
|
|
510
521
|
<p class="lesson-description">{description}</p>
|
|
@@ -990,6 +1001,77 @@ def get_inline_css() -> str:
|
|
|
990
1001
|
color: var(--text-secondary);
|
|
991
1002
|
}
|
|
992
1003
|
|
|
1004
|
+
/* Operators List */
|
|
1005
|
+
.operators-list {
|
|
1006
|
+
display: flex;
|
|
1007
|
+
flex-direction: column;
|
|
1008
|
+
gap: 12px;
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
.operator-item {
|
|
1012
|
+
display: grid;
|
|
1013
|
+
grid-template-columns: 80px 1fr 120px 50px;
|
|
1014
|
+
align-items: center;
|
|
1015
|
+
gap: 12px;
|
|
1016
|
+
padding: 8px 0;
|
|
1017
|
+
border-bottom: 1px solid var(--border-color);
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
.operator-item:last-child {
|
|
1021
|
+
border-bottom: none;
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
.operator-symbol {
|
|
1025
|
+
font-family: var(--font-mono);
|
|
1026
|
+
font-size: 1rem;
|
|
1027
|
+
font-weight: 600;
|
|
1028
|
+
color: var(--accent-primary);
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
.operator-symbol code {
|
|
1032
|
+
background: var(--bg-tertiary);
|
|
1033
|
+
padding: 4px 8px;
|
|
1034
|
+
border-radius: var(--radius-sm);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
.operator-info {
|
|
1038
|
+
display: flex;
|
|
1039
|
+
flex-direction: column;
|
|
1040
|
+
gap: 2px;
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
.operator-name {
|
|
1044
|
+
font-size: 0.9rem;
|
|
1045
|
+
font-weight: 600;
|
|
1046
|
+
color: var(--text-primary);
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
.operator-desc {
|
|
1050
|
+
font-size: 0.8rem;
|
|
1051
|
+
color: var(--text-secondary);
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
.operator-bar-container {
|
|
1055
|
+
height: 20px;
|
|
1056
|
+
background: var(--bg-tertiary);
|
|
1057
|
+
border-radius: var(--radius-sm);
|
|
1058
|
+
overflow: hidden;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
.operator-bar {
|
|
1062
|
+
height: 100%;
|
|
1063
|
+
background: var(--accent-primary);
|
|
1064
|
+
border-radius: var(--radius-sm);
|
|
1065
|
+
transition: width 0.5s ease;
|
|
1066
|
+
}
|
|
1067
|
+
|
|
1068
|
+
.operator-count {
|
|
1069
|
+
font-size: 0.9rem;
|
|
1070
|
+
font-weight: 600;
|
|
1071
|
+
text-align: right;
|
|
1072
|
+
color: var(--text-secondary);
|
|
1073
|
+
}
|
|
1074
|
+
|
|
993
1075
|
/* Pie Chart */
|
|
994
1076
|
.pie-container {
|
|
995
1077
|
display: flex;
|
|
@@ -2081,6 +2163,7 @@ def generate_html_files(
|
|
|
2081
2163
|
'complexity_avg': stats.get('average_complexity', 2),
|
|
2082
2164
|
'complexity_distribution': complexity_distribution,
|
|
2083
2165
|
'top_commands': top_10_commands, # Pre-computed top commands with frequencies
|
|
2166
|
+
'operators_used': analysis.get('operators_used', {}), # Bash operators like ||, &&, |, 2>&1
|
|
2084
2167
|
},
|
|
2085
2168
|
'commands': formatted_commands,
|
|
2086
2169
|
'categories': {cat: [c.get('command', '') for c in cmds] for cat, cmds in categories.items()},
|
package/scripts/main.py
CHANGED
|
@@ -319,11 +319,66 @@ def run_extraction_pipeline(
|
|
|
319
319
|
parsed_commands = parse_commands(raw_commands)
|
|
320
320
|
print(f" -> Parsed {len(parsed_commands)} commands")
|
|
321
321
|
|
|
322
|
-
# Step 4:
|
|
322
|
+
# Step 4: Expand compound commands into individual sub-commands
|
|
323
|
+
# Also count operators for tracking
|
|
323
324
|
from collections import Counter
|
|
325
|
+
import re
|
|
326
|
+
|
|
327
|
+
operator_frequency = Counter()
|
|
328
|
+
expanded_commands = []
|
|
329
|
+
|
|
330
|
+
# Operator patterns to detect
|
|
331
|
+
operator_patterns = {
|
|
332
|
+
'||': r'\|\|',
|
|
333
|
+
'&&': r'&&',
|
|
334
|
+
'|': r'(?<!\|)\|(?!\|)', # Single pipe, not ||
|
|
335
|
+
'2>&1': r'2>&1',
|
|
336
|
+
'2>/dev/null': r'2>/dev/null',
|
|
337
|
+
'>': r'(?<![2&])>(?!>|&)', # Single >, not >> or 2> or >&
|
|
338
|
+
'>>': r'>>',
|
|
339
|
+
'<': r'<(?!<)',
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
for cmd in parsed_commands:
|
|
343
|
+
cmd_str = cmd.get('command', '') or cmd.get('raw', '')
|
|
344
|
+
if not cmd_str:
|
|
345
|
+
continue
|
|
346
|
+
|
|
347
|
+
# Count operators in this command
|
|
348
|
+
for op_name, op_pattern in operator_patterns.items():
|
|
349
|
+
matches = re.findall(op_pattern, cmd_str)
|
|
350
|
+
if matches:
|
|
351
|
+
operator_frequency[op_name] += len(matches)
|
|
352
|
+
|
|
353
|
+
# Check if this is a compound command
|
|
354
|
+
is_compound = any(op in cmd_str for op in ['||', '&&', ' | ', ';'])
|
|
355
|
+
|
|
356
|
+
if is_compound:
|
|
357
|
+
# Extract individual sub-commands from compound statement
|
|
358
|
+
sub_commands = extract_sub_commands(cmd_str)
|
|
359
|
+
for sub_cmd in sub_commands:
|
|
360
|
+
if sub_cmd.strip():
|
|
361
|
+
expanded_commands.append({
|
|
362
|
+
'command': sub_cmd.strip(),
|
|
363
|
+
'raw': sub_cmd.strip(),
|
|
364
|
+
'original_compound': cmd_str,
|
|
365
|
+
'description': cmd.get('description', ''),
|
|
366
|
+
'output': cmd.get('output', ''),
|
|
367
|
+
})
|
|
368
|
+
else:
|
|
369
|
+
# Simple command - add as-is
|
|
370
|
+
expanded_commands.append(cmd)
|
|
371
|
+
|
|
372
|
+
print(f" -> Expanded to {len(expanded_commands)} individual commands")
|
|
373
|
+
|
|
374
|
+
# Step 5: Re-parse expanded commands to get proper base_command for each
|
|
375
|
+
parsed_expanded = parse_commands(expanded_commands)
|
|
376
|
+
|
|
377
|
+
# Step 6: Count frequencies BEFORE deduplication
|
|
324
378
|
cmd_frequency = Counter()
|
|
325
379
|
base_cmd_frequency = Counter()
|
|
326
|
-
|
|
380
|
+
|
|
381
|
+
for cmd in parsed_expanded:
|
|
327
382
|
cmd_str = cmd.get('command', '') or cmd.get('raw', '')
|
|
328
383
|
base_cmd = cmd.get('base_command', '')
|
|
329
384
|
if cmd_str:
|
|
@@ -331,8 +386,8 @@ def run_extraction_pipeline(
|
|
|
331
386
|
if base_cmd:
|
|
332
387
|
base_cmd_frequency[base_cmd] += 1
|
|
333
388
|
|
|
334
|
-
# Step
|
|
335
|
-
unique_commands = deduplicate_commands(
|
|
389
|
+
# Step 7: Deduplicate and add frequency data
|
|
390
|
+
unique_commands = deduplicate_commands(parsed_expanded)
|
|
336
391
|
|
|
337
392
|
# Add frequency to each unique command
|
|
338
393
|
for cmd in unique_commands:
|
|
@@ -357,6 +412,7 @@ def run_extraction_pipeline(
|
|
|
357
412
|
analysis['base_command_frequency'] = dict(base_cmd_frequency)
|
|
358
413
|
analysis['top_commands'] = cmd_frequency.most_common(20)
|
|
359
414
|
analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
|
|
415
|
+
analysis['operators_used'] = dict(operator_frequency)
|
|
360
416
|
print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
|
|
361
417
|
|
|
362
418
|
# Step 6: Generate quizzes
|
|
@@ -375,7 +431,7 @@ def run_extraction_pipeline(
|
|
|
375
431
|
"metadata": {
|
|
376
432
|
"generated_at": datetime.now().isoformat(),
|
|
377
433
|
"run_id": output_dir.name,
|
|
378
|
-
"version": "1.0.
|
|
434
|
+
"version": "1.0.5",
|
|
379
435
|
},
|
|
380
436
|
"input": {
|
|
381
437
|
"sessions_processed": len(sessions),
|
|
@@ -399,6 +455,7 @@ def run_extraction_pipeline(
|
|
|
399
455
|
{"command": cmd, "count": count}
|
|
400
456
|
for cmd, count in list(base_cmd_frequency.most_common(10))
|
|
401
457
|
],
|
|
458
|
+
"operators_used": dict(operator_frequency),
|
|
402
459
|
"complexity_distribution": dict(analysis.get('complexity_distribution', {})),
|
|
403
460
|
},
|
|
404
461
|
"output": {
|
|
@@ -416,6 +473,54 @@ def run_extraction_pipeline(
|
|
|
416
473
|
return True, f"Successfully generated learning materials in {output_dir}"
|
|
417
474
|
|
|
418
475
|
|
|
476
|
+
def extract_sub_commands(cmd_str: str) -> List[str]:
    """
    Extract individual sub-commands from a compound command.

    Splits commands by ||, &&, |, and ; while preserving each sub-command
    as a learnable unit. Redirections such as ``2>/dev/null`` stay attached
    to the sub-command they belong to.

    Separators inside single or double quotes, or preceded by a backslash,
    are NOT treated as split points, so ``git commit -m "fix; update"`` and
    ``find . -exec rm {} \\;`` are each kept whole. Command substitution
    (``$(...)``), subshells and here-documents are not parsed; separators
    inside them are still treated as split points.

    Args:
        cmd_str: The compound command string

    Returns:
        List of individual sub-command strings, stripped of surrounding
        whitespace, with empty pieces dropped.
    """
    sub_commands: List[str] = []
    current: List[str] = []
    active_quote = ''  # the quote character we are inside of, or '' if none
    i = 0
    length = len(cmd_str)

    def flush() -> None:
        # Emit the accumulated piece (if non-empty) and start a new one.
        piece = ''.join(current).strip()
        if piece:
            sub_commands.append(piece)
        current.clear()

    while i < length:
        ch = cmd_str[i]
        if ch == '\\' and active_quote != "'" and i + 1 < length:
            # A backslash escapes the next character everywhere except
            # inside single quotes, where it is literal.
            current.append(cmd_str[i:i + 2])
            i += 2
        elif active_quote:
            current.append(ch)
            if ch == active_quote:
                active_quote = ''
            i += 1
        elif ch in ('"', "'"):
            active_quote = ch
            current.append(ch)
            i += 1
        elif cmd_str.startswith(('||', '&&'), i):
            # Check two-character operators before the single pipe so that
            # "||" is never read as two pipes.
            flush()
            i += 2
        elif ch in (';', '|'):
            flush()
            i += 1
        else:
            current.append(ch)
            i += 1

    flush()
    return sub_commands
|
|
522
|
+
|
|
523
|
+
|
|
419
524
|
def deduplicate_commands(commands: List[Dict]) -> List[Dict]:
|
|
420
525
|
"""
|
|
421
526
|
Remove duplicate commands while preserving order.
|
|
@@ -397,37 +397,98 @@ def _generate_bash_description(cmd_string: str) -> str:
|
|
|
397
397
|
Generate an educational description focusing on bash concepts.
|
|
398
398
|
|
|
399
399
|
Explains what each part of the command does from a bash perspective.
|
|
400
|
+
Handles: &&, ||, |, 2>&1, 2>/dev/null, and combinations.
|
|
400
401
|
"""
|
|
401
402
|
if not cmd_string:
|
|
402
403
|
return "Runs a command"
|
|
403
404
|
|
|
405
|
+
# Clean up redirections for description (note them but don't clutter)
|
|
406
|
+
has_stderr_to_stdout = '2>&1' in cmd_string
|
|
407
|
+
has_stderr_to_null = '2>/dev/null' in cmd_string
|
|
408
|
+
has_stdout_redirect = re.search(r'>\s*\S+', cmd_string) and '2>' not in cmd_string
|
|
409
|
+
|
|
410
|
+
# Remove redirections for parsing (we'll note them separately)
|
|
411
|
+
clean_cmd = re.sub(r'\s*2>&1\s*', ' ', cmd_string)
|
|
412
|
+
clean_cmd = re.sub(r'\s*2>/dev/null\s*', ' ', clean_cmd)
|
|
413
|
+
clean_cmd = re.sub(r'\s*>\s*\S+\s*', ' ', clean_cmd)
|
|
414
|
+
clean_cmd = ' '.join(clean_cmd.split()) # normalize whitespace
|
|
415
|
+
|
|
404
416
|
parts = []
|
|
405
417
|
|
|
406
|
-
#
|
|
407
|
-
if ' && ' in
|
|
408
|
-
commands =
|
|
418
|
+
# Handle && (run if previous succeeds)
|
|
419
|
+
if ' && ' in clean_cmd:
|
|
420
|
+
commands = clean_cmd.split(' && ')
|
|
409
421
|
for i, cmd in enumerate(commands):
|
|
410
|
-
|
|
411
|
-
if
|
|
412
|
-
|
|
422
|
+
cmd = cmd.strip()
|
|
423
|
+
if not cmd:
|
|
424
|
+
continue
|
|
425
|
+
# Handle nested || or | within && segments
|
|
426
|
+
if ' || ' in cmd:
|
|
427
|
+
parts.append(_describe_or_chain(cmd))
|
|
428
|
+
elif ' | ' in cmd:
|
|
429
|
+
parts.append(_describe_pipe_chain(cmd))
|
|
430
|
+
elif i == 0:
|
|
431
|
+
parts.append(_describe_single_command(cmd))
|
|
413
432
|
else:
|
|
414
|
-
parts.append(f"then {_describe_single_command(cmd
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
parts.append(_describe_single_command(
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
433
|
+
parts.append(f"then {_describe_single_command(cmd)}")
|
|
434
|
+
|
|
435
|
+
# Handle || (run if previous fails)
|
|
436
|
+
elif ' || ' in clean_cmd:
|
|
437
|
+
parts.append(_describe_or_chain(clean_cmd))
|
|
438
|
+
|
|
439
|
+
# Handle | (pipe)
|
|
440
|
+
elif ' | ' in clean_cmd:
|
|
441
|
+
parts.append(_describe_pipe_chain(clean_cmd))
|
|
442
|
+
|
|
443
|
+
else:
|
|
444
|
+
parts.append(_describe_single_command(clean_cmd))
|
|
445
|
+
|
|
446
|
+
result = ', '.join(parts)
|
|
447
|
+
|
|
448
|
+
# Add redirection notes
|
|
449
|
+
if has_stderr_to_null:
|
|
450
|
+
result += " (suppressing errors)"
|
|
451
|
+
elif has_stderr_to_stdout:
|
|
452
|
+
result += " (capturing all output)"
|
|
453
|
+
|
|
454
|
+
return result
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def _describe_or_chain(cmd_string: str) -> str:
    """Describe an || chain (fallback pattern).

    The string is split on the literal ``' || '``. Blank segments are
    skipped. Each remaining segment is described by the pipe-chain helper
    when it contains ``' | '``, otherwise by the single-command helper.
    Every segment except the one at position 0 is prefixed with
    ``"or if that fails, "``; the pieces are joined with ``', '``.
    """
    described = []
    for position, segment in enumerate(cmd_string.split(' || ')):
        segment = segment.strip()
        if segment:
            # Segments that themselves contain a pipe get the pipe wording.
            describe = _describe_pipe_chain if ' | ' in segment else _describe_single_command
            text = describe(segment)
            described.append(text if position == 0 else f"or if that fails, {text}")
    return ', '.join(described)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _describe_pipe_chain(cmd_string: str) -> str:
    """Describe a pipe chain.

    The string is split on the literal ``' | '``. Blank stages are skipped.
    Each remaining stage is described by the single-command helper, and
    every stage except the one at position 0 is prefixed with
    ``"pipes to "``; the pieces are joined with ``', '``.
    """
    # Keep the original position next to each stage: the prefix depends on
    # the index in the split result, not on how many stages survived.
    stages = [
        (position, stage.strip())
        for position, stage in enumerate(cmd_string.split(' | '))
    ]
    return ', '.join(
        _describe_single_command(stage)
        if position == 0
        else f"pipes to {_describe_single_command(stage)}"
        for position, stage in stages
        if stage
    )
|
|
431
492
|
|
|
432
493
|
|
|
433
494
|
def _describe_single_command(cmd: str) -> str:
|