learn_bash_from_session_data 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bash-learner-output/run-2026-02-05-154214/index.html +3848 -0
- package/bash-learner-output/run-2026-02-05-154214/summary.json +148 -0
- package/bash-learner-output/run-2026-02-05-155427/index.html +3900 -0
- package/bash-learner-output/run-2026-02-05-155427/summary.json +157 -0
- package/bash-learner-output/run-2026-02-05-155949/index.html +4514 -0
- package/bash-learner-output/run-2026-02-05-155949/summary.json +163 -0
- package/package.json +1 -1
- package/scripts/html_generator.py +124 -37
- package/scripts/main.py +190 -17
- package/scripts/quiz_generator.py +84 -23
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"generated_at": "2026-02-05T15:59:49.814630",
|
|
4
|
+
"run_id": "run-2026-02-05-155949",
|
|
5
|
+
"version": "1.0.5"
|
|
6
|
+
},
|
|
7
|
+
"input": {
|
|
8
|
+
"sessions_processed": 10,
|
|
9
|
+
"session_files": [
|
|
10
|
+
{
|
|
11
|
+
"filename": "081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
|
|
12
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-session-with-chris/081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
|
|
13
|
+
"size": "759.1 KB",
|
|
14
|
+
"modified": "2026-02-05 11:01:08"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"filename": "2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
|
|
18
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
|
|
19
|
+
"size": "1.8 KB",
|
|
20
|
+
"modified": "2026-02-05 10:22:23"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"filename": "ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
|
|
24
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
|
|
25
|
+
"size": "1.8 KB",
|
|
26
|
+
"modified": "2026-01-23 14:32:05"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"filename": "6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
|
|
30
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
|
|
31
|
+
"size": "3.7 KB",
|
|
32
|
+
"modified": "2026-01-19 22:44:52"
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"filename": "9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
|
|
36
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
|
|
37
|
+
"size": "916.0 B",
|
|
38
|
+
"modified": "2026-01-19 09:25:47"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"filename": "dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
|
|
42
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-en-consulta/dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
|
|
43
|
+
"size": "1.0 KB",
|
|
44
|
+
"modified": "2026-01-16 06:42:34"
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"filename": "b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
|
|
48
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
|
|
49
|
+
"size": "116.0 B",
|
|
50
|
+
"modified": "2026-01-08 21:01:17"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"filename": "846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
|
|
54
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
|
|
55
|
+
"size": "1.8 KB",
|
|
56
|
+
"modified": "2026-01-08 21:00:57"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"filename": "agent-a979d7c.jsonl",
|
|
60
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a979d7c.jsonl",
|
|
61
|
+
"size": "2.0 KB",
|
|
62
|
+
"modified": "2026-01-06 20:47:47"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"filename": "agent-a1fb460.jsonl",
|
|
66
|
+
"path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a1fb460.jsonl",
|
|
67
|
+
"size": "1.9 KB",
|
|
68
|
+
"modified": "2026-01-06 20:47:44"
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"total_entries": 438
|
|
72
|
+
},
|
|
73
|
+
"analysis": {
|
|
74
|
+
"raw_commands_found": 37,
|
|
75
|
+
"unique_commands": 53,
|
|
76
|
+
"categories": [
|
|
77
|
+
"Unknown",
|
|
78
|
+
"Package Management",
|
|
79
|
+
"Text Processing",
|
|
80
|
+
"File System",
|
|
81
|
+
"Shell Builtins",
|
|
82
|
+
"Development",
|
|
83
|
+
"Git",
|
|
84
|
+
"Search & Navigation",
|
|
85
|
+
"Process & System"
|
|
86
|
+
],
|
|
87
|
+
"category_counts": {
|
|
88
|
+
"Unknown": 5,
|
|
89
|
+
"Package Management": 4,
|
|
90
|
+
"Text Processing": 3,
|
|
91
|
+
"File System": 7,
|
|
92
|
+
"Shell Builtins": 3,
|
|
93
|
+
"Development": 5,
|
|
94
|
+
"Git": 24,
|
|
95
|
+
"Search & Navigation": 1,
|
|
96
|
+
"Process & System": 1
|
|
97
|
+
},
|
|
98
|
+
"top_base_commands": [
|
|
99
|
+
{
|
|
100
|
+
"command": "cd",
|
|
101
|
+
"count": 18
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"command": "git",
|
|
105
|
+
"count": 17
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"command": "gh",
|
|
109
|
+
"count": 9
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"command": "python",
|
|
113
|
+
"count": 5
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"command": "mkdir",
|
|
117
|
+
"count": 3
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"command": "ls",
|
|
121
|
+
"count": 3
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
"command": "echo",
|
|
125
|
+
"count": 3
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"command": "session-slides",
|
|
129
|
+
"count": 3
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"command": "pip",
|
|
133
|
+
"count": 2
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"command": "head",
|
|
137
|
+
"count": 2
|
|
138
|
+
}
|
|
139
|
+
],
|
|
140
|
+
"operators_used": {
|
|
141
|
+
"||": 4,
|
|
142
|
+
"|": 3,
|
|
143
|
+
"2>&1": 8,
|
|
144
|
+
"2>/dev/null": 5,
|
|
145
|
+
"&&": 29,
|
|
146
|
+
">": 5,
|
|
147
|
+
"<": 13
|
|
148
|
+
},
|
|
149
|
+
"complexity_distribution": {
|
|
150
|
+
"1": 10,
|
|
151
|
+
"2": 23,
|
|
152
|
+
"3": 15,
|
|
153
|
+
"4": 3,
|
|
154
|
+
"5": 2
|
|
155
|
+
}
|
|
156
|
+
},
|
|
157
|
+
"output": {
|
|
158
|
+
"quiz_questions": 20,
|
|
159
|
+
"html_files": [
|
|
160
|
+
"bash-learner-output/run-2026-02-05-155949/index.html"
|
|
161
|
+
]
|
|
162
|
+
}
|
|
163
|
+
}
|
package/package.json
CHANGED
|
@@ -114,19 +114,52 @@ def _generate_html_impl(analysis_result: dict[str, Any], quizzes: list[dict[str,
|
|
|
114
114
|
</html>'''
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
def _generate_operators_html(operators_used: dict, operator_descriptions: dict) -> str:
|
|
118
|
+
"""Generate HTML for the operators used section."""
|
|
119
|
+
if not operators_used:
|
|
120
|
+
return '<p class="empty-state">No bash operators detected in these commands</p>'
|
|
121
|
+
|
|
122
|
+
operators_html = ""
|
|
123
|
+
# Sort by count descending
|
|
124
|
+
sorted_ops = sorted(operators_used.items(), key=lambda x: -x[1])
|
|
125
|
+
max_count = sorted_ops[0][1] if sorted_ops else 1
|
|
126
|
+
|
|
127
|
+
for op, count in sorted_ops:
|
|
128
|
+
name, desc = operator_descriptions.get(op, (op, 'Bash operator'))
|
|
129
|
+
bar_width = (count / max_count) * 100
|
|
130
|
+
operators_html += f'''
|
|
131
|
+
<div class="operator-item">
|
|
132
|
+
<div class="operator-symbol"><code>{html.escape(op)}</code></div>
|
|
133
|
+
<div class="operator-info">
|
|
134
|
+
<div class="operator-name">{html.escape(name)}</div>
|
|
135
|
+
<div class="operator-desc">{html.escape(desc)}</div>
|
|
136
|
+
</div>
|
|
137
|
+
<div class="operator-bar-container">
|
|
138
|
+
<div class="operator-bar" style="width: {bar_width}%"></div>
|
|
139
|
+
</div>
|
|
140
|
+
<div class="operator-count">{count}</div>
|
|
141
|
+
</div>'''
|
|
142
|
+
return operators_html
|
|
143
|
+
|
|
144
|
+
|
|
117
145
|
def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories: dict) -> str:
|
|
118
146
|
"""Render the overview/dashboard tab content."""
|
|
119
147
|
total_commands = stats.get("total_commands", 0)
|
|
120
148
|
unique_commands = stats.get("unique_commands", 0)
|
|
121
149
|
unique_utilities = stats.get("unique_utilities", 0)
|
|
122
150
|
date_range = stats.get("date_range", {"start": "N/A", "end": "N/A"})
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
151
|
+
# Get operators data for the "Bash Operators Used" section
|
|
152
|
+
operators_used = stats.get("operators_used", {})
|
|
153
|
+
operator_descriptions = {
|
|
154
|
+
'|': ('Pipe', 'Sends output of one command to input of another'),
|
|
155
|
+
'||': ('OR operator', 'Run next command if previous failed'),
|
|
156
|
+
'&&': ('AND operator', 'Run next command if previous succeeded'),
|
|
157
|
+
'2>&1': ('Redirect stderr', 'Combines error output with standard output'),
|
|
158
|
+
'2>/dev/null': ('Suppress errors', 'Discards error messages'),
|
|
159
|
+
'>': ('Redirect output', 'Writes output to a file (overwrites)'),
|
|
160
|
+
'>>': ('Append output', 'Appends output to a file'),
|
|
161
|
+
'<': ('Redirect input', 'Reads input from a file'),
|
|
162
|
+
}
|
|
130
163
|
|
|
131
164
|
# Top 10 commands by frequency - use pre-computed data if available
|
|
132
165
|
top_commands_data = stats.get("top_commands", [])
|
|
@@ -237,29 +270,9 @@ def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories:
|
|
|
237
270
|
|
|
238
271
|
<div class="charts-row">
|
|
239
272
|
<div class="chart-card">
|
|
240
|
-
<h3>
|
|
241
|
-
<div class="
|
|
242
|
-
|
|
243
|
-
<span class="complexity-label simple">Simple</span>
|
|
244
|
-
<div class="complexity-bar-bg">
|
|
245
|
-
<div class="complexity-bar simple" style="width: {simple_pct}%"></div>
|
|
246
|
-
</div>
|
|
247
|
-
<span class="complexity-count">{complexity_dist.get("simple", 0)}</span>
|
|
248
|
-
</div>
|
|
249
|
-
<div class="complexity-row">
|
|
250
|
-
<span class="complexity-label intermediate">Intermediate</span>
|
|
251
|
-
<div class="complexity-bar-bg">
|
|
252
|
-
<div class="complexity-bar intermediate" style="width: {intermediate_pct}%"></div>
|
|
253
|
-
</div>
|
|
254
|
-
<span class="complexity-count">{complexity_dist.get("intermediate", 0)}</span>
|
|
255
|
-
</div>
|
|
256
|
-
<div class="complexity-row">
|
|
257
|
-
<span class="complexity-label advanced">Advanced</span>
|
|
258
|
-
<div class="complexity-bar-bg">
|
|
259
|
-
<div class="complexity-bar advanced" style="width: {advanced_pct}%"></div>
|
|
260
|
-
</div>
|
|
261
|
-
<span class="complexity-count">{complexity_dist.get("advanced", 0)}</span>
|
|
262
|
-
</div>
|
|
273
|
+
<h3>Bash Operators Used</h3>
|
|
274
|
+
<div class="operators-list">
|
|
275
|
+
{_generate_operators_html(operators_used, operator_descriptions)}
|
|
263
276
|
</div>
|
|
264
277
|
</div>
|
|
265
278
|
|
|
@@ -385,11 +398,10 @@ def render_commands_tab(commands: list[dict]) -> str:
|
|
|
385
398
|
</div>'''
|
|
386
399
|
|
|
387
400
|
commands_html += f'''
|
|
388
|
-
<div class="command-card" data-category="{category}" data-
|
|
401
|
+
<div class="command-card" data-category="{category}" data-frequency="{frequency}" data-name="{base_cmd}">
|
|
389
402
|
<div class="command-header" onclick="toggleCommand('{cmd_id}')">
|
|
390
403
|
<div class="command-main">
|
|
391
404
|
<code class="cmd">{base_cmd}</code>
|
|
392
|
-
<span class="complexity-badge {complexity}">{complexity}</span>
|
|
393
405
|
<span class="category-badge">{category}</span>
|
|
394
406
|
</div>
|
|
395
407
|
<div class="command-meta">
|
|
@@ -504,7 +516,6 @@ def render_lessons_tab(categories: dict, commands: list[dict]) -> str:
|
|
|
504
516
|
<div class="lesson-command">
|
|
505
517
|
<div class="lesson-command-header">
|
|
506
518
|
<code class="cmd">{base_cmd}</code>
|
|
507
|
-
<span class="complexity-badge {complexity}">{complexity}</span>
|
|
508
519
|
</div>
|
|
509
520
|
<pre class="syntax-highlighted">{highlighted}</pre>
|
|
510
521
|
<p class="lesson-description">{description}</p>
|
|
@@ -990,6 +1001,77 @@ def get_inline_css() -> str:
|
|
|
990
1001
|
color: var(--text-secondary);
|
|
991
1002
|
}
|
|
992
1003
|
|
|
1004
|
+
/* Operators List */
|
|
1005
|
+
.operators-list {
|
|
1006
|
+
display: flex;
|
|
1007
|
+
flex-direction: column;
|
|
1008
|
+
gap: 12px;
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
.operator-item {
|
|
1012
|
+
display: grid;
|
|
1013
|
+
grid-template-columns: 80px 1fr 120px 50px;
|
|
1014
|
+
align-items: center;
|
|
1015
|
+
gap: 12px;
|
|
1016
|
+
padding: 8px 0;
|
|
1017
|
+
border-bottom: 1px solid var(--border-color);
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
.operator-item:last-child {
|
|
1021
|
+
border-bottom: none;
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
.operator-symbol {
|
|
1025
|
+
font-family: var(--font-mono);
|
|
1026
|
+
font-size: 1rem;
|
|
1027
|
+
font-weight: 600;
|
|
1028
|
+
color: var(--accent-primary);
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
.operator-symbol code {
|
|
1032
|
+
background: var(--bg-tertiary);
|
|
1033
|
+
padding: 4px 8px;
|
|
1034
|
+
border-radius: var(--radius-sm);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
.operator-info {
|
|
1038
|
+
display: flex;
|
|
1039
|
+
flex-direction: column;
|
|
1040
|
+
gap: 2px;
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
.operator-name {
|
|
1044
|
+
font-size: 0.9rem;
|
|
1045
|
+
font-weight: 600;
|
|
1046
|
+
color: var(--text-primary);
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
.operator-desc {
|
|
1050
|
+
font-size: 0.8rem;
|
|
1051
|
+
color: var(--text-secondary);
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
.operator-bar-container {
|
|
1055
|
+
height: 20px;
|
|
1056
|
+
background: var(--bg-tertiary);
|
|
1057
|
+
border-radius: var(--radius-sm);
|
|
1058
|
+
overflow: hidden;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
.operator-bar {
|
|
1062
|
+
height: 100%;
|
|
1063
|
+
background: var(--accent-primary);
|
|
1064
|
+
border-radius: var(--radius-sm);
|
|
1065
|
+
transition: width 0.5s ease;
|
|
1066
|
+
}
|
|
1067
|
+
|
|
1068
|
+
.operator-count {
|
|
1069
|
+
font-size: 0.9rem;
|
|
1070
|
+
font-weight: 600;
|
|
1071
|
+
text-align: right;
|
|
1072
|
+
color: var(--text-secondary);
|
|
1073
|
+
}
|
|
1074
|
+
|
|
993
1075
|
/* Pie Chart */
|
|
994
1076
|
.pie-container {
|
|
995
1077
|
display: flex;
|
|
@@ -2007,7 +2089,7 @@ def generate_html_files(
|
|
|
2007
2089
|
categories = analysis.get('categories', {})
|
|
2008
2090
|
analyzed_commands = analysis.get('commands', commands)
|
|
2009
2091
|
|
|
2010
|
-
# Build frequency map from top_commands
|
|
2092
|
+
# Build frequency map from top_commands (full command strings)
|
|
2011
2093
|
top_commands_data = analysis.get('top_commands', [])
|
|
2012
2094
|
frequency_map = {}
|
|
2013
2095
|
for item in top_commands_data:
|
|
@@ -2015,6 +2097,10 @@ def generate_html_files(
|
|
|
2015
2097
|
cmd_str, count = item[0], item[1]
|
|
2016
2098
|
frequency_map[cmd_str] = count
|
|
2017
2099
|
|
|
2100
|
+
# Get base command frequency for the "Top 10 Most-Used Commands" chart
|
|
2101
|
+
# This aggregates by base command (cd, git, mkdir) not full command strings
|
|
2102
|
+
top_base_commands_data = analysis.get('top_base_commands', [])
|
|
2103
|
+
|
|
2018
2104
|
# Map complexity scores (1-5) to string labels for CSS
|
|
2019
2105
|
def complexity_to_label(score):
|
|
2020
2106
|
if score <= 2:
|
|
@@ -2059,12 +2145,12 @@ def generate_html_files(
|
|
|
2059
2145
|
'advanced': raw_complexity.get(4, 0) + raw_complexity.get(5, 0),
|
|
2060
2146
|
}
|
|
2061
2147
|
|
|
2062
|
-
# Build top commands list with proper frequencies
|
|
2148
|
+
# Build top commands list with proper frequencies (by base command)
|
|
2063
2149
|
top_10_commands = []
|
|
2064
|
-
for item in
|
|
2150
|
+
for item in top_base_commands_data[:10]:
|
|
2065
2151
|
if isinstance(item, (list, tuple)) and len(item) >= 2:
|
|
2066
2152
|
top_10_commands.append({
|
|
2067
|
-
'command': item[0],
|
|
2153
|
+
'command': item[0], # base command like "cd", "git"
|
|
2068
2154
|
'count': item[1]
|
|
2069
2155
|
})
|
|
2070
2156
|
|
|
@@ -2077,6 +2163,7 @@ def generate_html_files(
|
|
|
2077
2163
|
'complexity_avg': stats.get('average_complexity', 2),
|
|
2078
2164
|
'complexity_distribution': complexity_distribution,
|
|
2079
2165
|
'top_commands': top_10_commands, # Pre-computed top commands with frequencies
|
|
2166
|
+
'operators_used': analysis.get('operators_used', {}), # Bash operators like ||, &&, |, 2>&1
|
|
2080
2167
|
},
|
|
2081
2168
|
'commands': formatted_commands,
|
|
2082
2169
|
'categories': {cat: [c.get('command', '') for c in cmds] for cat, cmds in categories.items()},
|
package/scripts/main.py
CHANGED
|
@@ -20,10 +20,24 @@ if sys.version_info < (3, 8):
|
|
|
20
20
|
f"{sys.version_info.major}.{sys.version_info.minor}")
|
|
21
21
|
|
|
22
22
|
# Constants
|
|
23
|
-
|
|
23
|
+
DEFAULT_OUTPUT_BASE = "./bash-learner-output"
|
|
24
24
|
MAX_UNIQUE_COMMANDS = 500
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
    """
    Build the path of a run directory named after the current local time.

    Only the path is computed here; nothing is created on disk.

    Args:
        base_dir: Directory under which the run folder is placed

    Returns:
        ``<base_dir>/run-<YYYY-MM-DD-HHMMSS>``
        (e.g., ./bash-learner-output/run-2026-02-05-143052/)
    """
    run_name = f"run-{datetime.now():%Y-%m-%d-%H%M%S}"
    parent = Path(base_dir)
    return parent / run_name
|
|
39
|
+
|
|
40
|
+
|
|
27
41
|
def get_sessions_base_path() -> Path:
|
|
28
42
|
"""
|
|
29
43
|
Get the base path for Claude session files.
|
|
@@ -305,8 +319,83 @@ def run_extraction_pipeline(
|
|
|
305
319
|
parsed_commands = parse_commands(raw_commands)
|
|
306
320
|
print(f" -> Parsed {len(parsed_commands)} commands")
|
|
307
321
|
|
|
308
|
-
# Step 4:
|
|
309
|
-
|
|
322
|
+
# Step 4: Expand compound commands into individual sub-commands
|
|
323
|
+
# Also count operators for tracking
|
|
324
|
+
from collections import Counter
|
|
325
|
+
import re
|
|
326
|
+
|
|
327
|
+
operator_frequency = Counter()
|
|
328
|
+
expanded_commands = []
|
|
329
|
+
|
|
330
|
+
# Operator patterns to detect
|
|
331
|
+
operator_patterns = {
|
|
332
|
+
'||': r'\|\|',
|
|
333
|
+
'&&': r'&&',
|
|
334
|
+
'|': r'(?<!\|)\|(?!\|)', # Single pipe, not ||
|
|
335
|
+
'2>&1': r'2>&1',
|
|
336
|
+
'2>/dev/null': r'2>/dev/null',
|
|
337
|
+
'>': r'(?<![2&])>(?!>|&)', # Single >, not >> or 2> or >&
|
|
338
|
+
'>>': r'>>',
|
|
339
|
+
'<': r'<(?!<)',
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
for cmd in parsed_commands:
|
|
343
|
+
cmd_str = cmd.get('command', '') or cmd.get('raw', '')
|
|
344
|
+
if not cmd_str:
|
|
345
|
+
continue
|
|
346
|
+
|
|
347
|
+
# Count operators in this command
|
|
348
|
+
for op_name, op_pattern in operator_patterns.items():
|
|
349
|
+
matches = re.findall(op_pattern, cmd_str)
|
|
350
|
+
if matches:
|
|
351
|
+
operator_frequency[op_name] += len(matches)
|
|
352
|
+
|
|
353
|
+
# Check if this is a compound command
|
|
354
|
+
is_compound = any(op in cmd_str for op in ['||', '&&', ' | ', ';'])
|
|
355
|
+
|
|
356
|
+
if is_compound:
|
|
357
|
+
# Extract individual sub-commands from compound statement
|
|
358
|
+
sub_commands = extract_sub_commands(cmd_str)
|
|
359
|
+
for sub_cmd in sub_commands:
|
|
360
|
+
if sub_cmd.strip():
|
|
361
|
+
expanded_commands.append({
|
|
362
|
+
'command': sub_cmd.strip(),
|
|
363
|
+
'raw': sub_cmd.strip(),
|
|
364
|
+
'original_compound': cmd_str,
|
|
365
|
+
'description': cmd.get('description', ''),
|
|
366
|
+
'output': cmd.get('output', ''),
|
|
367
|
+
})
|
|
368
|
+
else:
|
|
369
|
+
# Simple command - add as-is
|
|
370
|
+
expanded_commands.append(cmd)
|
|
371
|
+
|
|
372
|
+
print(f" -> Expanded to {len(expanded_commands)} individual commands")
|
|
373
|
+
|
|
374
|
+
# Step 5: Re-parse expanded commands to get proper base_command for each
|
|
375
|
+
parsed_expanded = parse_commands(expanded_commands)
|
|
376
|
+
|
|
377
|
+
# Step 6: Count frequencies BEFORE deduplication
|
|
378
|
+
cmd_frequency = Counter()
|
|
379
|
+
base_cmd_frequency = Counter()
|
|
380
|
+
|
|
381
|
+
for cmd in parsed_expanded:
|
|
382
|
+
cmd_str = cmd.get('command', '') or cmd.get('raw', '')
|
|
383
|
+
base_cmd = cmd.get('base_command', '')
|
|
384
|
+
if cmd_str:
|
|
385
|
+
cmd_frequency[cmd_str] += 1
|
|
386
|
+
if base_cmd:
|
|
387
|
+
base_cmd_frequency[base_cmd] += 1
|
|
388
|
+
|
|
389
|
+
# Step 7: Deduplicate and add frequency data
|
|
390
|
+
unique_commands = deduplicate_commands(parsed_expanded)
|
|
391
|
+
|
|
392
|
+
# Add frequency to each unique command
|
|
393
|
+
for cmd in unique_commands:
|
|
394
|
+
cmd_str = cmd.get('command', '') or cmd.get('raw', '')
|
|
395
|
+
base_cmd = cmd.get('base_command', '')
|
|
396
|
+
cmd['frequency'] = cmd_frequency.get(cmd_str, 1)
|
|
397
|
+
cmd['base_frequency'] = base_cmd_frequency.get(base_cmd, 1)
|
|
398
|
+
|
|
310
399
|
if len(unique_commands) > MAX_UNIQUE_COMMANDS:
|
|
311
400
|
print(f"\nCapping at {MAX_UNIQUE_COMMANDS} unique commands "
|
|
312
401
|
f"(found {len(unique_commands)})")
|
|
@@ -314,9 +403,16 @@ def run_extraction_pipeline(
|
|
|
314
403
|
else:
|
|
315
404
|
print(f"\n{len(unique_commands)} unique commands")
|
|
316
405
|
|
|
317
|
-
# Step
|
|
406
|
+
# Step 6: Analyze commands
|
|
318
407
|
print("\nAnalyzing commands...")
|
|
319
408
|
analysis = analyze_commands(unique_commands)
|
|
409
|
+
|
|
410
|
+
# Inject pre-computed frequency data into analysis
|
|
411
|
+
analysis['command_frequency'] = dict(cmd_frequency)
|
|
412
|
+
analysis['base_command_frequency'] = dict(base_cmd_frequency)
|
|
413
|
+
analysis['top_commands'] = cmd_frequency.most_common(20)
|
|
414
|
+
analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
|
|
415
|
+
analysis['operators_used'] = dict(operator_frequency)
|
|
320
416
|
print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
|
|
321
417
|
|
|
322
418
|
# Step 6: Generate quizzes
|
|
@@ -330,16 +426,42 @@ def run_extraction_pipeline(
|
|
|
330
426
|
html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
|
|
331
427
|
print(f" -> Created {len(html_files)} HTML files")
|
|
332
428
|
|
|
333
|
-
# Write summary JSON
|
|
429
|
+
# Write summary JSON with comprehensive metadata
|
|
334
430
|
summary = {
|
|
335
|
-
"
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
"
|
|
341
|
-
|
|
342
|
-
|
|
431
|
+
"metadata": {
|
|
432
|
+
"generated_at": datetime.now().isoformat(),
|
|
433
|
+
"run_id": output_dir.name,
|
|
434
|
+
"version": "1.0.5",
|
|
435
|
+
},
|
|
436
|
+
"input": {
|
|
437
|
+
"sessions_processed": len(sessions),
|
|
438
|
+
"session_files": [
|
|
439
|
+
{
|
|
440
|
+
"filename": s['filename'],
|
|
441
|
+
"path": str(s['path']),
|
|
442
|
+
"size": s['size_human'],
|
|
443
|
+
"modified": s['modified_str']
|
|
444
|
+
}
|
|
445
|
+
for s in sessions
|
|
446
|
+
],
|
|
447
|
+
"total_entries": len(all_entries),
|
|
448
|
+
},
|
|
449
|
+
"analysis": {
|
|
450
|
+
"raw_commands_found": len(raw_commands),
|
|
451
|
+
"unique_commands": len(unique_commands),
|
|
452
|
+
"categories": list(analysis.get('categories', {}).keys()),
|
|
453
|
+
"category_counts": {cat: len(cmds) for cat, cmds in analysis.get('categories', {}).items()},
|
|
454
|
+
"top_base_commands": [
|
|
455
|
+
{"command": cmd, "count": count}
|
|
456
|
+
for cmd, count in list(base_cmd_frequency.most_common(10))
|
|
457
|
+
],
|
|
458
|
+
"operators_used": dict(operator_frequency),
|
|
459
|
+
"complexity_distribution": dict(analysis.get('complexity_distribution', {})),
|
|
460
|
+
},
|
|
461
|
+
"output": {
|
|
462
|
+
"quiz_questions": quiz_count,
|
|
463
|
+
"html_files": [str(f) for f in html_files],
|
|
464
|
+
},
|
|
343
465
|
}
|
|
344
466
|
|
|
345
467
|
summary_path = output_dir / "summary.json"
|
|
@@ -351,6 +473,54 @@ def run_extraction_pipeline(
|
|
|
351
473
|
return True, f"Successfully generated learning materials in {output_dir}"
|
|
352
474
|
|
|
353
475
|
|
|
476
|
+
def extract_sub_commands(cmd_str: str) -> List[str]:
    """
    Extract individual sub-commands from a compound command.

    Splits on the operators ``||``, ``&&``, ``|`` and ``;`` so each
    sub-command becomes a learnable unit. Redirections such as
    ``2>/dev/null`` or ``2>&1`` stay attached to their command.

    Operators are only treated as separators when they appear unquoted and
    unescaped, so ``git commit -m "a && b"``, ``grep 'a|b'`` and
    ``find . -exec rm {} \\;`` are not torn apart. Command substitutions
    (``$(...)``) and here-documents are not tracked.

    Args:
        cmd_str: The compound command string

    Returns:
        List of individual sub-command strings, stripped of surrounding
        whitespace, in order of appearance; empty pieces are dropped
    """
    sub_commands: List[str] = []
    current: List[str] = []  # characters of the sub-command being collected
    quote = None  # the quote character we are inside of, if any
    length = len(cmd_str)
    i = 0

    def flush() -> None:
        """Store the collected sub-command (if non-empty) and start a new one."""
        piece = "".join(current).strip()
        if piece:
            sub_commands.append(piece)
        current.clear()

    while i < length:
        ch = cmd_str[i]

        if quote is not None:
            current.append(ch)
            # Inside double quotes a backslash protects the next character
            # (notably \"); inside single quotes everything is literal.
            if ch == "\\" and quote == '"' and i + 1 < length:
                current.append(cmd_str[i + 1])
                i += 2
                continue
            if ch == quote:
                quote = None
            i += 1
            continue

        if ch == "\\" and i + 1 < length:
            # An escaped character outside quotes (e.g. "\;") is never a separator.
            current.append(ch)
            current.append(cmd_str[i + 1])
            i += 2
            continue

        if ch in ("'", '"'):
            quote = ch
            current.append(ch)
            i += 1
            continue

        # Two-character operators must be checked before the single pipe so
        # that "||" is not read as two pipes.
        if cmd_str.startswith(("||", "&&"), i):
            flush()
            i += 2
            continue

        if ch in ("|", ";"):
            flush()
            i += 1
            continue

        current.append(ch)
        i += 1

    flush()
    return sub_commands
|
|
522
|
+
|
|
523
|
+
|
|
354
524
|
def deduplicate_commands(commands: List[Dict]) -> List[Dict]:
|
|
355
525
|
"""
|
|
356
526
|
Remove duplicate commands while preserving order.
|
|
@@ -405,8 +575,8 @@ Examples:
|
|
|
405
575
|
parser.add_argument(
|
|
406
576
|
'-o', '--output',
|
|
407
577
|
type=str,
|
|
408
|
-
default=
|
|
409
|
-
help=f'Output directory (default: {
|
|
578
|
+
default=None,
|
|
579
|
+
help=f'Output directory (default: timestamped folder in {DEFAULT_OUTPUT_BASE}/)'
|
|
410
580
|
)
|
|
411
581
|
|
|
412
582
|
parser.add_argument(
|
|
@@ -487,8 +657,11 @@ def main() -> int:
|
|
|
487
657
|
|
|
488
658
|
sessions_to_process = sessions
|
|
489
659
|
|
|
490
|
-
# Run the pipeline
|
|
491
|
-
|
|
660
|
+
# Run the pipeline with timestamped output directory
|
|
661
|
+
if args.output:
|
|
662
|
+
output_dir = Path(args.output)
|
|
663
|
+
else:
|
|
664
|
+
output_dir = generate_timestamped_output_dir()
|
|
492
665
|
success, message = run_extraction_pipeline(sessions_to_process, output_dir)
|
|
493
666
|
|
|
494
667
|
if success:
|