learn_bash_from_session_data 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ {
2
+ "metadata": {
3
+ "generated_at": "2026-02-05T15:59:49.814630",
4
+ "run_id": "run-2026-02-05-155949",
5
+ "version": "1.0.5"
6
+ },
7
+ "input": {
8
+ "sessions_processed": 10,
9
+ "session_files": [
10
+ {
11
+ "filename": "081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
12
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-session-with-chris/081cfcfd-cde6-4304-89ca-6ac61faf8d85.jsonl",
13
+ "size": "759.1 KB",
14
+ "modified": "2026-02-05 11:01:08"
15
+ },
16
+ {
17
+ "filename": "2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
18
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/2ee58010-e794-48d8-a1b7-0ba14e06e7b7.jsonl",
19
+ "size": "1.8 KB",
20
+ "modified": "2026-02-05 10:22:23"
21
+ },
22
+ {
23
+ "filename": "ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
24
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/ef6494ae-aca1-43b1-800d-d5586069d42c.jsonl",
25
+ "size": "1.8 KB",
26
+ "modified": "2026-01-23 14:32:05"
27
+ },
28
+ {
29
+ "filename": "6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
30
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/6b1fb6cd-5865-4a5f-97f9-5e1e46d79f81.jsonl",
31
+ "size": "3.7 KB",
32
+ "modified": "2026-01-19 22:44:52"
33
+ },
34
+ {
35
+ "filename": "9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
36
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/9db5f466-5bc4-4778-94ba-b10d72e8c464.jsonl",
37
+ "size": "916.0 B",
38
+ "modified": "2026-01-19 09:25:47"
39
+ },
40
+ {
41
+ "filename": "dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
42
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand-Development-Project-Workspace-active-development-en-consulta/dfc82e92-9786-4baf-a5df-44046f21f90a.jsonl",
43
+ "size": "1.0 KB",
44
+ "modified": "2026-01-16 06:42:34"
45
+ },
46
+ {
47
+ "filename": "b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
48
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/b3c0facc-2728-48a5-a1d0-0fed6b049cf2.jsonl",
49
+ "size": "116.0 B",
50
+ "modified": "2026-01-08 21:01:17"
51
+ },
52
+ {
53
+ "filename": "846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
54
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/846d1c3c-05e3-4d86-9aae-c8bd266962d4.jsonl",
55
+ "size": "1.8 KB",
56
+ "modified": "2026-01-08 21:00:57"
57
+ },
58
+ {
59
+ "filename": "agent-a979d7c.jsonl",
60
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a979d7c.jsonl",
61
+ "size": "2.0 KB",
62
+ "modified": "2026-01-06 20:47:47"
63
+ },
64
+ {
65
+ "filename": "agent-a1fb460.jsonl",
66
+ "path": "/mnt/c/Users/brand/.claude/projects/C--Users-brand/agent-a1fb460.jsonl",
67
+ "size": "1.9 KB",
68
+ "modified": "2026-01-06 20:47:44"
69
+ }
70
+ ],
71
+ "total_entries": 438
72
+ },
73
+ "analysis": {
74
+ "raw_commands_found": 37,
75
+ "unique_commands": 53,
76
+ "categories": [
77
+ "Unknown",
78
+ "Package Management",
79
+ "Text Processing",
80
+ "File System",
81
+ "Shell Builtins",
82
+ "Development",
83
+ "Git",
84
+ "Search & Navigation",
85
+ "Process & System"
86
+ ],
87
+ "category_counts": {
88
+ "Unknown": 5,
89
+ "Package Management": 4,
90
+ "Text Processing": 3,
91
+ "File System": 7,
92
+ "Shell Builtins": 3,
93
+ "Development": 5,
94
+ "Git": 24,
95
+ "Search & Navigation": 1,
96
+ "Process & System": 1
97
+ },
98
+ "top_base_commands": [
99
+ {
100
+ "command": "cd",
101
+ "count": 18
102
+ },
103
+ {
104
+ "command": "git",
105
+ "count": 17
106
+ },
107
+ {
108
+ "command": "gh",
109
+ "count": 9
110
+ },
111
+ {
112
+ "command": "python",
113
+ "count": 5
114
+ },
115
+ {
116
+ "command": "mkdir",
117
+ "count": 3
118
+ },
119
+ {
120
+ "command": "ls",
121
+ "count": 3
122
+ },
123
+ {
124
+ "command": "echo",
125
+ "count": 3
126
+ },
127
+ {
128
+ "command": "session-slides",
129
+ "count": 3
130
+ },
131
+ {
132
+ "command": "pip",
133
+ "count": 2
134
+ },
135
+ {
136
+ "command": "head",
137
+ "count": 2
138
+ }
139
+ ],
140
+ "operators_used": {
141
+ "||": 4,
142
+ "|": 3,
143
+ "2>&1": 8,
144
+ "2>/dev/null": 5,
145
+ "&&": 29,
146
+ ">": 5,
147
+ "<": 13
148
+ },
149
+ "complexity_distribution": {
150
+ "1": 10,
151
+ "2": 23,
152
+ "3": 15,
153
+ "4": 3,
154
+ "5": 2
155
+ }
156
+ },
157
+ "output": {
158
+ "quiz_questions": 20,
159
+ "html_files": [
160
+ "bash-learner-output/run-2026-02-05-155949/index.html"
161
+ ]
162
+ }
163
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "learn_bash_from_session_data",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "Learn bash from your Claude Code sessions - extracts commands and generates interactive HTML lessons",
5
5
  "main": "bin/learn-bash.js",
6
6
  "bin": {
@@ -114,19 +114,52 @@ def _generate_html_impl(analysis_result: dict[str, Any], quizzes: list[dict[str,
114
114
  </html>'''
115
115
 
116
116
 
117
+ def _generate_operators_html(operators_used: dict, operator_descriptions: dict) -> str:
118
+ """Generate HTML for the operators used section."""
119
+ if not operators_used:
120
+ return '<p class="empty-state">No bash operators detected in these commands</p>'
121
+
122
+ operators_html = ""
123
+ # Sort by count descending
124
+ sorted_ops = sorted(operators_used.items(), key=lambda x: -x[1])
125
+ max_count = sorted_ops[0][1] if sorted_ops else 1
126
+
127
+ for op, count in sorted_ops:
128
+ name, desc = operator_descriptions.get(op, (op, 'Bash operator'))
129
+ bar_width = (count / max_count) * 100
130
+ operators_html += f'''
131
+ <div class="operator-item">
132
+ <div class="operator-symbol"><code>{html.escape(op)}</code></div>
133
+ <div class="operator-info">
134
+ <div class="operator-name">{html.escape(name)}</div>
135
+ <div class="operator-desc">{html.escape(desc)}</div>
136
+ </div>
137
+ <div class="operator-bar-container">
138
+ <div class="operator-bar" style="width: {bar_width}%"></div>
139
+ </div>
140
+ <div class="operator-count">{count}</div>
141
+ </div>'''
142
+ return operators_html
143
+
144
+
117
145
  def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories: dict) -> str:
118
146
  """Render the overview/dashboard tab content."""
119
147
  total_commands = stats.get("total_commands", 0)
120
148
  unique_commands = stats.get("unique_commands", 0)
121
149
  unique_utilities = stats.get("unique_utilities", 0)
122
150
  date_range = stats.get("date_range", {"start": "N/A", "end": "N/A"})
123
- complexity_dist = stats.get("complexity_distribution", {"simple": 0, "intermediate": 0, "advanced": 0})
124
-
125
- # Calculate percentages for complexity bars
126
- total_for_pct = sum(complexity_dist.values()) or 1
127
- simple_pct = (complexity_dist.get("simple", 0) / total_for_pct) * 100
128
- intermediate_pct = (complexity_dist.get("intermediate", 0) / total_for_pct) * 100
129
- advanced_pct = (complexity_dist.get("advanced", 0) / total_for_pct) * 100
151
+ # Get operators data for the "Bash Operators Used" section
152
+ operators_used = stats.get("operators_used", {})
153
+ operator_descriptions = {
154
+ '|': ('Pipe', 'Sends output of one command to input of another'),
155
+ '||': ('OR operator', 'Run next command if previous failed'),
156
+ '&&': ('AND operator', 'Run next command if previous succeeded'),
157
+ '2>&1': ('Redirect stderr', 'Combines error output with standard output'),
158
+ '2>/dev/null': ('Suppress errors', 'Discards error messages'),
159
+ '>': ('Redirect output', 'Writes output to a file (overwrites)'),
160
+ '>>': ('Append output', 'Appends output to a file'),
161
+ '<': ('Redirect input', 'Reads input from a file'),
162
+ }
130
163
 
131
164
  # Top 10 commands by frequency - use pre-computed data if available
132
165
  top_commands_data = stats.get("top_commands", [])
@@ -237,29 +270,9 @@ def render_overview_tab(stats: dict[str, Any], commands: list[dict], categories:
237
270
 
238
271
  <div class="charts-row">
239
272
  <div class="chart-card">
240
- <h3>Complexity Distribution</h3>
241
- <div class="complexity-bars">
242
- <div class="complexity-row">
243
- <span class="complexity-label simple">Simple</span>
244
- <div class="complexity-bar-bg">
245
- <div class="complexity-bar simple" style="width: {simple_pct}%"></div>
246
- </div>
247
- <span class="complexity-count">{complexity_dist.get("simple", 0)}</span>
248
- </div>
249
- <div class="complexity-row">
250
- <span class="complexity-label intermediate">Intermediate</span>
251
- <div class="complexity-bar-bg">
252
- <div class="complexity-bar intermediate" style="width: {intermediate_pct}%"></div>
253
- </div>
254
- <span class="complexity-count">{complexity_dist.get("intermediate", 0)}</span>
255
- </div>
256
- <div class="complexity-row">
257
- <span class="complexity-label advanced">Advanced</span>
258
- <div class="complexity-bar-bg">
259
- <div class="complexity-bar advanced" style="width: {advanced_pct}%"></div>
260
- </div>
261
- <span class="complexity-count">{complexity_dist.get("advanced", 0)}</span>
262
- </div>
273
+ <h3>Bash Operators Used</h3>
274
+ <div class="operators-list">
275
+ {_generate_operators_html(operators_used, operator_descriptions)}
263
276
  </div>
264
277
  </div>
265
278
 
@@ -385,11 +398,10 @@ def render_commands_tab(commands: list[dict]) -> str:
385
398
  </div>'''
386
399
 
387
400
  commands_html += f'''
388
- <div class="command-card" data-category="{category}" data-complexity="{complexity}" data-frequency="{frequency}" data-name="{base_cmd}">
401
+ <div class="command-card" data-category="{category}" data-frequency="{frequency}" data-name="{base_cmd}">
389
402
  <div class="command-header" onclick="toggleCommand('{cmd_id}')">
390
403
  <div class="command-main">
391
404
  <code class="cmd">{base_cmd}</code>
392
- <span class="complexity-badge {complexity}">{complexity}</span>
393
405
  <span class="category-badge">{category}</span>
394
406
  </div>
395
407
  <div class="command-meta">
@@ -504,7 +516,6 @@ def render_lessons_tab(categories: dict, commands: list[dict]) -> str:
504
516
  <div class="lesson-command">
505
517
  <div class="lesson-command-header">
506
518
  <code class="cmd">{base_cmd}</code>
507
- <span class="complexity-badge {complexity}">{complexity}</span>
508
519
  </div>
509
520
  <pre class="syntax-highlighted">{highlighted}</pre>
510
521
  <p class="lesson-description">{description}</p>
@@ -990,6 +1001,77 @@ def get_inline_css() -> str:
990
1001
  color: var(--text-secondary);
991
1002
  }
992
1003
 
1004
+ /* Operators List */
1005
+ .operators-list {
1006
+ display: flex;
1007
+ flex-direction: column;
1008
+ gap: 12px;
1009
+ }
1010
+
1011
+ .operator-item {
1012
+ display: grid;
1013
+ grid-template-columns: 80px 1fr 120px 50px;
1014
+ align-items: center;
1015
+ gap: 12px;
1016
+ padding: 8px 0;
1017
+ border-bottom: 1px solid var(--border-color);
1018
+ }
1019
+
1020
+ .operator-item:last-child {
1021
+ border-bottom: none;
1022
+ }
1023
+
1024
+ .operator-symbol {
1025
+ font-family: var(--font-mono);
1026
+ font-size: 1rem;
1027
+ font-weight: 600;
1028
+ color: var(--accent-primary);
1029
+ }
1030
+
1031
+ .operator-symbol code {
1032
+ background: var(--bg-tertiary);
1033
+ padding: 4px 8px;
1034
+ border-radius: var(--radius-sm);
1035
+ }
1036
+
1037
+ .operator-info {
1038
+ display: flex;
1039
+ flex-direction: column;
1040
+ gap: 2px;
1041
+ }
1042
+
1043
+ .operator-name {
1044
+ font-size: 0.9rem;
1045
+ font-weight: 600;
1046
+ color: var(--text-primary);
1047
+ }
1048
+
1049
+ .operator-desc {
1050
+ font-size: 0.8rem;
1051
+ color: var(--text-secondary);
1052
+ }
1053
+
1054
+ .operator-bar-container {
1055
+ height: 20px;
1056
+ background: var(--bg-tertiary);
1057
+ border-radius: var(--radius-sm);
1058
+ overflow: hidden;
1059
+ }
1060
+
1061
+ .operator-bar {
1062
+ height: 100%;
1063
+ background: var(--accent-primary);
1064
+ border-radius: var(--radius-sm);
1065
+ transition: width 0.5s ease;
1066
+ }
1067
+
1068
+ .operator-count {
1069
+ font-size: 0.9rem;
1070
+ font-weight: 600;
1071
+ text-align: right;
1072
+ color: var(--text-secondary);
1073
+ }
1074
+
993
1075
  /* Pie Chart */
994
1076
  .pie-container {
995
1077
  display: flex;
@@ -2007,7 +2089,7 @@ def generate_html_files(
2007
2089
  categories = analysis.get('categories', {})
2008
2090
  analyzed_commands = analysis.get('commands', commands)
2009
2091
 
2010
- # Build frequency map from top_commands
2092
+ # Build frequency map from top_commands (full command strings)
2011
2093
  top_commands_data = analysis.get('top_commands', [])
2012
2094
  frequency_map = {}
2013
2095
  for item in top_commands_data:
@@ -2015,6 +2097,10 @@ def generate_html_files(
2015
2097
  cmd_str, count = item[0], item[1]
2016
2098
  frequency_map[cmd_str] = count
2017
2099
 
2100
+ # Get base command frequency for the "Top 10 Most-Used Commands" chart
2101
+ # This aggregates by base command (cd, git, mkdir) not full command strings
2102
+ top_base_commands_data = analysis.get('top_base_commands', [])
2103
+
2018
2104
  # Map complexity scores (1-5) to string labels for CSS
2019
2105
  def complexity_to_label(score):
2020
2106
  if score <= 2:
@@ -2059,12 +2145,12 @@ def generate_html_files(
2059
2145
  'advanced': raw_complexity.get(4, 0) + raw_complexity.get(5, 0),
2060
2146
  }
2061
2147
 
2062
- # Build top commands list with proper frequencies
2148
+ # Build top commands list with proper frequencies (by base command)
2063
2149
  top_10_commands = []
2064
- for item in top_commands_data[:10]:
2150
+ for item in top_base_commands_data[:10]:
2065
2151
  if isinstance(item, (list, tuple)) and len(item) >= 2:
2066
2152
  top_10_commands.append({
2067
- 'command': item[0],
2153
+ 'command': item[0], # base command like "cd", "git"
2068
2154
  'count': item[1]
2069
2155
  })
2070
2156
 
@@ -2077,6 +2163,7 @@ def generate_html_files(
2077
2163
  'complexity_avg': stats.get('average_complexity', 2),
2078
2164
  'complexity_distribution': complexity_distribution,
2079
2165
  'top_commands': top_10_commands, # Pre-computed top commands with frequencies
2166
+ 'operators_used': analysis.get('operators_used', {}), # Bash operators like ||, &&, |, 2>&1
2080
2167
  },
2081
2168
  'commands': formatted_commands,
2082
2169
  'categories': {cat: [c.get('command', '') for c in cmds] for cat, cmds in categories.items()},
package/scripts/main.py CHANGED
@@ -20,10 +20,24 @@ if sys.version_info < (3, 8):
20
20
  f"{sys.version_info.major}.{sys.version_info.minor}")
21
21
 
22
22
  # Constants
23
- DEFAULT_OUTPUT_DIR = "./bash-learner-output/"
23
+ DEFAULT_OUTPUT_BASE = "./bash-learner-output"
24
24
  MAX_UNIQUE_COMMANDS = 500
25
25
 
26
26
 
27
def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
    """
    Build the path of a per-run output directory named after the current time.

    The directory is only named here, not created.

    Args:
        base_dir: Directory under which the run folder is placed

    Returns:
        ``<base_dir>/run-<YYYY-MM-DD-HHMMSS>`` using the local clock
        (e.g., ./bash-learner-output/run-2026-02-05-143052/)
    """
    now = datetime.now()
    run_name = f"run-{now.strftime('%Y-%m-%d-%H%M%S')}"
    return Path(base_dir).joinpath(run_name)
39
+
40
+
27
41
  def get_sessions_base_path() -> Path:
28
42
  """
29
43
  Get the base path for Claude session files.
@@ -305,8 +319,83 @@ def run_extraction_pipeline(
305
319
  parsed_commands = parse_commands(raw_commands)
306
320
  print(f" -> Parsed {len(parsed_commands)} commands")
307
321
 
308
- # Step 4: Deduplicate and cap
309
- unique_commands = deduplicate_commands(parsed_commands)
322
+ # Step 4: Expand compound commands into individual sub-commands
323
+ # Also count operators for tracking
324
+ from collections import Counter
325
+ import re
326
+
327
+ operator_frequency = Counter()
328
+ expanded_commands = []
329
+
330
+ # Operator patterns to detect
331
+ operator_patterns = {
332
+ '||': r'\|\|',
333
+ '&&': r'&&',
334
+ '|': r'(?<!\|)\|(?!\|)', # Single pipe, not ||
335
+ '2>&1': r'2>&1',
336
+ '2>/dev/null': r'2>/dev/null',
337
+ '>': r'(?<![2&])>(?!>|&)', # Single >, not >> or 2> or >&
338
+ '>>': r'>>',
339
+ '<': r'<(?!<)',
340
+ }
341
+
342
+ for cmd in parsed_commands:
343
+ cmd_str = cmd.get('command', '') or cmd.get('raw', '')
344
+ if not cmd_str:
345
+ continue
346
+
347
+ # Count operators in this command
348
+ for op_name, op_pattern in operator_patterns.items():
349
+ matches = re.findall(op_pattern, cmd_str)
350
+ if matches:
351
+ operator_frequency[op_name] += len(matches)
352
+
353
+ # Check if this is a compound command
354
+ is_compound = any(op in cmd_str for op in ['||', '&&', ' | ', ';'])
355
+
356
+ if is_compound:
357
+ # Extract individual sub-commands from compound statement
358
+ sub_commands = extract_sub_commands(cmd_str)
359
+ for sub_cmd in sub_commands:
360
+ if sub_cmd.strip():
361
+ expanded_commands.append({
362
+ 'command': sub_cmd.strip(),
363
+ 'raw': sub_cmd.strip(),
364
+ 'original_compound': cmd_str,
365
+ 'description': cmd.get('description', ''),
366
+ 'output': cmd.get('output', ''),
367
+ })
368
+ else:
369
+ # Simple command - add as-is
370
+ expanded_commands.append(cmd)
371
+
372
+ print(f" -> Expanded to {len(expanded_commands)} individual commands")
373
+
374
+ # Step 5: Re-parse expanded commands to get proper base_command for each
375
+ parsed_expanded = parse_commands(expanded_commands)
376
+
377
+ # Step 6: Count frequencies BEFORE deduplication
378
+ cmd_frequency = Counter()
379
+ base_cmd_frequency = Counter()
380
+
381
+ for cmd in parsed_expanded:
382
+ cmd_str = cmd.get('command', '') or cmd.get('raw', '')
383
+ base_cmd = cmd.get('base_command', '')
384
+ if cmd_str:
385
+ cmd_frequency[cmd_str] += 1
386
+ if base_cmd:
387
+ base_cmd_frequency[base_cmd] += 1
388
+
389
+ # Step 7: Deduplicate and add frequency data
390
+ unique_commands = deduplicate_commands(parsed_expanded)
391
+
392
+ # Add frequency to each unique command
393
+ for cmd in unique_commands:
394
+ cmd_str = cmd.get('command', '') or cmd.get('raw', '')
395
+ base_cmd = cmd.get('base_command', '')
396
+ cmd['frequency'] = cmd_frequency.get(cmd_str, 1)
397
+ cmd['base_frequency'] = base_cmd_frequency.get(base_cmd, 1)
398
+
310
399
  if len(unique_commands) > MAX_UNIQUE_COMMANDS:
311
400
  print(f"\nCapping at {MAX_UNIQUE_COMMANDS} unique commands "
312
401
  f"(found {len(unique_commands)})")
@@ -314,9 +403,16 @@ def run_extraction_pipeline(
314
403
  else:
315
404
  print(f"\n{len(unique_commands)} unique commands")
316
405
 
317
- # Step 5: Analyze commands
406
+ # Step 8: Analyze commands
318
407
  print("\nAnalyzing commands...")
319
408
  analysis = analyze_commands(unique_commands)
409
+
410
+ # Inject pre-computed frequency data into analysis
411
+ analysis['command_frequency'] = dict(cmd_frequency)
412
+ analysis['base_command_frequency'] = dict(base_cmd_frequency)
413
+ analysis['top_commands'] = cmd_frequency.most_common(20)
414
+ analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
415
+ analysis['operators_used'] = dict(operator_frequency)
320
416
  print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
321
417
 
322
418
  # Step 6: Generate quizzes
@@ -330,16 +426,42 @@ def run_extraction_pipeline(
330
426
  html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
331
427
  print(f" -> Created {len(html_files)} HTML files")
332
428
 
333
- # Write summary JSON
429
+ # Write summary JSON with comprehensive metadata
334
430
  summary = {
335
- "generated_at": datetime.now().isoformat(),
336
- "sessions_processed": len(sessions),
337
- "total_entries": len(all_entries),
338
- "raw_commands": len(raw_commands),
339
- "unique_commands": len(unique_commands),
340
- "categories": list(analysis.get('categories', {}).keys()),
341
- "quiz_count": quiz_count,
342
- "html_files": [str(f) for f in html_files],
431
+ "metadata": {
432
+ "generated_at": datetime.now().isoformat(),
433
+ "run_id": output_dir.name,
434
+ "version": "1.0.5",
435
+ },
436
+ "input": {
437
+ "sessions_processed": len(sessions),
438
+ "session_files": [
439
+ {
440
+ "filename": s['filename'],
441
+ "path": str(s['path']),
442
+ "size": s['size_human'],
443
+ "modified": s['modified_str']
444
+ }
445
+ for s in sessions
446
+ ],
447
+ "total_entries": len(all_entries),
448
+ },
449
+ "analysis": {
450
+ "raw_commands_found": len(raw_commands),
451
+ "unique_commands": len(unique_commands),
452
+ "categories": list(analysis.get('categories', {}).keys()),
453
+ "category_counts": {cat: len(cmds) for cat, cmds in analysis.get('categories', {}).items()},
454
+ "top_base_commands": [
455
+ {"command": cmd, "count": count}
456
+ for cmd, count in list(base_cmd_frequency.most_common(10))
457
+ ],
458
+ "operators_used": dict(operator_frequency),
459
+ "complexity_distribution": dict(analysis.get('complexity_distribution', {})),
460
+ },
461
+ "output": {
462
+ "quiz_questions": quiz_count,
463
+ "html_files": [str(f) for f in html_files],
464
+ },
343
465
  }
344
466
 
345
467
  summary_path = output_dir / "summary.json"
@@ -351,6 +473,54 @@ def run_extraction_pipeline(
351
473
  return True, f"Successfully generated learning materials in {output_dir}"
352
474
 
353
475
 
476
def extract_sub_commands(cmd_str: str) -> List[str]:
    """
    Extract individual sub-commands from a compound command.

    Splits on the control operators ``||``, ``&&``, ``|`` and ``;`` so each
    sub-command becomes a learnable unit. Redirections such as
    ``2>/dev/null`` or ``2>&1`` stay attached to their command.

    Operators are only treated as separators when they appear outside
    quotes and are not backslash-escaped, so ``git commit -m "fix; update"``
    and ``echo "a | b"`` are kept whole instead of being cut inside the
    quoted argument.

    Known limitations: command substitution (``$(...)``, backticks),
    subshell grouping and here-documents are not parsed; operators inside
    them are still treated as separators. After an unterminated quote the
    rest of the string is kept as one piece.

    Args:
        cmd_str: The compound command string

    Returns:
        List of individual sub-command strings, in left-to-right order,
        stripped of surrounding whitespace and with empty pieces dropped
    """
    sub_commands: List[str] = []
    current: List[str] = []
    quote = None  # the quote character we are currently inside, if any

    def flush() -> None:
        # Close the piece collected so far; skip pieces that are empty.
        cleaned = "".join(current).strip()
        if cleaned:
            sub_commands.append(cleaned)
        current.clear()

    i = 0
    n = len(cmd_str)
    while i < n:
        ch = cmd_str[i]

        # A backslash escapes the next character everywhere except inside
        # single quotes, where it is a literal character.
        if ch == '\\' and quote != "'" and i + 1 < n:
            current.append(cmd_str[i:i + 2])
            i += 2
            continue

        if quote is not None:
            # Inside quotes everything is literal until the closing quote.
            if ch == quote:
                quote = None
            current.append(ch)
            i += 1
            continue

        if ch in ('"', "'"):
            quote = ch
            current.append(ch)
            i += 1
            continue

        # Two-character operators are checked first so "||" is not
        # mistaken for two single pipes. A lone "&" (background job,
        # "2>&1") is deliberately not a separator.
        if cmd_str.startswith(('||', '&&'), i):
            flush()
            i += 2
            continue

        if ch in (';', '|'):
            flush()
            i += 1
            continue

        current.append(ch)
        i += 1

    flush()
    return sub_commands
522
+
523
+
354
524
  def deduplicate_commands(commands: List[Dict]) -> List[Dict]:
355
525
  """
356
526
  Remove duplicate commands while preserving order.
@@ -405,8 +575,8 @@ Examples:
405
575
  parser.add_argument(
406
576
  '-o', '--output',
407
577
  type=str,
408
- default=DEFAULT_OUTPUT_DIR,
409
- help=f'Output directory (default: {DEFAULT_OUTPUT_DIR})'
578
+ default=None,
579
+ help=f'Output directory (default: timestamped folder in {DEFAULT_OUTPUT_BASE}/)'
410
580
  )
411
581
 
412
582
  parser.add_argument(
@@ -487,8 +657,11 @@ def main() -> int:
487
657
 
488
658
  sessions_to_process = sessions
489
659
 
490
- # Run the pipeline
491
- output_dir = Path(args.output)
660
+ # Run the pipeline with timestamped output directory
661
+ if args.output:
662
+ output_dir = Path(args.output)
663
+ else:
664
+ output_dir = generate_timestamped_output_dir()
492
665
  success, message = run_extraction_pipeline(sessions_to_process, output_dir)
493
666
 
494
667
  if success: