learn_bash_from_session_data 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "learn_bash_from_session_data",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "Learn bash from your Claude Code sessions - extracts commands and generates interactive HTML lessons",
   "main": "bin/learn-bash.js",
   "bin": {
@@ -2007,7 +2007,7 @@ def generate_html_files(
     categories = analysis.get('categories', {})
     analyzed_commands = analysis.get('commands', commands)

-    # Build frequency map from top_commands
+    # Build frequency map from top_commands (full command strings)
     top_commands_data = analysis.get('top_commands', [])
     frequency_map = {}
     for item in top_commands_data:
@@ -2015,6 +2015,10 @@ def generate_html_files(
             cmd_str, count = item[0], item[1]
             frequency_map[cmd_str] = count

+    # Get base command frequency for the "Top 10 Most-Used Commands" chart
+    # This aggregates by base command (cd, git, mkdir) not full command strings
+    top_base_commands_data = analysis.get('top_base_commands', [])
+
     # Map complexity scores (1-5) to string labels for CSS
     def complexity_to_label(score):
         if score <= 2:
@@ -2059,12 +2063,12 @@ def generate_html_files(
         'advanced': raw_complexity.get(4, 0) + raw_complexity.get(5, 0),
     }

-    # Build top commands list with proper frequencies
+    # Build top commands list with proper frequencies (by base command)
     top_10_commands = []
-    for item in top_commands_data[:10]:
+    for item in top_base_commands_data[:10]:
         if isinstance(item, (list, tuple)) and len(item) >= 2:
             top_10_commands.append({
-                'command': item[0],
+                'command': item[0],  # base command like "cd", "git"
                 'count': item[1]
             })

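Note: switching the chart data source from top_commands_data to top_base_commands_data changes what the "Top 10 Most-Used Commands" chart aggregates. A minimal illustration with made-up commands (the real counts come from the Counters built in scripts/main.py below; splitting on whitespace is a simplification here):

    from collections import Counter

    # Hypothetical command history, for illustration only
    commands = ["git status", "git push", "git status", "cd /tmp", "cd ~", "ls -la"]

    full = Counter(commands)                        # keyed by full command string
    base = Counter(c.split()[0] for c in commands)  # keyed by base command

    full.most_common(3)   # [('git status', 2), ('git push', 1), ('cd /tmp', 1)]
    base.most_common(3)   # [('git', 3), ('cd', 2), ('ls', 1)]

    # The chart now consumes the base-command counts:
    top_10_commands = [{'command': cmd, 'count': n} for cmd, n in base.most_common(10)]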
package/scripts/main.py CHANGED
@@ -20,10 +20,24 @@ if sys.version_info < (3, 8):
         f"{sys.version_info.major}.{sys.version_info.minor}")

 # Constants
-DEFAULT_OUTPUT_DIR = "./bash-learner-output/"
+DEFAULT_OUTPUT_BASE = "./bash-learner-output"
 MAX_UNIQUE_COMMANDS = 500


+def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
+    """
+    Generate a timestamped output directory.
+
+    Args:
+        base_dir: Base directory for outputs
+
+    Returns:
+        Path to timestamped output directory (e.g., ./bash-learner-output/run-2026-02-05-143052/)
+    """
+    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    return Path(base_dir) / f"run-{timestamp}"
+
+
 def get_sessions_base_path() -> Path:
     """
     Get the base path for Claude session files.
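Note: with this helper, each run writes into its own folder instead of reusing ./bash-learner-output/. A rough usage sketch (directory names vary by clock time, and the mkdir call is part of the sketch, not necessarily how the pipeline creates the folder):

    out_dir = generate_timestamped_output_dir()   # e.g. bash-learner-output/run-2026-02-05-143052
    out_dir.mkdir(parents=True, exist_ok=True)    # safe even if the base folder already exists
    (out_dir / "summary.json").write_text("{}")   # files from this run stay isolated from earlier runs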
@@ -305,8 +319,28 @@ def run_extraction_pipeline(
     parsed_commands = parse_commands(raw_commands)
     print(f" -> Parsed {len(parsed_commands)} commands")

-    # Step 4: Deduplicate and cap
+    # Step 4: Count frequencies BEFORE deduplication
+    from collections import Counter
+    cmd_frequency = Counter()
+    base_cmd_frequency = Counter()
+    for cmd in parsed_commands:
+        cmd_str = cmd.get('command', '') or cmd.get('raw', '')
+        base_cmd = cmd.get('base_command', '')
+        if cmd_str:
+            cmd_frequency[cmd_str] += 1
+        if base_cmd:
+            base_cmd_frequency[base_cmd] += 1
+
+    # Step 5: Deduplicate and add frequency data
     unique_commands = deduplicate_commands(parsed_commands)
+
+    # Add frequency to each unique command
+    for cmd in unique_commands:
+        cmd_str = cmd.get('command', '') or cmd.get('raw', '')
+        base_cmd = cmd.get('base_command', '')
+        cmd['frequency'] = cmd_frequency.get(cmd_str, 1)
+        cmd['base_frequency'] = base_cmd_frequency.get(base_cmd, 1)
+
     if len(unique_commands) > MAX_UNIQUE_COMMANDS:
         print(f"\nCapping at {MAX_UNIQUE_COMMANDS} unique commands "
               f"(found {len(unique_commands)})")
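Note: counting before deduplicate_commands is the point of this change; counting afterwards would report a frequency of 1 for everything. A rough sketch, assuming parsed command dicts shaped like the ones the pipeline above reads ('command' and 'base_command' keys):

    from collections import Counter

    # Hypothetical parsed commands, duplicates intact as they arrive from parse_commands
    parsed = [
        {'command': 'git status', 'base_command': 'git'},
        {'command': 'git status', 'base_command': 'git'},
        {'command': 'ls -la', 'base_command': 'ls'},
    ]

    freq = Counter(c['command'] for c in parsed)
    freq['git status']   # 2 -- counted before deduplication
    # After dedup only one 'git status' entry remains, so counting at that
    # point would lose the repeat information.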
@@ -314,9 +348,15 @@ def run_extraction_pipeline(
     else:
         print(f"\n{len(unique_commands)} unique commands")

-    # Step 5: Analyze commands
+    # Step 6: Analyze commands
     print("\nAnalyzing commands...")
     analysis = analyze_commands(unique_commands)
+
+    # Inject pre-computed frequency data into analysis
+    analysis['command_frequency'] = dict(cmd_frequency)
+    analysis['base_command_frequency'] = dict(base_cmd_frequency)
+    analysis['top_commands'] = cmd_frequency.most_common(20)
+    analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
     print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")

     # Step 6: Generate quizzes
@@ -330,16 +370,41 @@ def run_extraction_pipeline(
     html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
     print(f" -> Created {len(html_files)} HTML files")

-    # Write summary JSON
+    # Write summary JSON with comprehensive metadata
     summary = {
-        "generated_at": datetime.now().isoformat(),
-        "sessions_processed": len(sessions),
-        "total_entries": len(all_entries),
-        "raw_commands": len(raw_commands),
-        "unique_commands": len(unique_commands),
-        "categories": list(analysis.get('categories', {}).keys()),
-        "quiz_count": quiz_count,
-        "html_files": [str(f) for f in html_files],
+        "metadata": {
+            "generated_at": datetime.now().isoformat(),
+            "run_id": output_dir.name,
+            "version": "1.0.4",
+        },
+        "input": {
+            "sessions_processed": len(sessions),
+            "session_files": [
+                {
+                    "filename": s['filename'],
+                    "path": str(s['path']),
+                    "size": s['size_human'],
+                    "modified": s['modified_str']
+                }
+                for s in sessions
+            ],
+            "total_entries": len(all_entries),
+        },
+        "analysis": {
+            "raw_commands_found": len(raw_commands),
+            "unique_commands": len(unique_commands),
+            "categories": list(analysis.get('categories', {}).keys()),
+            "category_counts": {cat: len(cmds) for cat, cmds in analysis.get('categories', {}).items()},
+            "top_base_commands": [
+                {"command": cmd, "count": count}
+                for cmd, count in list(base_cmd_frequency.most_common(10))
+            ],
+            "complexity_distribution": dict(analysis.get('complexity_distribution', {})),
+        },
+        "output": {
+            "quiz_questions": quiz_count,
+            "html_files": [str(f) for f in html_files],
+        },
     }

     summary_path = output_dir / "summary.json"
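Note: consumers of summary.json now need to read the nested sections instead of the 1.0.3 flat keys. A minimal reader sketch (the run path shown is hypothetical):

    import json
    from pathlib import Path

    summary = json.loads(Path("bash-learner-output/run-2026-02-05-143052/summary.json").read_text())

    summary["metadata"]["run_id"]              # e.g. "run-2026-02-05-143052"
    summary["input"]["sessions_processed"]     # was the top-level "sessions_processed" in 1.0.3
    summary["analysis"]["raw_commands_found"]  # was "raw_commands"
    summary["output"]["quiz_questions"]        # was "quiz_count"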
@@ -405,8 +470,8 @@ Examples:
     parser.add_argument(
         '-o', '--output',
         type=str,
-        default=DEFAULT_OUTPUT_DIR,
-        help=f'Output directory (default: {DEFAULT_OUTPUT_DIR})'
+        default=None,
+        help=f'Output directory (default: timestamped folder in {DEFAULT_OUTPUT_BASE}/)'
     )

     parser.add_argument(
@@ -487,8 +552,11 @@ def main() -> int:

     sessions_to_process = sessions

-    # Run the pipeline
-    output_dir = Path(args.output)
+    # Run the pipeline with timestamped output directory
+    if args.output:
+        output_dir = Path(args.output)
+    else:
+        output_dir = generate_timestamped_output_dir()
     success, message = run_extraction_pipeline(sessions_to_process, output_dir)

     if success:
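Note: because the default output location is now a per-run folder, anything that previously assumed ./bash-learner-output/ as a fixed path needs to either pass -o explicitly or locate the newest run-* directory. A rough sketch of the latter (the run-YYYY-MM-DD-HHMMSS names sort chronologically):

    from pathlib import Path

    base = Path("./bash-learner-output")
    runs = sorted(base.glob("run-*"))   # lexicographic order == chronological order here
    latest = runs[-1] if runs else None
    if latest:
        print(latest / "summary.json")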