learn_bash_from_session_data 1.0.3 → 1.0.4
This diff shows the content changes between publicly released versions of the package, as they appear in their public registry; it is provided for informational purposes only.
- package/package.json +1 -1
- package/scripts/html_generator.py +8 -4
- package/scripts/main.py +84 -16
package/package.json
CHANGED
(version field: 1.0.3 → 1.0.4)

package/scripts/html_generator.py
CHANGED
@@ -2007,7 +2007,7 @@ def generate_html_files(
     categories = analysis.get('categories', {})
     analyzed_commands = analysis.get('commands', commands)
 
-    # Build frequency map from top_commands
+    # Build frequency map from top_commands (full command strings)
     top_commands_data = analysis.get('top_commands', [])
     frequency_map = {}
     for item in top_commands_data:
@@ -2015,6 +2015,10 @@ def generate_html_files(
             cmd_str, count = item[0], item[1]
             frequency_map[cmd_str] = count
 
+    # Get base command frequency for the "Top 10 Most-Used Commands" chart
+    # This aggregates by base command (cd, git, mkdir) not full command strings
+    top_base_commands_data = analysis.get('top_base_commands', [])
+
     # Map complexity scores (1-5) to string labels for CSS
     def complexity_to_label(score):
         if score <= 2:
@@ -2059,12 +2063,12 @@ def generate_html_files(
         'advanced': raw_complexity.get(4, 0) + raw_complexity.get(5, 0),
     }
 
-    # Build top commands list with proper frequencies
+    # Build top commands list with proper frequencies (by base command)
     top_10_commands = []
-    for item in top_commands_data[:10]:
+    for item in top_base_commands_data[:10]:
         if isinstance(item, (list, tuple)) and len(item) >= 2:
             top_10_commands.append({
-                'command': item[0],
+                'command': item[0],  # base command like "cd", "git"
                 'count': item[1]
             })
 
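The html_generator.py change above switches the "Top 10 Most-Used Commands" chart from full command strings to base commands. A minimal, self-contained sketch of the distinction (the sample data and the whitespace split are illustrative, not the package's actual parsing):

    from collections import Counter

    # Hypothetical sample of extracted command strings
    commands = ["git status", "git commit -m 'x'", "cd /tmp", "cd ..", "git status"]

    full_freq = Counter(commands)                        # keys: full command strings
    base_freq = Counter(c.split()[0] for c in commands)  # keys: base commands

    print(full_freq.most_common(2))  # [('git status', 2), ("git commit -m 'x'", 1)]
    print(base_freq.most_common(2))  # [('git', 3), ('cd', 2)]

Aggregating by base command makes the chart answer "which tools do I reach for most" rather than "which exact invocations repeat".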
package/scripts/main.py
CHANGED
@@ -20,10 +20,24 @@ if sys.version_info < (3, 8):
              f"{sys.version_info.major}.{sys.version_info.minor}")
 
 # Constants
-
+DEFAULT_OUTPUT_BASE = "./bash-learner-output"
 MAX_UNIQUE_COMMANDS = 500
 
 
+def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
+    """
+    Generate a timestamped output directory.
+
+    Args:
+        base_dir: Base directory for outputs
+
+    Returns:
+        Path to timestamped output directory (e.g., ./bash-learner-output/run-2026-02-05-143052/)
+    """
+    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    return Path(base_dir) / f"run-{timestamp}"
+
+
 def get_sessions_base_path() -> Path:
     """
     Get the base path for Claude session files.
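A quick usage sketch of the new helper (repeating its two lines so the snippet runs standalone; the mkdir call is an assumption about how the pipeline consumes the path, since directory creation is not part of this diff):

    from datetime import datetime
    from pathlib import Path

    DEFAULT_OUTPUT_BASE = "./bash-learner-output"

    def generate_timestamped_output_dir(base_dir: str = DEFAULT_OUTPUT_BASE) -> Path:
        timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
        return Path(base_dir) / f"run-{timestamp}"

    out_dir = generate_timestamped_output_dir()
    out_dir.mkdir(parents=True, exist_ok=True)  # assumed caller behavior, not shown in the diff
    print(out_dir)  # e.g. bash-learner-output/run-2026-02-05-143052

The timestamp has one-second granularity, so two runs started within the same second would resolve to the same directory.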
@@ -305,8 +319,28 @@ def run_extraction_pipeline(
     parsed_commands = parse_commands(raw_commands)
     print(f" -> Parsed {len(parsed_commands)} commands")
 
-    # Step 4:
+    # Step 4: Count frequencies BEFORE deduplication
+    from collections import Counter
+    cmd_frequency = Counter()
+    base_cmd_frequency = Counter()
+    for cmd in parsed_commands:
+        cmd_str = cmd.get('command', '') or cmd.get('raw', '')
+        base_cmd = cmd.get('base_command', '')
+        if cmd_str:
+            cmd_frequency[cmd_str] += 1
+        if base_cmd:
+            base_cmd_frequency[base_cmd] += 1
+
+    # Step 5: Deduplicate and add frequency data
     unique_commands = deduplicate_commands(parsed_commands)
+
+    # Add frequency to each unique command
+    for cmd in unique_commands:
+        cmd_str = cmd.get('command', '') or cmd.get('raw', '')
+        base_cmd = cmd.get('base_command', '')
+        cmd['frequency'] = cmd_frequency.get(cmd_str, 1)
+        cmd['base_frequency'] = base_cmd_frequency.get(base_cmd, 1)
+
     if len(unique_commands) > MAX_UNIQUE_COMMANDS:
         print(f"\nCapping at {MAX_UNIQUE_COMMANDS} unique commands "
               f"(found {len(unique_commands)})")
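The ordering is the substance of this change: frequencies are counted over parsed_commands before deduplicate_commands collapses repeats; counted afterwards, every command would report a frequency of 1. A self-contained sketch of that failure mode (dict-shaped commands and the keyed dedup are simplified stand-ins for the package's real structures):

    from collections import Counter

    parsed = [{'command': 'ls -la'}, {'command': 'ls -la'}, {'command': 'pwd'}]

    # Counting BEFORE dedup preserves real usage counts
    before = Counter(c['command'] for c in parsed)
    print(before['ls -la'])  # 2

    # Counting AFTER dedup flattens everything to 1
    deduped = list({c['command']: c for c in parsed}.values())
    after = Counter(c['command'] for c in deduped)
    print(after['ls -la'])  # 1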
@@ -314,9 +348,15 @@ def run_extraction_pipeline(
     else:
         print(f"\n{len(unique_commands)} unique commands")
 
-    # Step
+    # Step 6: Analyze commands
     print("\nAnalyzing commands...")
     analysis = analyze_commands(unique_commands)
+
+    # Inject pre-computed frequency data into analysis
+    analysis['command_frequency'] = dict(cmd_frequency)
+    analysis['base_command_frequency'] = dict(base_cmd_frequency)
+    analysis['top_commands'] = cmd_frequency.most_common(20)
+    analysis['top_base_commands'] = base_cmd_frequency.most_common(20)
     print(f" -> Generated analysis with {len(analysis.get('categories', {}))} categories")
 
     # Step 6: Generate quizzes
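The injected top_commands / top_base_commands values are what the html_generator.py loop shown earlier unpacks via item[0], item[1]: most_common() returns (value, count) tuples, and if the analysis dict passes through JSON on its way to the generator, those tuples come back as two-element lists, which would explain why the consumer accepts both with isinstance(item, (list, tuple)). A small illustration (the counts are made up):

    import json
    from collections import Counter

    base_cmd_frequency = Counter({'cd': 42, 'git': 37, 'mkdir': 5})

    top = base_cmd_frequency.most_common(20)
    print(top[0])  # ('cd', 42) -- a tuple, so item[0] / item[1] unpacking works

    roundtrip = json.loads(json.dumps(top))
    print(roundtrip[0])  # ['cd', 42] -- now a list, still index-compatible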
@@ -330,16 +370,41 @@ def run_extraction_pipeline(
     html_files = generate_html(unique_commands, analysis, quizzes, output_dir)
     print(f" -> Created {len(html_files)} HTML files")
 
-    # Write summary JSON
+    # Write summary JSON with comprehensive metadata
     summary = {
-        "
-
-
-
-
-        "
-
-
+        "metadata": {
+            "generated_at": datetime.now().isoformat(),
+            "run_id": output_dir.name,
+            "version": "1.0.4",
+        },
+        "input": {
+            "sessions_processed": len(sessions),
+            "session_files": [
+                {
+                    "filename": s['filename'],
+                    "path": str(s['path']),
+                    "size": s['size_human'],
+                    "modified": s['modified_str']
+                }
+                for s in sessions
+            ],
+            "total_entries": len(all_entries),
+        },
+        "analysis": {
+            "raw_commands_found": len(raw_commands),
+            "unique_commands": len(unique_commands),
+            "categories": list(analysis.get('categories', {}).keys()),
+            "category_counts": {cat: len(cmds) for cat, cmds in analysis.get('categories', {}).items()},
+            "top_base_commands": [
+                {"command": cmd, "count": count}
+                for cmd, count in list(base_cmd_frequency.most_common(10))
+            ],
+            "complexity_distribution": dict(analysis.get('complexity_distribution', {})),
+        },
+        "output": {
+            "quiz_questions": quiz_count,
+            "html_files": [str(f) for f in html_files],
+        },
     }
 
     summary_path = output_dir / "summary.json"
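Given the structure above, a finished run's summary.json can be inspected with nothing but the standard library. A sketch, assuming a hypothetical run directory name (the field names follow the diff):

    import json
    from pathlib import Path

    run_dir = Path("./bash-learner-output/run-2026-02-05-143052")  # hypothetical run id
    summary = json.loads((run_dir / "summary.json").read_text())

    print(summary["metadata"]["version"])          # "1.0.4"
    print(summary["analysis"]["unique_commands"])
    for entry in summary["analysis"]["top_base_commands"]:
        print(f"{entry['command']}: {entry['count']}")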
@@ -405,8 +470,8 @@ Examples:
     parser.add_argument(
         '-o', '--output',
         type=str,
-        default=
-        help=f'Output directory (default: {
+        default=None,
+        help=f'Output directory (default: timestamped folder in {DEFAULT_OUTPUT_BASE}/)'
     )
 
     parser.add_argument(
@@ -487,8 +552,11 @@ def main() -> int:
 
     sessions_to_process = sessions
 
-    # Run the pipeline
-
+    # Run the pipeline with timestamped output directory
+    if args.output:
+        output_dir = Path(args.output)
+    else:
+        output_dir = generate_timestamped_output_dir()
     success, message = run_extraction_pipeline(sessions_to_process, output_dir)
 
     if success: