@rookiestar/eng-lang-tutor 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/.gitignore +32 -0
- package/CHANGELOG.md +37 -0
- package/CLAUDE.md +275 -0
- package/README.md +369 -0
- package/SKILL.md +613 -0
- package/bin/eng-lang-tutor.js +177 -0
- package/docs/OPENCLAW_DEPLOYMENT.md +241 -0
- package/examples/sample_keypoint_a1.json +112 -0
- package/examples/sample_keypoint_a2.json +124 -0
- package/examples/sample_keypoint_b1.json +135 -0
- package/examples/sample_keypoint_b2.json +137 -0
- package/examples/sample_keypoint_c1.json +134 -0
- package/examples/sample_keypoint_c2.json +141 -0
- package/examples/sample_quiz_a1.json +94 -0
- package/examples/sample_quiz_a2.json +94 -0
- package/examples/sample_quiz_b1.json +92 -0
- package/examples/sample_quiz_b2.json +94 -0
- package/examples/sample_quiz_c1.json +94 -0
- package/examples/sample_quiz_c2.json +104 -0
- package/package.json +41 -0
- package/references/resources.md +292 -0
- package/requirements.txt +16 -0
- package/scripts/__init__.py +28 -0
- package/scripts/audio/__init__.py +23 -0
- package/scripts/audio/composer.py +367 -0
- package/scripts/audio/converter.py +331 -0
- package/scripts/audio/feishu_voice.py +404 -0
- package/scripts/audio/tts/__init__.py +30 -0
- package/scripts/audio/tts/base.py +166 -0
- package/scripts/audio/tts/manager.py +306 -0
- package/scripts/audio/tts/providers/__init__.py +12 -0
- package/scripts/audio/tts/providers/edge.py +111 -0
- package/scripts/audio/tts/providers/xunfei.py +205 -0
- package/scripts/audio/utils.py +63 -0
- package/scripts/cli/__init__.py +7 -0
- package/scripts/cli/cli.py +229 -0
- package/scripts/cli/command_parser.py +336 -0
- package/scripts/core/__init__.py +30 -0
- package/scripts/core/constants.py +125 -0
- package/scripts/core/error_notebook.py +308 -0
- package/scripts/core/gamification.py +405 -0
- package/scripts/core/scorer.py +295 -0
- package/scripts/core/state_manager.py +814 -0
- package/scripts/eng-lang-tutor +16 -0
- package/scripts/scheduling/__init__.py +6 -0
- package/scripts/scheduling/cron_push.py +229 -0
- package/scripts/utils/__init__.py +12 -0
- package/scripts/utils/dedup.py +331 -0
- package/scripts/utils/helpers.py +82 -0
- package/templates/keypoint_schema.json +420 -0
- package/templates/prompt_templates.md +73 -0
- package/templates/prompts/display_guide.md +106 -0
- package/templates/prompts/initialization.md +350 -0
- package/templates/prompts/keypoint_generation.md +272 -0
- package/templates/prompts/output_rules.md +106 -0
- package/templates/prompts/quiz_generation.md +190 -0
- package/templates/prompts/responses.md +339 -0
- package/templates/prompts/shared_enums.md +252 -0
- package/templates/quiz_schema.json +214 -0
- package/templates/state_schema.json +277 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/bin/bash
# eng-lang-tutor wrapper script
# Runs the Python CLI with the dedicated virtualenv interpreter when it is
# installed, and falls back to the system python3 otherwise.

# Directory containing this script (resolved to an absolute path).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_PYTHON="$HOME/.venvs/eng-lang-tutor/bin/python"

# Prefer venv Python if it exists and is executable
if [ -x "$VENV_PYTHON" ]; then
    PYTHON="$VENV_PYTHON"
else
    PYTHON="python3"
fi

# Locate the CLI entry point. A cli.py sitting next to this script keeps
# priority; the package ships the CLI as cli/cli.py, so use that location
# when no sibling cli.py is present.
CLI_SCRIPT="$SCRIPT_DIR/cli.py"
if [ ! -f "$CLI_SCRIPT" ] && [ -f "$SCRIPT_DIR/cli/cli.py" ]; then
    CLI_SCRIPT="$SCRIPT_DIR/cli/cli.py"
fi

# Run the CLI with all arguments; exec replaces this shell so the exit code
# and signals belong to the Python process.
exec "$PYTHON" "$CLI_SCRIPT" "$@"
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Cron Push - Handles scheduled content generation and notifications.
|
|
4
|
+
|
|
5
|
+
This script is called by cron to:
|
|
6
|
+
1. Generate and push daily keypoints
|
|
7
|
+
2. Generate and push daily quizzes
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python3 cron_push.py --task keypoint
|
|
11
|
+
python3 cron_push.py --task quiz
|
|
12
|
+
python3 cron_push.py --task status
python3 cron_push.py --task reset_daily
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
from datetime import date, datetime, timedelta
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Dict, Any, Optional
|
|
20
|
+
|
|
21
|
+
# Import local modules
|
|
22
|
+
import sys
|
|
23
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from scripts.core.state_manager import StateManager
|
|
27
|
+
except ImportError:
|
|
28
|
+
from core.state_manager import StateManager
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CronPusher:
    """Handles scheduled content push operations.

    The push methods store placeholder records flagged ``needs_generation``;
    as the inline notes say, the actual LLM generation happens in the Agent.
    """

    # Maximum number of topic fingerprints kept in state['recent_topics'].
    RECENT_TOPICS_LIMIT = 50

    def __init__(self, data_dir: str = "data"):
        """
        Initialize the cron pusher.

        Args:
            data_dir: Path to the data directory
        """
        self.state_manager = StateManager(data_dir)
        # Captured once so every operation of this run targets the same day.
        self.today = date.today()

    def push_keypoint(self) -> Dict[str, Any]:
        """
        Create and save today's knowledge point placeholder.

        Returns:
            Result dictionary with "status" ("exists" or "created"),
            "message" and the "keypoint" content.
        """
        # Nothing to do when today's keypoint was already stored.
        existing = self.state_manager.load_daily_content('keypoint', self.today)
        if existing:
            return {
                "status": "exists",
                "message": f"Keypoint for {self.today} already exists",
                "keypoint": existing
            }

        # Round-trip the state through the manager before writing content.
        # NOTE(review): presumably this persists a default state on first
        # run -- confirm against StateManager before removing.
        state = self.state_manager.load_state()
        self.state_manager.save_state(state)

        # Placeholder keypoint - actual LLM generation happens in Agent
        fingerprint = f"auto_{self.today.isoformat()}"
        keypoint = {
            "date": self.today.isoformat(),
            "topic_fingerprint": fingerprint,
            "category": "oral",
            "scene": {
                "context": "Auto-generated placeholder",
                "formality": "casual"
            },
            "expressions": [],
            "alternatives": [],
            "chinglish_trap": {},
            "examples": [],
            "generated": False,
            "needs_generation": True
        }

        # Save and log
        self.state_manager.save_daily_content('keypoint', keypoint, self.today)
        self.state_manager.append_event('keypoint_pushed', {
            "date": self.today.isoformat(),
            "fingerprint": fingerprint
        })

        # Update state. A state without 'recent_topics' (or with it unset)
        # is treated as an empty history instead of raising KeyError.
        state = self.state_manager.load_state()
        recent_topics = list(state.get('recent_topics') or [])
        recent_topics.append(fingerprint)
        state['recent_topics'] = recent_topics[-self.RECENT_TOPICS_LIMIT:]

        # Record view for this keypoint
        state = self.state_manager.record_keypoint_view(state, self.today)

        self.state_manager.save_state(state)

        return {
            "status": "created",
            "message": f"Keypoint for {self.today} created (needs generation)",
            "keypoint": keypoint
        }

    def push_quiz(self) -> Dict[str, Any]:
        """
        Create and save today's quiz placeholder.

        When today's keypoint does not exist yet it is created first, so the
        quiz can reference its fingerprint.

        Returns:
            Result dictionary with "status" ("exists" or "created"),
            "message" and the "quiz" content.
        """
        # Nothing to do when today's quiz was already stored.
        existing = self.state_manager.load_daily_content('quiz', self.today)
        if existing:
            return {
                "status": "exists",
                "message": f"Quiz for {self.today} already exists",
                "quiz": existing
            }

        # Round-trip the state through the manager (see push_keypoint's
        # identical step; purpose to be confirmed against StateManager).
        state = self.state_manager.load_state()
        self.state_manager.save_state(state)

        # Load today's keypoint, creating it first when missing.
        keypoint = self.state_manager.load_daily_content('keypoint', self.today)
        if not keypoint:
            keypoint = self.push_keypoint().get("keypoint")

        # Placeholder quiz - actual LLM generation happens in Agent
        quiz = {
            "quiz_date": self.today.isoformat(),
            "keypoint_fingerprint": keypoint.get("topic_fingerprint") if keypoint else None,
            "questions": [],
            "total_xp": 0,
            "passing_score": 70,
            "generated": False,
            "needs_generation": True
        }

        # Save and log
        self.state_manager.save_daily_content('quiz', quiz, self.today)
        self.state_manager.append_event('quiz_pushed', {
            "date": self.today.isoformat()
        })

        return {
            "status": "created",
            "message": f"Quiz for {self.today} created (needs generation)",
            "quiz": quiz
        }

    def reset_daily_flags(self) -> Dict[str, Any]:
        """
        Reset daily completion flags (called at midnight).

        No flag is modified here: the stored completion date is kept and the
        new day's check is left to handle it. The state is only loaded and
        saved back.

        Returns:
            Result dictionary with status
        """
        state = self.state_manager.load_state()
        self.state_manager.save_state(state)

        return {
            "status": "success",
            "message": f"Daily flags reset for {self.today}"
        }

    def get_status(self) -> Dict[str, Any]:
        """
        Get current status of today's content.

        Returns:
            Status dictionary with the date, whether state is initialized,
            whether today's keypoint/quiz exist, whether a quiz can be taken,
            and the user's XP and streak (0 when absent from state).
        """
        state = self.state_manager.load_state()

        keypoint = self.state_manager.load_daily_content('keypoint', self.today)
        quiz = self.state_manager.load_daily_content('quiz', self.today)
        user = state.get("user", {})

        return {
            "date": self.today.isoformat(),
            "initialized": state.get("initialized", False),
            "keypoint_exists": keypoint is not None,
            "quiz_exists": quiz is not None,
            "can_take_quiz": self.state_manager.can_take_quiz(state),
            "user_xp": user.get("xp", 0),
            "user_streak": user.get("streak", 0)
        }
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def main():
    """Command-line entry point: run one cron task and print its result.

    Options:
        --task      One of keypoint, quiz, reset_daily, status (required).
        --data-dir  Data directory path (default: "data").
        --json      Print the raw result dictionary as JSON only.

    Without --json a timestamped header is printed, followed by the result's
    status and message. Results that carry neither key (the ``status`` task
    returns a plain field dictionary) are printed field by field instead of
    as "Status: None" / "Message: None".
    """
    parser = argparse.ArgumentParser(description="Cron Push for eng-lang-tutor")
    parser.add_argument('--task', required=True,
                        choices=['keypoint', 'quiz', 'reset_daily', 'status'],
                        help='Task to execute')
    parser.add_argument('--data-dir', default='data', help='Data directory path')
    parser.add_argument('--json', action='store_true', help='Output as JSON only')

    args = parser.parse_args()

    pusher = CronPusher(args.data_dir)

    # argparse restricts --task to these keys, so the lookup cannot miss.
    handlers = {
        'keypoint': pusher.push_keypoint,
        'quiz': pusher.push_quiz,
        'reset_daily': pusher.reset_daily_flags,
        'status': pusher.get_status,
    }
    result = handlers[args.task]()

    if args.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
        return

    print(f"[{datetime.now().isoformat()}] Task: {args.task}")
    if 'status' in result or 'message' in result:
        print(f"Status: {result.get('status')}")
        print(f"Message: {result.get('message')}")
    else:
        # Field-style results have no status/message; show what they contain.
        for field, value in result.items():
            print(f"{field}: {value}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Utilities: helper functions and deduplication."""
|
|
3
|
+
|
|
4
|
+
from .helpers import safe_divide, deep_merge, clamp
|
|
5
|
+
from .dedup import DeduplicationManager
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
'safe_divide',
|
|
9
|
+
'deep_merge',
|
|
10
|
+
'clamp',
|
|
11
|
+
'DeduplicationManager',
|
|
12
|
+
]
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Deduplication - Prevents repeated content within 14-day window for eng-lang-tutor.
|
|
4
|
+
|
|
5
|
+
Methods:
|
|
6
|
+
1. Topic fingerprint matching
|
|
7
|
+
2. Expression similarity check
|
|
8
|
+
3. Phrase root matching
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Dict, Any, List, Set, Optional, Tuple
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DeduplicationManager:
    """Manages content deduplication to avoid repetitive learning.

    Duplicates are detected by topic fingerprint, by overlap of normalized
    expressions, and by overlap of phrase roots. The recent-topic history is
    capped by entry count; ``lookback_days`` is stored but not read by any
    method of this class.
    """

    # Maximum number of fingerprints kept in / returned from recent_topics.
    _MAX_RECENT_TOPICS = 50
    # Words never used as a phrase root.
    _STOP_WORDS = frozenset(
        ['the', 'a', 'an', 'to', 'for', 'and', 'or', 'is', 'are']
    )
    # Anything that is neither a word character nor whitespace.
    _PUNCTUATION = re.compile(r'[^\w\s]')

    def __init__(self, lookback_days: int = 14):
        """
        Initialize the deduplication manager.

        Args:
            lookback_days: Number of days to look back for duplicates
        """
        self.lookback_days = lookback_days

    @classmethod
    def _normalize(cls, text: str) -> str:
        """Lowercase *text*, drop punctuation and collapse whitespace."""
        stripped = cls._PUNCTUATION.sub('', text.lower())
        return ' '.join(stripped.split())

    def get_excluded_topics(self, state: Dict[str, Any]) -> List[str]:
        """
        Get the most recent topic fingerprints.

        Args:
            state: Current state with recent_topics

        Returns:
            The last 50 fingerprints of state['recent_topics'] (the stored
            list itself when it holds 50 or fewer; empty list when absent)
        """
        recent = state.get('recent_topics', [])
        if len(recent) > self._MAX_RECENT_TOPICS:
            return recent[-self._MAX_RECENT_TOPICS:]
        return recent

    def check_duplicate(
        self,
        new_content: Dict[str, Any],
        recent_content: List[Dict[str, Any]]
    ) -> Tuple[bool, str]:
        """
        Check if new content duplicates recent content.

        Checks run in order: identical topic fingerprint, more than 50% of
        the new expressions already present in one recent item, then at least
        two shared phrase roots with one recent item.

        Args:
            new_content: New knowledge point to check
            recent_content: List of recent knowledge points

        Returns:
            Tuple of (is_duplicate, reason); reason is "" when not duplicate
        """
        new_fingerprint = new_content.get('topic_fingerprint', '')

        # Fingerprint match. An empty fingerprint identifies nothing, so it
        # must not match another item whose fingerprint is also empty.
        if new_fingerprint:
            for old in recent_content:
                if old.get('topic_fingerprint') == new_fingerprint:
                    return (True, f"Topic fingerprint matches: {new_fingerprint}")

        # Expression overlap: share of the NEW expressions found in one item.
        new_expressions = self._extract_expressions(new_content)
        if new_expressions:
            for old in recent_content:
                overlap = new_expressions & self._extract_expressions(old)
                if len(overlap) / len(new_expressions) > 0.5:
                    return (True, f"Expression overlap: {overlap}")

        # Phrase root similarity
        new_roots = self._extract_phrase_roots(new_content)
        for old in recent_content:
            root_overlap = new_roots & self._extract_phrase_roots(old)
            if len(root_overlap) >= 2:
                return (True, f"Phrase root overlap: {root_overlap}")

        return (False, "")

    def _extract_expressions(self, content: Dict[str, Any]) -> Set[str]:
        """
        Extract normalized expressions from content.

        Args:
            content: Knowledge point content; 'expressions' entries are read
                through their 'phrase' key, 'alternatives' entries as strings

        Returns:
            Set of normalized (lowercase, punctuation-free) expression strings
        """
        candidates = [
            expr.get('phrase', '') for expr in content.get('expressions', [])
        ]
        candidates.extend(content.get('alternatives', []))

        normalized = (self._normalize(text) for text in candidates)
        # Entries that normalize to nothing are dropped.
        return {text for text in normalized if text}

    def _extract_phrase_roots(self, content: Dict[str, Any]) -> Set[str]:
        """
        Extract phrase roots/concepts from content.

        Roots are the fingerprint parts longer than three characters plus,
        for each expression, its first word that is not a stop word and is
        longer than three characters. Expression words are normalized the
        same way as in _extract_expressions so trailing punctuation
        ("favor?") does not prevent a match with "favor".

        Args:
            content: Knowledge point content

        Returns:
            Set of phrase root strings
        """
        roots = set()

        # From fingerprint: underscore-separated parts, short ones skipped.
        fingerprint = content.get('topic_fingerprint', '')
        if fingerprint:
            roots.update(
                part for part in fingerprint.split('_') if len(part) > 3
            )

        # From expressions (first significant word)
        for expr in content.get('expressions', []):
            for word in self._normalize(expr.get('phrase', '')).split():
                if word not in self._STOP_WORDS and len(word) > 3:
                    roots.add(word)
                    break

        return roots

    def add_to_recent_topics(self, state: Dict[str, Any],
                             fingerprint: str) -> Dict[str, Any]:
        """
        Add a topic fingerprint to recent topics list.

        Args:
            state: Current state (updated in place)
            fingerprint: Topic fingerprint to add

        Returns:
            The same state object, with recent_topics capped at 50 entries
        """
        recent = state.get('recent_topics', [])
        recent.append(fingerprint)

        # Keep only the newest entries
        state['recent_topics'] = recent[-self._MAX_RECENT_TOPICS:]
        return state

    def generate_excluded_list_prompt(self, excluded_topics: List[str]) -> str:
        """
        Generate a prompt-friendly list of excluded topics.

        Topics are grouped by the text before their first underscore
        (e.g., "asking", "workplace"); topics without an underscore go under
        "other". Each group shows at most three topics, followed by "..."
        when more exist.

        Args:
            excluded_topics: List of topic fingerprints

        Returns:
            Formatted string for LLM prompt
        """
        if not excluded_topics:
            return "None (first content or no recent topics)"

        grouped: Dict[str, List[str]] = {}
        for topic in excluded_topics:
            head, separator, _ = topic.partition('_')
            prefix = head if separator else 'other'
            grouped.setdefault(prefix, []).append(topic)

        lines = []
        for prefix, topics in sorted(grouped.items()):
            shown = ', '.join(topics[:3])
            suffix = '...' if len(topics) > 3 else ''
            lines.append(f"- {prefix}: {shown}{suffix}")

        return '\n'.join(lines)

    def suggest_alternative_topic(
        self,
        excluded_topics: List[str],
        available_topics: List[str]
    ) -> Optional[str]:
        """
        Suggest an alternative topic that hasn't been used recently.

        TODO: This function is defined but not currently used in production code.
        Could be integrated with LLM generation to suggest topics when duplicates detected.

        Args:
            excluded_topics: Topics to avoid
            available_topics: All available topics

        Returns:
            The first available topic not in the excluded list, or None if
            all are exhausted
        """
        excluded_set = set(excluded_topics)
        return next(
            (topic for topic in available_topics if topic not in excluded_set),
            None,
        )

    def get_content_diversity_score(
        self,
        recent_content: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Calculate content diversity metrics.

        TODO: This function is defined but not currently used in production code.
        Could be used for analytics dashboard or adaptive content generation.

        Args:
            recent_content: List of recent content

        Returns:
            Dictionary with total_content, unique_topics, topic_diversity
            (unique fingerprints / items, 1.0 for empty input),
            category_distribution and theme_distribution. All keys are
            present for empty input as well.
        """
        if not recent_content:
            return {
                'total_content': 0,
                'unique_topics': 0,
                'topic_diversity': 1.0,
                'category_distribution': {},
                'theme_distribution': {}
            }

        topics = set()
        categories = {'oral': 0, 'written': 0}
        topic_themes: Dict[str, int] = {}

        for content in recent_content:
            fingerprint = content.get('topic_fingerprint', '')
            if fingerprint:
                topics.add(fingerprint)
                # Theme is the text before the first underscore (the whole
                # fingerprint when it has none).
                theme = fingerprint.split('_')[0]
                topic_themes[theme] = topic_themes.get(theme, 0) + 1

            # Items without a category count as 'oral'.
            category = content.get('category', 'oral')
            categories[category] = categories.get(category, 0) + 1

        total = len(recent_content)
        unique = len(topics)

        return {
            'total_content': total,
            'unique_topics': unique,
            'topic_diversity': unique / total if total > 0 else 1.0,
            'category_distribution': categories,
            'theme_distribution': topic_themes
        }
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# CLI interface for testing
|
|
280
|
+
if __name__ == "__main__":
    # Manual smoke test for the deduplication helpers; does nothing unless
    # --demo is passed.
    import argparse
    import json

    cli = argparse.ArgumentParser(description="Deduplication for eng-lang-tutor")
    cli.add_argument('--demo', action='store_true', help='Run demo')
    options = cli.parse_args()

    manager = DeduplicationManager()

    if options.demo:
        # Simulated history: (fingerprint, phrase) pairs, all in the
        # "oral" category.
        samples = (
            ("asking_favor_casual", "Can you do me a favor?"),
            ("workplace_meeting", "Let's circle back"),
            ("gaming_slang", "GG, that was clutch"),
        )
        history = [
            {
                "topic_fingerprint": sample_fingerprint,
                "category": "oral",
                "expressions": [{"phrase": sample_phrase}],
            }
            for sample_fingerprint, sample_phrase in samples
        ]

        # Duplicate detection against a candidate close to the first sample.
        candidate = {
            "topic_fingerprint": "asking_favor_formal",
            "category": "oral",
            "expressions": [{"phrase": "Could you help me out?"}],
        }
        verdict, explanation = manager.check_duplicate(candidate, history)
        print(f"Is duplicate: {verdict}")
        print(f"Reason: {explanation}")

        # Diversity metrics for the simulated history.
        print("\n=== Diversity Score ===")
        print(json.dumps(manager.get_content_diversity_score(history), indent=2))

        # Prompt-ready summary of excluded topics.
        print("\n=== Excluded Topics (for LLM prompt) ===")
        excluded_fingerprints = [
            'asking_favor_casual',
            'workplace_meeting',
            'gaming_slang',
            'social_greeting',
            'news_vocabulary',
        ]
        print(manager.generate_excluded_list_prompt(excluded_fingerprints))
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Utility functions for eng-lang-tutor.
|
|
4
|
+
|
|
5
|
+
Common utilities used across multiple modules.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Dict, Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
    """
    Divide numerator by denominator, falling back to default on zero.

    Reserved for future use in XP/ratio calculations.

    Args:
        numerator: The number to divide
        denominator: The number to divide by
        default: Value returned unchanged when denominator is zero

    Returns:
        numerator / denominator, or default if denominator is zero

    Examples:
        >>> safe_divide(10, 2)
        5.0
        >>> safe_divide(10, 0)
        0.0
        >>> safe_divide(10, 0, default=100.0)
        100.0
    """
    if denominator != 0:
        return numerator / denominator
    # Zero denominator: hand back the caller's fallback as-is.
    return default
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build a new dictionary from base with override merged on top.

    Where both sides hold a dictionary under the same key, the two are
    merged recursively; in every other case the override value replaces
    (or adds) the entry. Neither input is modified, and the result shares
    no mutable objects with them.

    Args:
        base: Base dictionary (not modified)
        override: Dictionary with values to override/add

    Returns:
        New merged dictionary

    Examples:
        >>> base = {'a': 1, 'b': {'c': 2, 'd': 3}}
        >>> override = {'b': {'c': 10}}
        >>> deep_merge(base, override)
        {'a': 1, 'b': {'c': 10, 'd': 3}}
    """
    import copy

    # Start from an independent copy so base is never touched.
    merged = copy.deepcopy(base)

    for key, incoming in override.items():
        both_are_dicts = (
            key in merged
            and isinstance(merged[key], dict)
            and isinstance(incoming, dict)
        )
        if both_are_dicts:
            merged[key] = deep_merge(merged[key], incoming)
        else:
            # Copy so later changes to override cannot leak into the result.
            merged[key] = copy.deepcopy(incoming)

    return merged
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def clamp(value: float, min_val: float, max_val: float) -> float:
    """
    Restrict a value to the range [min_val, max_val].

    Reserved for future use in streak multiplier capping or level calculations.

    Args:
        value: Value to clamp
        min_val: Minimum allowed value
        max_val: Maximum allowed value

    Returns:
        Value clamped to [min_val, max_val]
    """
    # Apply the upper bound first, then the lower bound.
    capped = value if value < max_val else max_val
    return capped if capped > min_val else min_val
|