langgraph-tavily 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langgraph_tavily-0.1.0/LICENSE +21 -0
- langgraph_tavily-0.1.0/PKG-INFO +68 -0
- langgraph_tavily-0.1.0/README.md +51 -0
- langgraph_tavily-0.1.0/pyproject.toml +28 -0
- langgraph_tavily-0.1.0/src/langgraph_tavily/__init__.py +16 -0
- langgraph_tavily-0.1.0/src/langgraph_tavily/analyser.py +238 -0
- langgraph_tavily-0.1.0/src/langgraph_tavily/cli.py +85 -0
- langgraph_tavily-0.1.0/src/langgraph_tavily/generator.py +89 -0
- langgraph_tavily-0.1.0/src/langgraph_tavily/models.py +100 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Artem
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langgraph-tavily
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Language analyser for trivia questions — parse, classify, and score question difficulty
|
|
5
|
+
Project-URL: Homepage, https://github.com/brnv/langgraph-tavily
|
|
6
|
+
Author-email: Artem <brnv@canva.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: language,nlp,question-analysis,quiz,trivia
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# langgraph-tavily
|
|
19
|
+
|
|
20
|
+
Language analyser for trivia questions. Parse, classify, and score question difficulty using linguistic features.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install langgraph-tavily
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from langgraph_tavily import TriviaAnalyser, TriviaGenerator
|
|
32
|
+
|
|
33
|
+
# Analyse a question
|
|
34
|
+
analyser = TriviaAnalyser()
|
|
35
|
+
result = analyser.analyse("What ancient wonder was located in Alexandria?")
|
|
36
|
+
print(result.difficulty) # medium
|
|
37
|
+
print(result.question_type) # what
|
|
38
|
+
print(result.category) # history
|
|
39
|
+
print(result.difficulty_score) # ~0.38
|
|
40
|
+
print(result.keywords) # ['ancient', 'wonder', 'located', 'alexandria']
|
|
41
|
+
|
|
42
|
+
# Compare two questions
|
|
43
|
+
cmp = analyser.compare(
|
|
44
|
+
"What color is the sky?",
|
|
45
|
+
"In what year was the Treaty of Westphalia signed?"
|
|
46
|
+
)
|
|
47
|
+
print(cmp["harder"]) # q2
|
|
48
|
+
|
|
49
|
+
# Generate a quiz
|
|
50
|
+
gen = TriviaGenerator()
|
|
51
|
+
quiz = gen.quiz(count=10, mix=True)
|
|
52
|
+
for q in quiz:
|
|
53
|
+
print(q.text)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## CLI
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
langgraph-tavily analyse "Who composed The Rite of Spring?"
|
|
60
|
+
langgraph-tavily analyse "What is 2+2?" --json
|
|
61
|
+
langgraph-tavily compare "What is the capital of France?" "Which element has atomic number 79?"
|
|
62
|
+
langgraph-tavily quiz --count 5 --difficulty medium --show-answers
|
|
63
|
+
langgraph-tavily batch questions.txt --json
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
MIT
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# langgraph-tavily
|
|
2
|
+
|
|
3
|
+
Language analyser for trivia questions. Parse, classify, and score question difficulty using linguistic features.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install langgraph-tavily
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from langgraph_tavily import TriviaAnalyser, TriviaGenerator
|
|
15
|
+
|
|
16
|
+
# Analyse a question
|
|
17
|
+
analyser = TriviaAnalyser()
|
|
18
|
+
result = analyser.analyse("What ancient wonder was located in Alexandria?")
|
|
19
|
+
print(result.difficulty) # medium
|
|
20
|
+
print(result.question_type) # what
|
|
21
|
+
print(result.category) # history
|
|
22
|
+
print(result.difficulty_score) # ~0.38
|
|
23
|
+
print(result.keywords) # ['ancient', 'wonder', 'located', 'alexandria']
|
|
24
|
+
|
|
25
|
+
# Compare two questions
|
|
26
|
+
cmp = analyser.compare(
|
|
27
|
+
"What color is the sky?",
|
|
28
|
+
"In what year was the Treaty of Westphalia signed?"
|
|
29
|
+
)
|
|
30
|
+
print(cmp["harder"]) # q2
|
|
31
|
+
|
|
32
|
+
# Generate a quiz
|
|
33
|
+
gen = TriviaGenerator()
|
|
34
|
+
quiz = gen.quiz(count=10, mix=True)
|
|
35
|
+
for q in quiz:
|
|
36
|
+
print(q.text)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## CLI
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
langgraph-tavily analyse "Who composed The Rite of Spring?"
|
|
43
|
+
langgraph-tavily analyse "What is 2+2?" --json
|
|
44
|
+
langgraph-tavily compare "What is the capital of France?" "Which element has atomic number 79?"
|
|
45
|
+
langgraph-tavily quiz --count 5 --difficulty medium --show-answers
|
|
46
|
+
langgraph-tavily batch questions.txt --json
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
MIT
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "langgraph-tavily"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Language analyser for trivia questions — parse, classify, and score question difficulty"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Artem", email = "brnv@canva.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["trivia", "language", "nlp", "quiz", "question-analysis"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Topic :: Text Processing :: Linguistic",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/brnv/langgraph-tavily"
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
langgraph-tavily = "langgraph_tavily.cli:main"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""langgraph-tavily: Language analyser for trivia questions."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from .analyser import TriviaAnalyser
from .generator import TriviaGenerator
from .models import (
    AnalysisResult,
    Category,
    Difficulty,
    QuestionType,
    TriviaQuestion,
)

# Public API of the package. ``Category`` is exported alongside the other
# model types because ``AnalysisResult.category`` is a ``Category`` member and
# callers need the enum to compare against it.
__all__ = [
    "TriviaAnalyser",
    "TriviaQuestion",
    "QuestionType",
    "Difficulty",
    "Category",
    "AnalysisResult",
    "TriviaGenerator",
]
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Trivia question analyser — classifies, scores, and extracts features."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
import re
|
|
7
|
+
import string
|
|
8
|
+
from collections import Counter
|
|
9
|
+
|
|
10
|
+
from .models import (
|
|
11
|
+
TriviaQuestion, QuestionType, Difficulty, Category,
|
|
12
|
+
AnalysisResult,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Common/easy words that reduce difficulty
|
|
16
|
+
# Frequent function words. They are excluded from keyword extraction and from
# the rare-word counts. The fragments are concatenated first (each one carries
# its own trailing space) and split on whitespace afterwards.
_COMMON_WORDS = frozenset(
    "".join(
        (
            "the a an is are was were be been being have has had do does did will would ",
            "shall should may might can could of in to for on with at by from as into ",
            "about between through during before after above below up down out off over ",
            "under again further then once and but or nor not so yet both either neither ",
            "each every all any few more most other some such no only own same than too ",
            "very just don also back even still already almost always never often ",
            "sometimes usually what which who whom whose where when why how",
        )
    ).split()
)
|
|
25
|
+
|
|
26
|
+
# Category keyword mapping
|
|
27
|
+
# Lower-case cue words per category. A question is assigned the category whose
# cue set overlaps its tokens the most; the order of the entries matters
# because it decides which category wins when two overlaps are equal.
_CATEGORY_KEYWORDS: dict[Category, set[str]] = {
    category: set(cues)
    for category, cues in (
        (Category.SCIENCE, (
            "atom", "molecule", "element", "chemical", "physics", "biology",
            "cell", "dna", "planet", "gravity", "electron", "nucleus", "species",
            "evolution", "quantum", "telescope", "laboratory", "experiment",
        )),
        (Category.HISTORY, (
            "war", "king", "queen", "empire", "dynasty", "century", "ancient",
            "medieval", "revolution", "president", "battle", "treaty", "colonial",
            "civilization", "pharaoh", "emperor",
        )),
        (Category.GEOGRAPHY, (
            "country", "capital", "continent", "ocean", "river", "mountain",
            "island", "desert", "lake", "border", "population", "latitude",
            "longitude", "hemisphere",
        )),
        (Category.ENTERTAINMENT, (
            "movie", "film", "actor", "actress", "director", "oscar",
            "show", "series", "episode", "character", "hollywood", "disney",
        )),
        (Category.SPORTS, (
            "goal", "team", "player", "championship", "olympic", "medal",
            "score", "league", "cup", "match", "tournament", "coach", "athlete",
        )),
        (Category.ART, (
            "painting", "sculpture", "artist", "museum", "gallery", "canvas",
            "portrait", "renaissance", "impressionism",
        )),
        (Category.LITERATURE, (
            "novel", "author", "book", "poem", "poet", "chapter",
            "fiction", "publisher", "literary", "shakespeare",
        )),
        (Category.MUSIC, (
            "song", "band", "album", "singer", "composer", "symphony",
            "guitar", "piano", "concert", "grammy", "genre", "melody",
        )),
        (Category.FOOD, (
            "recipe", "cuisine", "dish", "ingredient", "chef", "restaurant",
            "flavor", "spice", "cook", "bake", "wine", "beer",
        )),
        (Category.TECHNOLOGY, (
            "computer", "software", "programming", "internet", "algorithm",
            "digital", "robot", "ai", "silicon", "processor", "server",
        )),
        (Category.NATURE, (
            "animal", "plant", "tree", "flower", "forest", "habitat",
            "ecosystem", "endangered", "predator", "prey", "migration",
        )),
        (Category.MATH, (
            "equation", "theorem", "prime", "geometry", "algebra", "calculus",
            "probability", "fraction", "pi", "infinity", "fibonacci",
        )),
    )
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TriviaAnalyser:
    """Analyse trivia questions for type, difficulty, category, and language features.

    Every score is a hand-tuned heuristic computed from the question text
    alone; no external data or model is consulted. The analyser keeps no
    instance state.
    """

    # Capitalised word. Callers apply it to ``text[1:]`` so that a sentence
    # that merely starts with a capital letter is not reported as a proper noun.
    _PROPER_NOUN_RE = re.compile(r"\b[A-Z][a-z]+\b")
    # A run of exactly four digits ("1945", "1940s"), not part of a longer number.
    _YEAR_RE = re.compile(r"(?<!\d)\d{4}(?!\d)")
    # First alphabetic word of a lower-cased question, skipping leading punctuation.
    _LEADING_WORD_RE = re.compile(r"\W*([a-z]+)")
    _TRUE_FALSE_PREFIXES = ("true or false", "t/f")
    # Inflected forms of "who" that should still classify as WHO.
    _LEADING_ALIASES = {"whom": "who", "whose": "who"}
    # ``_tokenize`` strips ASCII punctuation, so "isn't" arrives as "isnt";
    # a typographic apostrophe (U+2019) is not stripped and arrives unchanged.
    _NEGATION_WORDS = frozenset(
        {"not", "never", "no", "none", "neither", "nor"}
        | {
            form
            for base in ("isn't", "wasn't", "doesn't", "don't", "won't", "can't")
            for form in (base.replace("'", ""), base.replace("'", "\u2019"))
        }
    )
    _SUPERLATIVE_WORDS = frozenset(
        {"most", "least", "best", "worst", "first", "last", "largest", "smallest"}
    )
    _TRICKY_WORDS = frozenset({"not", "never", "except", "least", "false"})
    _AMBIGUITY_MARKERS = frozenset(
        {"could", "might", "possibly", "perhaps", "sometimes",
         "often", "usually", "generally", "approximately", "roughly"}
    )
    _PRECISION_WORDS = ("exactly", "specifically", "precisely")

    def analyse(self, question: TriviaQuestion | str) -> AnalysisResult:
        """Fully analyse a trivia question.

        Args:
            question: A ``TriviaQuestion`` or a bare question string (which is
                wrapped in a ``TriviaQuestion``).

        Returns:
            An ``AnalysisResult`` holding the type, category, difficulty,
            keywords, the individual scores and a ``language_features`` dict.
        """
        if isinstance(question, str):
            question = TriviaQuestion(text=question)

        text = question.text.strip()
        words = self._tokenize(text)
        q_type = self._classify_type(text)
        category = self._classify_category(words)
        keywords = self._extract_keywords(words)
        readability = self._readability_grade(text, words)
        ambiguity = self._ambiguity_score(text, words)
        specificity = self._specificity_score(text, words, keywords)
        difficulty_score = self._difficulty_score(
            text, words, keywords, readability, ambiguity, specificity, question)
        difficulty = self._score_to_difficulty(difficulty_score)

        divisor = max(len(words), 1)  # avoids division by zero on empty input
        features = {
            "word_count": len(words),
            "avg_word_length": sum(len(w) for w in words) / divisor,
            "sentence_count": self._sentence_count(text),
            "has_negation": any(w in self._NEGATION_WORDS for w in words),
            "has_superlative": any(
                w.endswith("est") or w in self._SUPERLATIVE_WORDS for w in words),
            "has_numeric": bool(re.search(r"\d", text)),
            # text[1:] drops only the first character, which is enough to hide
            # the capital letter that opens the sentence.
            "has_proper_noun": bool(self._PROPER_NOUN_RE.search(text[1:])),
            "unique_word_ratio": len(set(words)) / divisor,
            "rare_word_count": sum(
                1 for w in words if w not in _COMMON_WORDS and len(w) > 3),
        }

        return AnalysisResult(
            question=question,
            question_type=q_type,
            difficulty=difficulty,
            category=category,
            difficulty_score=difficulty_score,
            language_features=features,
            keywords=keywords,
            ambiguity_score=ambiguity,
            specificity_score=specificity,
            readability_grade=readability,
        )

    def batch_analyse(self, questions: list[TriviaQuestion | str]) -> list[AnalysisResult]:
        """Analyse each question in order and return the results as a list."""
        return [self.analyse(q) for q in questions]

    def compare(self, q1: TriviaQuestion | str, q2: TriviaQuestion | str) -> dict:
        """Compare two questions.

        Returns:
            A dict with the two summaries (``"q1"``, ``"q2"``), ``"harder"``
            (``"q1"`` or ``"q2"``), ``"difficulty_delta"``, ``"same_category"``
            and ``"same_type"``. When both scores are equal ``"harder"`` is
            ``"q2"``.
        """
        a1 = self.analyse(q1)
        a2 = self.analyse(q2)
        return {
            "q1": a1.summary,
            "q2": a2.summary,
            "harder": "q1" if a1.difficulty_score > a2.difficulty_score else "q2",
            "difficulty_delta": abs(a1.difficulty_score - a2.difficulty_score),
            "same_category": a1.category == a2.category,
            "same_type": a1.question_type == a2.question_type,
        }

    # ── Private helpers ──

    def _tokenize(self, text: str) -> list[str]:
        """Lower-case *text*, delete ASCII punctuation and split on whitespace."""
        text = text.lower().translate(str.maketrans("", "", string.punctuation))
        return text.split()

    def _sentence_count(self, text: str) -> int:
        """Count '.', '?' and '!' characters, never returning less than 1."""
        return max(1, text.count(".") + text.count("?") + text.count("!"))

    def _classify_type(self, text: str) -> QuestionType:
        """Classify the question by its format or its interrogative word.

        True/false and fill-in-the-blank formats are checked first, then the
        leading word, then any interrogative appearing later in the text
        (e.g. "In which city ..."). Falls back to ``QuestionType.UNKNOWN``.
        """
        t = text.lower().strip()
        if t.startswith(self._TRUE_FALSE_PREFIXES):
            return QuestionType.TRUE_FALSE
        if "___" in t or "fill in" in t:
            return QuestionType.FILL_BLANK

        wh_types = (QuestionType.WHO, QuestionType.WHAT, QuestionType.WHEN,
                    QuestionType.WHERE, QuestionType.WHY, QuestionType.HOW,
                    QuestionType.WHICH)

        # Compare the whole leading word rather than a prefix, so "however"
        # or "whatever" are not mistaken for "how" / "what".
        lead = self._LEADING_WORD_RE.match(t)
        if lead:
            first = self._LEADING_ALIASES.get(lead.group(1), lead.group(1))
            for qt in wh_types:
                if qt.value == first:
                    return qt

        # Embedded interrogatives, in priority order. Word boundaries also
        # accept a word followed by punctuation ("..., who?").
        for qt in wh_types:
            if re.search(rf"\b{qt.value}\b", t):
                return qt

        return QuestionType.UNKNOWN

    def _classify_category(self, words: list[str]) -> Category:
        """Return the category whose cue words overlap *words* the most.

        Ties go to the category listed first in ``_CATEGORY_KEYWORDS``;
        ``Category.GENERAL`` is returned when nothing overlaps.
        """
        word_set = set(words)
        scores: dict[Category, int] = {}
        for cat, cues in _CATEGORY_KEYWORDS.items():
            overlap = len(word_set & cues)
            if overlap > 0:
                scores[cat] = overlap
        if scores:
            return max(scores, key=scores.get)
        return Category.GENERAL

    def _extract_keywords(self, words: list[str]) -> list[str]:
        """Keep tokens longer than two characters that are not common words."""
        return [w for w in words if w not in _COMMON_WORDS and len(w) > 2]

    def _readability_grade(self, text: str, words: list[str]) -> float:
        """Approximate Flesch-Kincaid grade level; 0.0 when there are no words."""
        if not words:
            return 0.0
        sentences = self._sentence_count(text)
        syllables = sum(self._count_syllables(w) for w in words)
        return 0.39 * (len(words) / sentences) + 11.8 * (syllables / len(words)) - 15.59

    def _count_syllables(self, word: str) -> int:
        """Estimate syllables as the number of vowel groups (minimum 1).

        Trailing "e" characters are dropped first as a rough silent-e rule.
        """
        word = word.lower().rstrip("e")
        count = len(re.findall(r"[aeiouy]+", word))
        return max(1, count)

    def _ambiguity_score(self, text: str, words: list[str]) -> float:
        """Score hedging in the question from 0.0 (clear) to 1.0.

        Adds 0.15 per distinct hedge word, 0.1 for an "or", and 0.2 when the
        text contains more than one question mark.
        """
        score = 0.15 * len(set(words) & self._AMBIGUITY_MARKERS)
        if "or" in words:
            score += 0.1
        if text.count("?") > 1:
            score += 0.2
        return min(1.0, score)

    def _specificity_score(self, text: str, words: list[str], keywords: list[str]) -> float:
        """Score how specific the question is from 0.0 (vague) to 1.0.

        Adds 0.2 for a four-digit year, 0.2 for a capitalised word after the
        first character, 0.05 per keyword (capped at 0.4) and 0.2 for a
        precision word such as "exactly".
        """
        score = 0.0
        if self._YEAR_RE.search(text):  # years
            score += 0.2
        if self._PROPER_NOUN_RE.search(text[1:]):  # proper nouns
            score += 0.2
        score += min(0.4, len(keywords) * 0.05)
        if any(w in words for w in self._PRECISION_WORDS):
            score += 0.2
        return min(1.0, score)

    def _difficulty_score(self, text: str, words: list[str], keywords: list[str],
                          readability: float, ambiguity: float, specificity: float,
                          question: TriviaQuestion) -> float:
        """Combine the individual signals into a difficulty in [0.0, 1.0].

        ``keywords`` and ``ambiguity`` are accepted for signature stability but
        do not currently contribute to the score.
        """
        score = 0.0

        # Length factor
        score += min(0.2, len(words) * 0.008)

        # Rare words (stricter length threshold than the "rare_word_count" feature)
        rare = sum(1 for w in words if w not in _COMMON_WORDS and len(w) > 5)
        score += min(0.2, rare * 0.04)

        # Readability: only grades above 5 add difficulty
        score += min(0.15, max(0, readability - 5) * 0.015)

        # Specificity increases difficulty
        score += specificity * 0.2

        # Negation makes questions trickier
        if any(w in self._TRICKY_WORDS for w in words):
            score += 0.1

        # Multiple choice reduces difficulty
        if question.has_choices:
            score -= 0.15

        # True/false is generally easier; accept the same prefixes that
        # _classify_type treats as true/false.
        if text.lower().startswith(self._TRUE_FALSE_PREFIXES):
            score -= 0.1

        return max(0.0, min(1.0, score))

    def _score_to_difficulty(self, score: float) -> Difficulty:
        """Map a score to a band: <0.25 easy, <0.5 medium, <0.75 hard, else expert."""
        if score < 0.25:
            return Difficulty.EASY
        elif score < 0.5:
            return Difficulty.MEDIUM
        elif score < 0.75:
            return Difficulty.HARD
        return Difficulty.EXPERT
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""CLI for langgraph-tavily."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main(argv: list[str] | None = None) -> None:
|
|
11
|
+
parser = argparse.ArgumentParser(
|
|
12
|
+
prog="langgraph-tavily",
|
|
13
|
+
description="Analyse trivia questions for difficulty, type, and language features",
|
|
14
|
+
)
|
|
15
|
+
sub = parser.add_subparsers(dest="command")
|
|
16
|
+
|
|
17
|
+
# Analyse
|
|
18
|
+
a_p = sub.add_parser("analyse", help="Analyse a trivia question")
|
|
19
|
+
a_p.add_argument("question", help="The trivia question text")
|
|
20
|
+
a_p.add_argument("--json", action="store_true")
|
|
21
|
+
|
|
22
|
+
# Batch
|
|
23
|
+
b_p = sub.add_parser("batch", help="Analyse questions from a file (one per line)")
|
|
24
|
+
b_p.add_argument("file", help="Text file with one question per line")
|
|
25
|
+
b_p.add_argument("--json", action="store_true")
|
|
26
|
+
|
|
27
|
+
# Compare
|
|
28
|
+
c_p = sub.add_parser("compare", help="Compare two questions")
|
|
29
|
+
c_p.add_argument("q1", help="First question")
|
|
30
|
+
c_p.add_argument("q2", help="Second question")
|
|
31
|
+
|
|
32
|
+
# Quiz
|
|
33
|
+
q_p = sub.add_parser("quiz", help="Generate a trivia quiz")
|
|
34
|
+
q_p.add_argument("--count", type=int, default=5)
|
|
35
|
+
q_p.add_argument("--difficulty", choices=["easy", "medium", "hard", "expert"])
|
|
36
|
+
q_p.add_argument("--category", default="")
|
|
37
|
+
q_p.add_argument("--show-answers", action="store_true")
|
|
38
|
+
|
|
39
|
+
args = parser.parse_args(argv)
|
|
40
|
+
|
|
41
|
+
if args.command == "analyse":
|
|
42
|
+
from .analyser import TriviaAnalyser
|
|
43
|
+
result = TriviaAnalyser().analyse(args.question)
|
|
44
|
+
if args.json:
|
|
45
|
+
print(json.dumps(result.to_dict(), indent=2))
|
|
46
|
+
else:
|
|
47
|
+
print(result.summary)
|
|
48
|
+
print(f" Keywords: {', '.join(result.keywords)}")
|
|
49
|
+
print(f" Ambiguity: {result.ambiguity_score:.2f} Specificity: {result.specificity_score:.2f}")
|
|
50
|
+
|
|
51
|
+
elif args.command == "batch":
|
|
52
|
+
from .analyser import TriviaAnalyser
|
|
53
|
+
from pathlib import Path
|
|
54
|
+
questions = Path(args.file).read_text().strip().splitlines()
|
|
55
|
+
analyser = TriviaAnalyser()
|
|
56
|
+
results = analyser.batch_analyse(questions)
|
|
57
|
+
if args.json:
|
|
58
|
+
print(json.dumps([r.to_dict() for r in results], indent=2))
|
|
59
|
+
else:
|
|
60
|
+
for r in results:
|
|
61
|
+
print(r.summary)
|
|
62
|
+
|
|
63
|
+
elif args.command == "compare":
|
|
64
|
+
from .analyser import TriviaAnalyser
|
|
65
|
+
result = TriviaAnalyser().compare(args.q1, args.q2)
|
|
66
|
+
for k, v in result.items():
|
|
67
|
+
print(f" {k}: {v}")
|
|
68
|
+
|
|
69
|
+
elif args.command == "quiz":
|
|
70
|
+
from .generator import TriviaGenerator
|
|
71
|
+
from .models import Difficulty
|
|
72
|
+
gen = TriviaGenerator()
|
|
73
|
+
diff = Difficulty(args.difficulty) if args.difficulty else None
|
|
74
|
+
questions = gen.random(args.count, difficulty=diff, category=args.category)
|
|
75
|
+
for i, q in enumerate(questions, 1):
|
|
76
|
+
print(f" {i}. {q.text}")
|
|
77
|
+
if args.show_answers:
|
|
78
|
+
print(f" Answer: {q.answer}")
|
|
79
|
+
|
|
80
|
+
else:
|
|
81
|
+
parser.print_help()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
main()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Trivia question generator with difficulty control."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from .models import TriviaQuestion, Category, Difficulty
|
|
9
|
+
|
|
10
|
+
# Built-in trivia bank
|
|
11
|
+
# Built-in question bank. Each row is (question, answer, category, difficulty)
# and is expanded into a dict with the keys "q", "a", "cat" and "diff".
_TRIVIA_BANK: list[dict] = [
    {"q": question, "a": answer, "cat": category, "diff": level}
    for question, answer, category, level in (
        ("What planet is known as the Red Planet?", "Mars", "science", "easy"),
        ("What is the largest ocean on Earth?", "Pacific Ocean", "geography", "easy"),
        ("Who painted the Mona Lisa?", "Leonardo da Vinci", "art", "easy"),
        ("In what year did World War II end?", "1945", "history", "easy"),
        ("What is the chemical symbol for gold?", "Au", "science", "medium"),
        ("Which country has the most natural lakes?", "Canada", "geography", "medium"),
        ("Who wrote 'One Hundred Years of Solitude'?", "Gabriel Garcia Marquez", "literature", "medium"),
        ("What instrument has 88 keys?", "Piano", "music", "easy"),
        ("In which city would you find the Sagrada Familia?", "Barcelona", "geography", "medium"),
        ("What is the smallest bone in the human body?", "Stapes (stirrup bone)", "science", "medium"),
        ("Which element has the atomic number 79?", "Gold", "science", "hard"),
        ("What ancient wonder was located in Alexandria?", "The Lighthouse (Pharos)", "history", "hard"),
        ("Who composed 'The Rite of Spring'?", "Igor Stravinsky", "music", "hard"),
        ("What is the only country to span four hemispheres?", "Kiribati", "geography", "expert"),
        ("In what year was the Treaty of Westphalia signed?", "1648", "history", "expert"),
        ("What is the Chandrasekhar limit measured in solar masses?", "1.4 solar masses", "science", "expert"),
        ("Which programming language was created by Bjarne Stroustrup?", "C++", "technology", "medium"),
        ("What sport uses a shuttlecock?", "Badminton", "sports", "easy"),
        ("Which vitamin is produced when skin is exposed to sunlight?", "Vitamin D", "science", "medium"),
        ("What is the world's largest desert by area?", "Antarctic Desert", "geography", "hard"),
    )
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class TriviaGenerator:
    """Generate trivia questions from the built-in bank with filters.

    Attributes:
        seed: Seed for the random generator. Every call builds a fresh
            ``random.Random(seed)``, so with a fixed seed identical calls
            return identical results; ``None`` gives non-deterministic draws.
    """

    seed: int | None = None

    def _rng(self) -> random.Random:
        """Return a new random generator initialised from ``self.seed``."""
        return random.Random(self.seed)

    def random(self, count: int = 1, *, difficulty: Difficulty | None = None,
               category: str = "") -> list[TriviaQuestion]:
        """Get random trivia questions.

        Args:
            count: Maximum number of questions to return.
            difficulty: Keep only questions of this difficulty, if given.
            category: Keep only questions of this category (case-insensitive),
                if non-empty.

        Returns:
            Up to ``count`` distinct questions; an empty list when ``count`` is
            not positive or no question matches the filters.
        """
        if count <= 0:
            # random.sample() rejects negative sizes; nothing was asked for.
            return []

        rng = self._rng()
        pool = list(_TRIVIA_BANK)

        if difficulty:
            pool = [q for q in pool if q["diff"] == difficulty.value]
        if category:
            pool = [q for q in pool if q["cat"] == category.lower()]

        if not pool:
            return []

        picked = rng.sample(pool, min(count, len(pool)))
        return [
            TriviaQuestion(text=q["q"], answer=q["a"], category=q["cat"])
            for q in picked
        ]

    def quiz(self, count: int = 10, mix: bool = True) -> list[TriviaQuestion]:
        """Generate a balanced quiz with mixed difficulties.

        With ``mix`` enabled, roughly a third of the questions are drawn from
        each of the easy, medium and hard tiers. If that leaves the quiz short
        of ``count`` it is topped up from the questions not yet chosen (expert
        ones included). With ``mix`` disabled the whole bank is sampled
        uniformly.

        Returns:
            ``min(count, bank size)`` distinct questions in random order; an
            empty list when ``count`` is not positive.
        """
        if count <= 0:
            # A negative count would otherwise slice from the end of the list.
            return []

        rng = self._rng()
        if mix:
            questions = []
            per_tier = max(1, count // 3)
            for diff in [Difficulty.EASY, Difficulty.MEDIUM, Difficulty.HARD]:
                pool = [q for q in _TRIVIA_BANK if q["diff"] == diff.value]
                questions.extend(rng.sample(pool, min(per_tier, len(pool))))

            shortfall = count - len(questions)
            if shortfall > 0:
                # Bank entries are compared by identity: they are the same
                # dict objects that were sampled above.
                taken = {id(q) for q in questions}
                rest = [q for q in _TRIVIA_BANK if id(q) not in taken]
                questions.extend(rng.sample(rest, min(shortfall, len(rest))))

            rng.shuffle(questions)
            questions = questions[:count]
        else:
            questions = rng.sample(_TRIVIA_BANK, min(count, len(_TRIVIA_BANK)))

        return [
            TriviaQuestion(text=q["q"], answer=q["a"], category=q["cat"])
            for q in questions
        ]

    @property
    def categories(self) -> list[str]:
        """Sorted list of the distinct category names present in the bank."""
        return sorted({q["cat"] for q in _TRIVIA_BANK})

    @property
    def total_questions(self) -> int:
        """Number of questions in the built-in bank."""
        return len(_TRIVIA_BANK)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Core models for trivia question analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class QuestionType(str, Enum):
    """Kind of trivia question; members are plain strings."""

    # Interrogative-word types
    WHO = "who"
    WHAT = "what"
    WHEN = "when"
    WHERE = "where"
    WHY = "why"
    HOW = "how"
    WHICH = "which"

    # Format-based types
    TRUE_FALSE = "true_false"
    MULTIPLE_CHOICE = "multiple_choice"
    FILL_BLANK = "fill_blank"

    # Fallback when nothing else applies
    UNKNOWN = "unknown"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Difficulty(str, Enum):
    """Difficulty band of a question, listed from easiest to hardest."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    EXPERT = "expert"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Category(str, Enum):
    """Subject area of a question; members are plain strings."""

    SCIENCE = "science"
    HISTORY = "history"
    GEOGRAPHY = "geography"
    ENTERTAINMENT = "entertainment"
    SPORTS = "sports"
    ART = "art"
    LITERATURE = "literature"
    MUSIC = "music"
    FOOD = "food"
    TECHNOLOGY = "technology"
    NATURE = "nature"
    POLITICS = "politics"
    LANGUAGE = "language"
    MATH = "math"
    # Catch-all for questions that fit no specific subject
    GENERAL = "general"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class TriviaQuestion:
    """A trivia question with optional answer and choices."""

    text: str                                         # the question itself
    answer: str = ""                                  # empty when not known
    choices: list[str] = field(default_factory=list)  # multiple-choice options
    category: str = ""                                # free-form category label
    source: str = ""                                  # free-form origin label

    @property
    def word_count(self) -> int:
        """Number of whitespace-separated tokens in ``text``."""
        tokens = self.text.split()
        return len(tokens)

    @property
    def has_choices(self) -> bool:
        """Whether at least one answer choice is attached."""
        return len(self.choices) != 0
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass
class AnalysisResult:
    """Full analysis of a trivia question."""

    question: TriviaQuestion
    question_type: QuestionType
    difficulty: Difficulty
    category: Category
    difficulty_score: float  # 0.0 (trivial) to 1.0 (impossible)
    language_features: dict[str, Any] = field(default_factory=dict)
    keywords: list[str] = field(default_factory=list)
    ambiguity_score: float = 0.0  # 0 = clear, 1 = very ambiguous
    specificity_score: float = 0.0  # 0 = vague, 1 = very specific
    readability_grade: float = 0.0  # approximate grade level

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the result.

        Enum members are replaced by their string values and the scores are
        rounded (two decimals; one for the readability grade). ``keywords``
        and ``language_features`` are shallow copies, so changing the
        returned dict does not alter this result.
        """
        return {
            "question": self.question.text,
            "answer": self.question.answer,
            "question_type": self.question_type.value,
            "difficulty": self.difficulty.value,
            "difficulty_score": round(self.difficulty_score, 2),
            "category": self.category.value,
            "keywords": list(self.keywords),
            "ambiguity_score": round(self.ambiguity_score, 2),
            "specificity_score": round(self.specificity_score, 2),
            "readability_grade": round(self.readability_grade, 1),
            "language_features": dict(self.language_features),
        }

    @property
    def summary(self) -> str:
        """One-line description: difficulty, type, category, score and grade."""
        return (f"[{self.difficulty.value.upper()}] {self.question_type.value} "
                f"({self.category.value}) — score: {self.difficulty_score:.2f}, "
                f"readability: grade {self.readability_grade:.0f}")
|