mathipy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mathipy-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mikyung Shin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
mathipy-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.4
2
+ Name: mathipy
3
+ Version: 0.1.0
4
+ Summary: Multimodal item feature extraction for K-12 math assessment
5
+ Author-email: Mikyung Shin <shin.mikyung@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/mshin77/mathipy
8
+ Project-URL: Repository, https://github.com/mshin77/mathipy
9
+ Project-URL: Documentation, https://mshin77.github.io/mathipy
10
+ Project-URL: Issues, https://github.com/mshin77/mathipy/issues
11
+ Keywords: math,assessment,education,readability,k-12
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Education
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Education
21
+ Classifier: Topic :: Scientific/Engineering
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: numpy>=1.20.0
26
+ Provides-Extra: nlp
27
+ Requires-Dist: textstat>=0.7.0; extra == "nlp"
28
+ Requires-Dist: nltk>=3.8.0; extra == "nlp"
29
+ Provides-Extra: vision
30
+ Requires-Dist: pillow>=9.0.0; extra == "vision"
31
+ Requires-Dist: opencv-python-headless>=4.5.0; extra == "vision"
32
+ Provides-Extra: ocr
33
+ Requires-Dist: httpx>=0.24.0; extra == "ocr"
34
+ Provides-Extra: documents
35
+ Requires-Dist: python-docx>=0.8.0; extra == "documents"
36
+ Requires-Dist: pdfplumber>=0.7.0; extra == "documents"
37
+ Provides-Extra: all
38
+ Requires-Dist: mathipy[documents,nlp,ocr,vision]; extra == "all"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
42
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
43
+ Provides-Extra: docs
44
+ Requires-Dist: sphinx>=8.2; extra == "docs"
45
+ Requires-Dist: pydata-sphinx-theme>=0.16; extra == "docs"
46
+ Requires-Dist: myst-parser>=3.0; extra == "docs"
47
+ Dynamic: license-file
48
+
49
+ <img src="docs_src/_static/logo.svg" alt="MathiPy Logo" align="right" width="220px"/>
50
+
51
+ [![PyPI version](https://img.shields.io/pypi/v/mathipy)](https://pypi.org/project/mathipy/)
52
+ [![Python versions](https://img.shields.io/pypi/pyversions/mathipy)](https://pypi.org/project/mathipy/)
53
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
54
+
55
+ Multimodal item feature extraction for K-12 math assessment. Analyze readability with math-aware normalization via [textstat](https://github.com/textstat/textstat) and [NLTK](https://www.nltk.org/), classify math content by [Common Core State Standards for Mathematics](https://www.thecorestandards.org/Math/) domain, estimate cognitive load components, extract visual complexity features from images using [OpenCV](https://opencv.org/) and [Pillow](https://pillow.readthedocs.io/), and perform multimodal optical character recognition (OCR) through [Gemini](https://ai.google.dev/) and [OpenAI](https://platform.openai.com/) vision APIs.
56
+
57
+ ## Installation
58
+
59
+ ```bash
60
+ pip install mathipy
61
+ ```
62
+
63
+ With optional dependencies:
64
+
65
+ ```bash
66
+ pip install mathipy[nlp] # readability (textstat, nltk)
67
+ pip install mathipy[vision] # visual analysis (opencv, pillow)
68
+ pip install mathipy[ocr] # OCR via vision LLMs (httpx)
69
+ pip install mathipy[documents] # document parsing (python-docx, pdfplumber)
70
+ pip install mathipy[all] # all features
71
+ ```
72
+
73
+ From GitHub:
74
+
75
+ ```bash
76
+ pip install "mathipy[all] @ git+https://github.com/mshin77/mathipy.git"
77
+ ```
78
+
79
+ ## Getting Started
80
+
81
+ See [Quick Start](https://mshin77.github.io/mathipy/getting-started.html) and [Analyzing Math Items](https://mshin77.github.io/mathipy/vignettes/naep-demo.html) for tutorials.
82
+
83
+ ## Citation
84
+
85
+ - Shin, M. (2026). *MathiPy: Multimodal item feature extraction for K-12 math assessment* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/mathipy>
@@ -0,0 +1,37 @@
1
+ <img src="docs_src/_static/logo.svg" alt="MathiPy Logo" align="right" width="220px"/>
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/mathipy)](https://pypi.org/project/mathipy/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/mathipy)](https://pypi.org/project/mathipy/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ Multimodal item feature extraction for K-12 math assessment. Analyze readability with math-aware normalization via [textstat](https://github.com/textstat/textstat) and [NLTK](https://www.nltk.org/), classify math content by [Common Core State Standards for Mathematics](https://www.thecorestandards.org/Math/) domain, estimate cognitive load components, extract visual complexity features from images using [OpenCV](https://opencv.org/) and [Pillow](https://pillow.readthedocs.io/), and perform multimodal optical character recognition (OCR) through [Gemini](https://ai.google.dev/) and [OpenAI](https://platform.openai.com/) vision APIs.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install mathipy
13
+ ```
14
+
15
+ With optional dependencies:
16
+
17
+ ```bash
18
+ pip install mathipy[nlp] # readability (textstat, nltk)
19
+ pip install mathipy[vision] # visual analysis (opencv, pillow)
20
+ pip install mathipy[ocr] # OCR via vision LLMs (httpx)
21
+ pip install mathipy[documents] # document parsing (python-docx, pdfplumber)
22
+ pip install mathipy[all] # all features
23
+ ```
24
+
25
+ From GitHub:
26
+
27
+ ```bash
28
+ pip install "mathipy[all] @ git+https://github.com/mshin77/mathipy.git"
29
+ ```
30
+
31
+ ## Getting Started
32
+
33
+ See [Quick Start](https://mshin77.github.io/mathipy/getting-started.html) and [Analyzing Math Items](https://mshin77.github.io/mathipy/vignettes/naep-demo.html) for tutorials.
34
+
35
+ ## Citation
36
+
37
+ - Shin, M. (2026). *MathiPy: Multimodal item feature extraction for K-12 math assessment* (Python package version 0.1.0) [Computer software]. <https://github.com/mshin77/mathipy>
@@ -0,0 +1,19 @@
1
+ """MathiPy - Multimodal item feature extraction for K-12 math assessment."""
2
+
3
+ __version__ = "0.1.0"
4
+ __author__ = "Mikyung Shin"
5
+ __email__ = "shin.mikyung@gmail.com"
6
+
7
+ from mathipy.readability import ReadabilityAnalyzer
8
+ from mathipy.math_content import MathContentAnalyzer
9
+ from mathipy.cognitive_load import CognitiveLoadEstimator
10
+ from mathipy.visual import VisualFeatureExtractor
11
+ from mathipy.ocr import MultimodalOCR
12
+
13
+ __all__ = [
14
+ "ReadabilityAnalyzer",
15
+ "MathContentAnalyzer",
16
+ "CognitiveLoadEstimator",
17
+ "VisualFeatureExtractor",
18
+ "MultimodalOCR",
19
+ ]
@@ -0,0 +1,106 @@
1
+ """Cognitive load estimation for mathematical assessment items."""
2
+
3
+ import re
4
+ import logging
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from mathipy.utils import extract_numbers, extract_variables
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class CognitiveLoadEstimator:
    """Estimate cognitive load components for math assessment items.

    Computes intrinsic (item complexity), extraneous (linguistic demand),
    and germane (schema-building) load from text features. Each component,
    and the weighted total, is a score in ``[0.0, 1.0]``.
    """

    # Characters counted as operators when reporting ``operation_count``.
    _OPERATOR_CHARS = "+-*/^=<>"

    # Keywords used to estimate germane load when no math terms are supplied.
    _MATH_KEYWORDS = (
        "add", "subtract", "multiply", "divide", "sum", "difference",
        "product", "quotient", "fraction", "decimal", "percent",
        "equation", "variable", "solve", "function", "graph",
        "area", "perimeter", "volume", "angle", "triangle", "circle",
        "mean", "median", "mode", "probability", "ratio", "proportion",
    )

    # Whole-word matcher (optional plural suffix) so that words which merely
    # contain a keyword -- "address", "summer", "modern" -- are not counted.
    _KEYWORD_PATTERN = re.compile(
        r"\b("
        + "|".join(re.escape(term) for term in _MATH_KEYWORDS)
        + r")(?:s|es)?\b"
    )

    def estimate(
        self,
        text: str,
        readability_grade: Optional[float] = None,
        math_terms: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Estimate cognitive load for the given text.

        Args:
            text: Input text to analyze.
            readability_grade: Optional Flesch-Kincaid grade level. Estimated
                from text if not provided. Values outside ``[0, 12]`` are
                clamped when converted to a load score.
            math_terms: Optional list of math terms found in the text.
                Estimated from keywords if not provided (or empty).

        Returns:
            Dictionary with ``intrinsic_cognitive_load``,
            ``extraneous_cognitive_load``, ``germane_cognitive_load``,
            ``total_cognitive_load`` (each rounded to 3 decimals), and the
            element counts ``numeric_elements``, ``variable_count`` and
            ``operation_count``. Empty or whitespace-only text yields all
            zeros.
        """
        if not text or not text.strip():
            return self._empty_estimate()

        numbers = extract_numbers(text)
        variables = extract_variables(text)
        operations = sum(1 for c in text if c in self._OPERATOR_CHARS)
        word_count = len(text.split())

        # Density of numeric/variable elements per word, doubled and capped.
        intrinsic = (len(numbers) + len(variables)) / word_count if word_count else 0
        intrinsic = min(1.0, intrinsic * 2)

        if readability_grade is not None:
            extraneous = self._grade_to_load(readability_grade)
        else:
            extraneous = self._estimate_extraneous(text)

        math_term_count = len(math_terms) if math_terms else 0
        if math_term_count:
            germane = min(1.0, math_term_count / 10)
        else:
            germane = self._estimate_germane(text)

        # Weighted blend; the weights sum to 1 so the total stays in [0, 1].
        total = intrinsic * 0.4 + extraneous * 0.3 + germane * 0.3

        return {
            "intrinsic_cognitive_load": round(intrinsic, 3),
            "extraneous_cognitive_load": round(extraneous, 3),
            "germane_cognitive_load": round(germane, 3),
            "total_cognitive_load": round(total, 3),
            "numeric_elements": len(numbers),
            "variable_count": len(variables),
            "operation_count": operations,
        }

    @staticmethod
    def _grade_to_load(grade: float) -> float:
        """Map a grade level onto ``[0.0, 1.0]``, treating grade 12 as 1.0.

        Negative grades (possible for very simple text) clamp to 0.0 rather
        than producing a negative load.
        """
        return max(0.0, min(1.0, grade / 12))

    def _estimate_extraneous(self, text: str) -> float:
        """Approximate extraneous load from word and sentence length.

        Returns 0.0 when the text contains no words.
        """
        words = text.split()
        word_count = len(words)
        if not word_count:
            return 0.0

        avg_word_length = sum(len(w) for w in words) / word_count
        # Sentences are runs of text separated by ., ! or ?; blanks dropped.
        sentences = [s for s in re.split(r"[.!?]+", text) if s.strip()]
        avg_sentence_length = word_count / max(len(sentences), 1)

        # Rough grade-level proxy, bounded to grades 1-16 before scaling.
        estimated_grade = (avg_word_length * 1.5) + (avg_sentence_length * 0.3) - 3
        estimated_grade = max(1, min(16, estimated_grade))
        return self._grade_to_load(estimated_grade)

    def _estimate_germane(self, text: str) -> float:
        """Approximate germane load from distinct math keywords in the text.

        Keywords are matched case-insensitively as whole words (a trailing
        ``s``/``es`` is allowed). Returns ``distinct_keywords / 10`` capped
        at 1.0, or a 0.3 baseline when no keyword is present.
        """
        found = len({
            match.group(1)
            for match in self._KEYWORD_PATTERN.finditer(text.lower())
        })
        return min(1.0, found / 10) if found else 0.3

    def _empty_estimate(self) -> Dict[str, Any]:
        """Return the all-zero result used for empty or blank input."""
        return {
            "intrinsic_cognitive_load": 0.0,
            "extraneous_cognitive_load": 0.0,
            "germane_cognitive_load": 0.0,
            "total_cognitive_load": 0.0,
            "numeric_elements": 0,
            "variable_count": 0,
            "operation_count": 0,
        }
@@ -0,0 +1,9 @@
1
+ SOURCE: U.S. Department of Education, Institute of Education Sciences,
2
+ National Center for Education Statistics, National Assessment of Educational
3
+ Progress (NAEP), 2017, 2022, and 2024 Mathematics Assessments.
4
+
5
+ Items obtained from the NAEP Questions Tool (https://www.nationsreportcard.gov/nqt/).
6
+
7
+ NAEP released items are in the public domain per the NAEP Questions Tool
8
+ Copyright Policy. This sample dataset contains 5 items selected to
9
+ demonstrate MathiPy's feature extraction capabilities.
@@ -0,0 +1,26 @@
1
+ """Sample NAEP assessment items for demonstrating MathiPy features."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ DATA_DIR = Path(__file__).parent
7
+
8
+
9
def get_sample_csv() -> Path:
    """Locate the bundled sample NAEP CSV.

    Returns:
        Path to ``naep_sample.csv`` inside ``DATA_DIR``. The file is not
        checked for existence.
    """
    csv_name = "naep_sample.csv"
    return DATA_DIR.joinpath(csv_name)
12
+
13
+
14
def get_sample_image(item_id: str) -> Path:
    """Resolve a sample NAEP item ID (e.g., ``"2024-4M10 #2"``) to its PNG path.

    Args:
        item_id: Item identifier. Only letters, digits, underscores,
            hyphens, whitespace and ``#`` are accepted.

    Returns:
        ``DATA_DIR / "<item_id>.png"``. The file is not checked for existence.

    Raises:
        ValueError: If ``item_id`` is empty, contains any other character,
            or the resulting path resolves outside ``DATA_DIR``.
    """
    id_is_well_formed = re.match(r'^[a-zA-Z0-9_\-\s#]+$', item_id) is not None
    if id_is_well_formed:
        candidate = DATA_DIR / f"{item_id}.png"
        # Second line of defence: the resolved path must stay under DATA_DIR.
        if candidate.resolve().is_relative_to(DATA_DIR.resolve()):
            return candidate
    raise ValueError(f"Invalid item_id: {item_id}")
22
+
23
+
24
def list_sample_images() -> list:
    """List the sample image files shipped in ``DATA_DIR``.

    Returns:
        File names (not full paths) of every ``*.png`` directly inside
        ``DATA_DIR``, in ascending sorted order.
    """
    image_names = [png_path.name for png_path in DATA_DIR.glob("*.png")]
    image_names.sort()
    return image_names
@@ -0,0 +1,6 @@
1
+ item_id,grade,year,difficulty,content,image_file
2
+ 2024-4M10 #2,4,2024,Easy,Algebra,2024-4M10 #2.png
3
+ 2017-4M1 #4,4,2017,Medium,Number Properties and Operations,2017-4M1 #4.png
4
+ 2024-4M13 #2,4,2024,Hard,Measurement,2024-4M13 #2.png
5
+ 2022-8M1 #2,8,2022,Easy,Geometry,2022-8M1 #2.png
6
+ 2017-8M3 #2,8,2017,Easy,"Data Analysis, Statistics, and Probability",2017-8M3 #2.png
@@ -0,0 +1,213 @@
1
+ """Mathematical content analysis and domain classification."""
2
+
3
+ import re
4
+ import logging
5
+ from collections import Counter
6
+ from typing import Any, Dict, List, Set, Union
7
+
8
+ from mathipy.utils import extract_numbers
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class MathContentAnalyzer:
    """Analyze math content and classify by Common Core State Standards domain.

    Detects math patterns (equations, fractions, operations), counts symbols,
    extracts numbers and vocabulary, and classifies the primary math domain
    (arithmetic, algebra, geometry, statistics, calculus, fractions).
    """

    def __init__(self):
        self._init_patterns()
        self._init_vocabulary()

    def _init_patterns(self):
        """Build the compiled regex table and the symbol-to-name table."""
        # NOTE(review): "variable" matches any standalone letter, so the
        # English words "a" and "I" are counted too -- confirm whether that
        # is intended before relying on the algebra bonus it triggers.
        self.patterns = {
            "addition": re.compile(r"\d+\s*\+\s*\d+"),
            "subtraction": re.compile(r"\d+\s*[-−]\s*\d+"),
            "multiplication": re.compile(r"\d+\s*[×*·]\s*\d+"),
            "division": re.compile(r"\d+\s*[÷/]\s*\d+"),
            "variable": re.compile(r"\b[a-zA-Z]\b(?!\w)"),
            "equation": re.compile(r"[^=]+=\s*[^=]+"),
            "inequality": re.compile(r"[^<>=]+\s*[<>≤≥]\s*[^<>=]+"),
            "exponent": re.compile(r"\w+\^[\w\d{}]+|\w+\*\*[\w\d{}]+"),
            "function": re.compile(r"\b[a-zA-Z]+\([^)]+\)"),
            "polynomial": re.compile(r"[a-z]\^?\d*\s*[+\-]\s*[a-z]\^?\d*"),
            "fraction": re.compile(r"\d+/\d+|\\frac\{\d+\}\{\d+\}"),
            "decimal": re.compile(r"\d+\.\d+"),
            "percentage": re.compile(r"\d+\.?\d*%"),
            "ratio": re.compile(r"\d+:\d+"),
            "scientific_notation": re.compile(r"\d+\.?\d*\s*[×x]\s*10\^[-]?\d+"),
            "derivative": re.compile(r"d/dx|f'|\\frac\{d\}\{dx\}"),
            "integral": re.compile(r"∫|\\int"),
            "limit": re.compile(r"\\lim|lim_"),
            "summation": re.compile(r"∑|\\sum"),
        }

        # Several glyphs may share one name (e.g. "×", "*", "·"). The Unicode
        # minus "−" is included so it agrees with the "subtraction" pattern.
        self.symbols = {
            "+": "addition", "-": "subtraction", "−": "subtraction",
            "×": "multiplication", "*": "multiplication",
            "·": "multiplication", "÷": "division",
            "/": "division", "=": "equals", "<": "less_than",
            ">": "greater_than", "≤": "less_equal", "≥": "greater_equal",
            "≠": "not_equal", "≈": "approximately", "√": "square_root",
            "∑": "summation", "∫": "integral", "π": "pi", "∞": "infinity",
        }

    def _init_vocabulary(self):
        """Build per-domain vocabularies and their whole-word matchers."""
        self.domains = {
            "arithmetic": {
                "add", "subtract", "multiply", "divide", "sum", "difference",
                "product", "quotient", "remainder", "factor", "multiple",
                "even", "odd", "prime", "composite", "digit", "place value",
            },
            "algebra": {
                "variable", "coefficient", "term", "expression", "equation",
                "inequality", "solve", "simplify", "factor", "polynomial",
                "linear", "quadratic", "function", "slope", "intercept",
            },
            "geometry": {
                "point", "line", "ray", "segment", "angle", "triangle",
                "rectangle", "square", "circle", "polygon", "area",
                "perimeter", "volume", "parallel", "perpendicular", "congruent",
            },
            "statistics": {
                "mean", "median", "mode", "range", "data", "graph", "chart",
                "probability", "outcome", "sample", "population", "distribution",
                "standard deviation", "variance", "correlation",
            },
            "calculus": {
                "limit", "derivative", "integral", "differentiate", "integrate",
                "continuous", "rate of change", "maximum", "minimum",
                "optimization", "series", "convergence",
            },
            "fractions": {
                "fraction", "numerator", "denominator", "mixed number",
                "improper", "equivalent", "simplify", "common denominator",
                "decimal", "percent", "ratio", "proportion",
            },
        }

        self.all_terms: Set[str] = set()
        for terms in self.domains.values():
            self.all_terms.update(terms)

        # One whole-word pattern per term (optional plural suffix), so that
        # "seven" is not counted as "even" nor "triangle" as "angle".
        self._term_patterns = {
            term: re.compile(r"\b" + re.escape(term) + r"(?:s|es)?\b")
            for term in self.all_terms
        }

    def analyze(self, text: str) -> Dict[str, Any]:
        """Analyze math content in the given text.

        Args:
            text: Input text to analyze.

        Returns:
            Dictionary with ``pattern_matches``, ``symbol_counts``,
            ``total_math_symbols``, ``unique_symbol_types``, ``numbers``,
            ``vocabulary``, ``domain_classification``, and ``math_density``
            (pattern matches per whitespace-separated word). Empty or
            whitespace-only text yields the all-empty result.
        """
        if not text or not text.strip():
            return self._empty_analysis()

        text_lower = text.lower()
        pattern_matches = self._match_patterns(text)
        symbol_counts = self._count_symbols(text)
        numbers = extract_numbers(text)
        term_matches = self._match_vocabulary(text_lower)
        domain = self._classify_domain(text_lower, pattern_matches, term_matches)

        word_count = len(text.split())
        return {
            "pattern_matches": pattern_matches,
            "symbol_counts": symbol_counts,
            "total_math_symbols": sum(symbol_counts.values()),
            "unique_symbol_types": len(symbol_counts),
            "numbers": {
                "count": len(numbers),
                # Only the first 20 values are reported.
                "values": numbers[:20],
                "range": max(numbers) - min(numbers) if numbers else 0,
                "has_negative": any(n < 0 for n in numbers),
                "has_decimal": any(isinstance(n, float) and n != int(n) for n in numbers),
            },
            "vocabulary": {
                "math_terms": list(term_matches.keys()),
                "term_count": sum(term_matches.values()),
                "unique_terms": len(term_matches),
            },
            "domain_classification": domain,
            "math_density": sum(pattern_matches.values()) / word_count if word_count else 0,
        }

    def _match_patterns(self, text: str) -> Dict[str, int]:
        """Count matches per named pattern, omitting patterns with none."""
        matches = {}
        for name, pattern in self.patterns.items():
            found = pattern.findall(text)
            if found:
                matches[name] = len(found)
        return matches

    def _count_symbols(self, text: str) -> Dict[str, int]:
        """Count known math symbols in ``text``, keyed by symbol name."""
        counts = Counter(
            self.symbols[char] for char in text if char in self.symbols
        )
        return dict(counts)

    def _match_vocabulary(self, text: str) -> Dict[str, int]:
        """Count whole-word occurrences of each vocabulary term.

        Args:
            text: Text to search. Terms are lowercase, so callers should
                pass lowercased text (as ``analyze`` does).

        Returns:
            Mapping of term to occurrence count, for terms found at least
            once. A trailing ``s``/``es`` on the term is accepted.
        """
        matches = {}
        for term, pattern in self._term_patterns.items():
            count = len(pattern.findall(text))
            if count > 0:
                matches[term] = count
        return matches

    def _classify_domain(
        self,
        text: str,
        patterns: Dict[str, int],
        terms: Dict[str, int],
    ) -> Dict[str, Any]:
        """Score each domain and pick the primary and secondary domains.

        Args:
            text: Lowercased input text (currently unused by the scoring).
            patterns: Pattern-name to match-count mapping.
            terms: Vocabulary-term to occurrence-count mapping.

        Returns:
            Dictionary with ``primary`` (highest-scoring domain, or
            ``"unknown"`` when nothing scored), ``confidence`` (primary
            score divided by the sum of all scores), ``scores`` (every
            domain's score) and ``secondary`` (up to two next-best domains
            that scored above zero).
        """
        domain_scores: Dict[str, float] = {
            domain: sum(terms.get(term, 0) for term in vocab)
            for domain, vocab in self.domains.items()
        }

        # Pattern evidence adds fixed bonuses on top of vocabulary counts.
        if patterns.get("derivative") or patterns.get("integral"):
            domain_scores["calculus"] = domain_scores.get("calculus", 0) + 5

        if patterns.get("fraction"):
            domain_scores["fractions"] = domain_scores.get("fractions", 0) + 3

        if patterns.get("equation") or patterns.get("variable"):
            domain_scores["algebra"] = domain_scores.get("algebra", 0) + 2

        # Stable sort keeps declaration order among ties.
        ranked = sorted(domain_scores, key=domain_scores.get, reverse=True)
        top_score = domain_scores[ranked[0]] if ranked else 0
        total = sum(domain_scores.values()) or 1

        # Without any evidence, do not report an arbitrary first domain.
        primary = ranked[0] if top_score > 0 else "unknown"

        return {
            "primary": primary,
            "confidence": top_score / total,
            "scores": domain_scores,
            "secondary": [d for d in ranked[1:3] if domain_scores[d] > 0],
        }

    def _empty_analysis(self) -> Dict[str, Any]:
        """Return the all-empty result used for empty or blank input."""
        return {
            "pattern_matches": {},
            "symbol_counts": {},
            "total_math_symbols": 0,
            "unique_symbol_types": 0,
            "numbers": {
                "count": 0, "values": [], "range": 0,
                "has_negative": False, "has_decimal": False,
            },
            "vocabulary": {"math_terms": [], "term_count": 0, "unique_terms": 0},
            "domain_classification": {
                "primary": "unknown", "confidence": 0, "scores": {}, "secondary": [],
            },
            "math_density": 0,
        }