PyPI - mathformer - Versions diffs - 1.0.0__tar.gz - Mend

mathformer 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

mathformer-1.0.0/LICENSE +21 -0
mathformer-1.0.0/PKG-INFO +40 -0
mathformer-1.0.0/README.md +18 -0
mathformer-1.0.0/pyproject.toml +47 -0
mathformer-1.0.0/setup.cfg +4 -0
mathformer-1.0.0/src/mathformer/__init__.py +46 -0
mathformer-1.0.0/src/mathformer/api.py +580 -0
mathformer-1.0.0/src/mathformer/tokenizer.py +101 -0
mathformer-1.0.0/src/mathformer.egg-info/PKG-INFO +40 -0
mathformer-1.0.0/src/mathformer.egg-info/SOURCES.txt +12 -0
mathformer-1.0.0/src/mathformer.egg-info/dependency_links.txt +1 -0
mathformer-1.0.0/src/mathformer.egg-info/requires.txt +7 -0
mathformer-1.0.0/src/mathformer.egg-info/top_level.txt +1 -0
mathformer-1.0.0/tests/test_api.py +107 -0

mathformer-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 MathFormer Authors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

mathformer-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,40 @@
+Metadata-Version: 2.4
+Name: mathformer
+Version: 1.0.0
+Summary: A transformer-based math library
+Author-email: JeremySu0818 <xinghong.su0818@gmail.com>
+Project-URL: Homepage, https://github.com/JeremySu0818/MathFormer-API
+Project-URL: Bug Tracker, https://github.com/JeremySu0818/MathFormer-API/issues
+Project-URL: Repository, https://github.com/JeremySu0818/MathFormer-API
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=2.0.0
+Requires-Dist: transformers>=4.30.0
+Requires-Dist: safetensors>=0.3.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Dynamic: license-file
+# MathFormer
+MathFormer is a Python library for mathematical operations using transformer architectures.
+## Installation
+```bash
+pip install mathformer
+```
+## Usage
+```python
+import mathformer
+# Example usage
+# mathformer.do_something()
+```

mathformer-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,18 @@
+# MathFormer
+MathFormer is a Python library for mathematical operations using transformer architectures.
+## Installation
+```bash
+pip install mathformer
+```
+## Usage
+```python
+import mathformer
+# Example usage: the arithmetic helpers return their result as a string
+result = mathformer.add(12, 34)
+```

mathformer-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,47 @@
+[tool.setuptools.package-dir]
+"" = "src"
+[tool.setuptools.packages.find]
+where = ["src"]
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "mathformer"
+version = "1.0.0"
+description = "A transformer-based math library"
+readme = "README.md"
+authors = [
+  { name = "JeremySu0818", email = "xinghong.su0818@gmail.com" },
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+requires-python = ">=3.8"
+dependencies = [
+    "torch>=2.0.0",
+    "transformers>=4.30.0",
+    "safetensors>=0.3.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.urls]
+"Homepage" = "https://github.com/JeremySu0818/MathFormer-API"
+"Bug Tracker" = "https://github.com/JeremySu0818/MathFormer-API/issues"
+"Repository" = "https://github.com/JeremySu0818/MathFormer-API"
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]

mathformer-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

mathformer-1.0.0/src/mathformer/__init__.py ADDED Viewed

@@ -0,0 +1,46 @@
+from typing import Union, Optional
+from .api import MathFormerAPI, MathFormer
+from .tokenizer import MathTokenizer
+# Package version; matches the `version` field declared in pyproject.toml.
+__version__ = "1.0.0"
+# Shared API instance used by the module-level helper functions of this package.
+# MathFormerAPI defaults to lazy_load=True, so constructing it here does not load any model.
+_default_api = MathFormerAPI()
+def add(*args: Union[str, int]) -> str:
+    """Add the given values via the shared default ``MathFormerAPI`` and return the result as a string."""
+    return _default_api.add(*args)
+def sub(*args: Union[str, int]) -> str:
+    """Subtract the given values left to right via the shared default ``MathFormerAPI``; returns a string."""
+    return _default_api.sub(*args)
+def mul(*args: Union[str, int]) -> str:
+    """Multiply the given values via the shared default ``MathFormerAPI`` and return the result as a string."""
+    return _default_api.mul(*args)
+def div(*args: Union[str, int]) -> str:
+    """Divide the given values left to right via the shared default ``MathFormerAPI``.
+    Returns the quotient as a string, or ``Q<quotient>R<remainder>`` when the remainder is non-zero.
+    """
+    return _default_api.div(*args)
+def calculate(operation: str, a, b) -> str:
+    """Apply ``operation`` ("add", "sub", "mul" or "div") to ``a`` and ``b`` via the shared default ``MathFormerAPI``."""
+    return _default_api.calculate(operation, a, b)
+def unload_models():
+    """Unload every model held by the shared default API instance."""
+    _default_api.unload_all()
+# Names exported by `from mathformer import *`: the classes plus the module-level helpers.
+__all__ = [
+    "MathFormerAPI",
+    "MathFormer",
+    "MathTokenizer",
+    "add",
+    "sub",
+    "mul",
+    "div",
+    "calculate",
+    "unload_models",
+]

mathformer-1.0.0/src/mathformer/api.py ADDED Viewed

@@ -0,0 +1,580 @@
+import re
+from typing import Optional, Dict, Any, List, Union, Tuple
+from pathlib import Path
+import torch
+from transformers import LlamaForCausalLM, logging
+import os
+import warnings
+# NOTE: the statements below run at import time and change process-wide state.
+# Disable Hugging Face Hub progress bars.
+os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
+# No category or module filter is given, so this silences every Python warning in the
+# process, not only warnings raised by this package.
+warnings.filterwarnings("ignore")
+# Restrict transformers logging to errors and turn off its progress bars.
+logging.set_verbosity_error()
+logging.disable_progress_bar()
+from .tokenizer import MathTokenizer
+# Directory containing this module; the default model folders are resolved relative to it.
+_BASE_DIR = Path(__file__).parent
+# Default model directory for each supported operation name.
+_DEFAULT_MODEL_PATHS = {
+    "add": _BASE_DIR / "addformer",
+    "sub": _BASE_DIR / "subformer",
+    "mul": _BASE_DIR / "mulformer",
+    "div": _BASE_DIR / "divformer",
+}
+# Operator symbol for each operation name (not referenced by the code visible in this module).
+_OPERATION_SYMBOLS = {
+    "add": "+",
+    "sub": "-",
+    "mul": "*",
+    "div": "/",
+}
+class MathFormer:
+    def __init__(
+        self,
+        model_path: str,
+        device: Optional[str] = None,
+        max_new_tokens: int = 32,
+    ):
+        self.model_path = Path(model_path)
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.max_new_tokens = max_new_tokens
+        self._model: Optional[LlamaForCausalLM] = None
+        self._tokenizer: Optional[MathTokenizer] = None
+        self._loaded = False
+    def load(self) -> "MathFormer":
+        if self._loaded:
+            return self
+        self._tokenizer = MathTokenizer.from_pretrained(str(self.model_path))
+        self._model = LlamaForCausalLM.from_pretrained(str(self.model_path))
+        self._model.to(self.device)
+        self._model.eval()
+        self._loaded = True
+        return self
+    def unload(self) -> None:
+        if self._model is not None:
+            del self._model
+            self._model = None
+        if self._tokenizer is not None:
+            del self._tokenizer
+            self._tokenizer = None
+        self._loaded = False
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    @property
+    def is_loaded(self) -> bool:
+        return self._loaded
+    def predict(self, expression: str) -> str:
+        if not self._loaded:
+            self.load()
+        if "=" not in expression:
+            expression += "="
+        inputs = self._tokenizer(expression, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(self.device)
+        attention_mask = inputs["attention_mask"].to(self.device)
+        with torch.no_grad():
+            outputs = self._model.generate(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                max_new_tokens=self.max_new_tokens,
+                pad_token_id=self._tokenizer.pad_token_id,
+                eos_token_id=self._tokenizer.eos_token_id,
+                do_sample=False,
+                repetition_penalty=1.1,
+            )
+        generated_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
+        if "=" in generated_text:
+            answer = generated_text.split("=", 1)[1].strip()
+        else:
+            answer = generated_text.strip()
+        return answer
+    def __call__(self, expression: str) -> str:
+        return self.predict(expression)
+    def __enter__(self) -> "MathFormer":
+        self.load()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        self.unload()
+class MathFormerAPI:
+    def __init__(
+        self,
+        model_paths: Optional[Dict[str, str]] = None,
+        device: Optional[str] = None,
+        max_new_tokens: int = 32,
+        lazy_load: bool = True,
+    ):
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.max_new_tokens = max_new_tokens
+        paths = model_paths or {}
+        self._model_paths = {
+            op: Path(paths.get(op, _DEFAULT_MODEL_PATHS[op]))
+            for op in ["add", "sub", "mul", "div"]
+        }
+        self.models: Dict[str, MathFormer] = {
+            op: MathFormer(
+                model_path=str(path),
+                device=self.device,
+                max_new_tokens=self.max_new_tokens,
+            )
+            for op, path in self._model_paths.items()
+        }
+        if not lazy_load:
+            self.load_all()
+    def load_all(self) -> "MathFormerAPI":
+        """Load every operation model and return ``self`` so calls can be chained."""
+        for model in self.models.values():
+            model.load()
+        return self
+    def unload_all(self) -> None:
+        """Unload every operation model."""
+        for model in self.models.values():
+            model.unload()
+    def load(self, operation: str) -> "MathFormerAPI":
+        """Load the model for ``operation`` and return ``self``; unknown operation names are silently ignored."""
+        if operation in self.models:
+            self.models[operation].load()
+        return self
+    def unload(self, operation: str) -> None:
+        """Unload the model for ``operation``; unknown operation names are silently ignored."""
+        if operation in self.models:
+            self.models[operation].unload()
+    def _raw_predict(self, operation: str, expression: str) -> str:
+        if operation not in self.models:
+            raise ValueError(
+                f"Unknown operation type: {operation}. Available: {list(self.models.keys())}"
+            )
+        return self.models[operation].predict(expression)
+    def _single_add(self, a: int, b: int) -> Tuple[int, int]:
+        result_str = self._raw_predict("add", f"{a}+{b}")
+        result = int(result_str)
+        return result % 10, result // 10
+    def _single_sub(self, a: int, b: int, borrow: int = 0) -> Tuple[int, int]:
+        a_actual = a - borrow
+        if a_actual >= b:
+            result_str = self._raw_predict("sub", f"{a_actual}-{b}")
+            return int(result_str), 0
+        else:
+            a_with_borrow = a_actual + 10
+            result_str = self._raw_predict("sub", f"{a_with_borrow}-{b}")
+            return int(result_str), 1
+    def _single_mul(self, a: int, b: int) -> int:
+        """Ask the "mul" model for ``a*b`` and return its answer as an int."""
+        result_str = self._raw_predict("mul", f"{a}*{b}")
+        return int(result_str)
+    def _single_div(self, a: int, b: int) -> Tuple[int, int]:
+        result_str = self._raw_predict("div", f"{a}/{b}")
+        match = re.match(r"Q(\d+)R(\d+)", result_str)
+        if match:
+            return int(match.group(1)), int(match.group(2))
+        return int(result_str), 0
+    def _multi_add(self, a: int, b: int) -> int:
+        """Add two integers digit by digit, using the "add" model for each digit pair.
+        Negative operands are reduced to an addition or subtraction of non-negative values.
+        """
+        if a < 0 or b < 0:
+            if a < 0 and b < 0:
+                # (-x) + (-y) == -(x + y)
+                return -self._multi_add(-a, -b)
+            elif a < 0:
+                # (-x) + b == b - x
+                return self._multi_sub(b, -a)
+            else:
+                # a + (-y) == a - y
+                return self._multi_sub(a, -b)
+        # Digits are stored least-significant first and zero-padded to equal length.
+        digits_a = [int(d) for d in str(a)[::-1]]
+        digits_b = [int(d) for d in str(b)[::-1]]
+        max_len = max(len(digits_a), len(digits_b))
+        digits_a.extend([0] * (max_len - len(digits_a)))
+        digits_b.extend([0] * (max_len - len(digits_b)))
+        result = []
+        carry = 0
+        for i in range(max_len):
+            # The incoming carry is folded into the digit of `a` natively, so the model
+            # is only ever asked to add two single digits.
+            sum_with_carry = digits_a[i] + carry
+            carry_from_first = 0
+            if sum_with_carry > 9:
+                sum_with_carry = sum_with_carry - 10
+                carry_from_first = 1
+            digit_result, new_carry = self._single_add(sum_with_carry, digits_b[i])
+            result.append(digit_result)
+            carry = new_carry + carry_from_first
+        if carry > 0:
+            result.append(carry)
+        # Reverse back to most-significant-first before converting to int.
+        return int("".join(str(d) for d in result[::-1]))
+    def _multi_sub(self, a: int, b: int) -> int:
+        """Compute ``a - b`` digit by digit, using the "sub" model for each digit pair.
+        Sign handling reduces every case to a subtraction ``a - b`` with ``a >= b >= 0``
+        (or to an addition of non-negative values).
+        """
+        if a < 0 and b < 0:
+            # (-x) - (-y) == y - x
+            return self._multi_sub(-b, -a)
+        elif a < 0:
+            # (-x) - b == -(x + b)
+            return -self._multi_add(-a, b)
+        elif b < 0:
+            # a - (-y) == a + y
+            return self._multi_add(a, -b)
+        if a < b:
+            # Swap the operands so the digit loop never ends with an outstanding borrow.
+            return -self._multi_sub(b, a)
+        # Digits are stored least-significant first; b is zero-padded to the length of a.
+        digits_a = [int(d) for d in str(a)[::-1]]
+        digits_b = [int(d) for d in str(b)[::-1]]
+        digits_b.extend([0] * (len(digits_a) - len(digits_b)))
+        result = []
+        borrow = 0
+        for i in range(len(digits_a)):
+            digit_a = digits_a[i]
+            digit_b = digits_b[i]
+            digit_result, new_borrow = self._single_sub(digit_a, digit_b, borrow)
+            result.append(digit_result)
+            borrow = new_borrow
+        # Drop leading zeros (stored at the end of the reversed list), keeping at least one digit.
+        while len(result) > 1 and result[-1] == 0:
+            result.pop()
+        return int("".join(str(d) for d in result[::-1]))
+    def _multi_mul(self, a: int, b: int) -> int:
+        """Multiply two integers with schoolbook long multiplication.
+        Single-digit products come from the "mul" model; carries and partial sums are
+        accumulated with native integer arithmetic.
+        """
+        # The result is negative exactly when one operand is negative.
+        negative = (a < 0) ^ (b < 0)
+        a, b = abs(a), abs(b)
+        if a == 0 or b == 0:
+            # Short-circuit: no model call is needed for a zero product.
+            return 0
+        # Digits are stored least-significant first.
+        digits_a = [int(d) for d in str(a)[::-1]]
+        digits_b = [int(d) for d in str(b)[::-1]]
+        # A product of an m-digit and an n-digit number has at most m + n digits.
+        result = [0] * (len(digits_a) + len(digits_b))
+        for i, digit_b in enumerate(digits_b):
+            carry = 0
+            for j, digit_a in enumerate(digits_a):
+                product = self._single_mul(digit_a, digit_b)
+                total = product + carry + result[i + j]
+                result[i + j] = total % 10
+                carry = total // 10
+            # Propagate the carry left over from this row into the higher positions.
+            k = i + len(digits_a)
+            while carry > 0:
+                total = carry + result[k]
+                result[k] = total % 10
+                carry = total // 10
+                k += 1
+        # Drop leading zeros (stored at the end of the reversed list), keeping at least one digit.
+        while len(result) > 1 and result[-1] == 0:
+            result.pop()
+        final_result = int("".join(str(d) for d in result[::-1]))
+        return -final_result if negative else final_result
+    def _trial_division(self, dividend: int, divisor: int) -> Tuple[int, int]:
+        quotient = 0
+        for q in range(9, -1, -1):
+            product = self._multi_mul(divisor, q)
+            if product <= dividend:
+                quotient = q
+                break
+        product = self._multi_mul(divisor, quotient)
+        remainder = self._multi_sub(dividend, product)
+        return quotient, remainder
+    def _multi_div(self, a: int, b: int) -> Tuple[int, int]:
+        if b == 0:
+            raise ZeroDivisionError("Divisor cannot be zero")
+        negative = (a < 0) ^ (b < 0)
+        a, b = abs(a), abs(b)
+        if a < b:
+            return 0, a
+        if a == 0:
+            return 0, 0
+        digits_a = [int(d) for d in str(a)]
+        quotient_digits = []
+        remainder = 0
+        for digit in digits_a:
+            current = remainder * 10 + digit
+            if current < b:
+                quotient_digits.append(0)
+                remainder = current
+            else:
+                if b <= 9 and current <= 89:
+                    q, r = self._single_div(current, b)
+                else:
+                    q, r = self._trial_division(current, b)
+                quotient_digits.append(q)
+                remainder = r
+        while len(quotient_digits) > 1 and quotient_digits[0] == 0:
+            quotient_digits.pop(0)
+        quotient = int("".join(str(d) for d in quotient_digits))
+        if negative:
+            quotient = -quotient
+        return quotient, remainder
+    def _parse_expression(self, expression: str, operation: str) -> Tuple[int, int]:
+        """Split a two-operand expression such as ``"12+7="`` into its integer operands.
+        Spaces and "=" are removed first. For "mul" and "div" the symbols "×" and "÷" are
+        accepted as aliases of "*" and "/".
+        Raises:
+            ValueError: If the operation is unknown, the expression does not split into
+                exactly two parts, or a part is not an integer literal.
+        """
+        # NOTE(review): no caller of this helper is visible in this module.
+        expression = expression.replace(" ", "").replace("=", "")
+        if operation == "add":
+            parts = expression.split("+")
+        elif operation == "sub":
+            if expression.startswith("-"):
+                # A leading "-" is the sign of the first operand; the operator is the
+                # next "-" after it.
+                rest = expression[1:]
+                if "-" in rest:
+                    idx = rest.index("-")
+                    parts = ["-" + rest[:idx], rest[idx + 1 :]]
+                else:
+                    raise ValueError(f"Cannot parse expression: {expression}")
+            else:
+                parts = expression.split("-")
+        elif operation == "mul":
+            expression = expression.replace("×", "*")
+            parts = expression.split("*")
+        elif operation == "div":
+            expression = expression.replace("÷", "/")
+            parts = expression.split("/")
+        else:
+            raise ValueError(f"Unknown operation type: {operation}")
+        if len(parts) != 2:
+            raise ValueError(f"Cannot parse expression: {expression}")
+        return int(parts[0]), int(parts[1])
+    def add(self, *args: Union[str, int]) -> str:
+        values = []
+        if len(args) == 0:
+            raise ValueError("At least one argument is required")
+        if len(args) == 1 and isinstance(args[0], str) and "+" in args[0]:
+            expression = args[0].replace(" ", "").replace("=", "")
+            parts = expression.split("+")
+            try:
+                values = [int(p) for p in parts]
+            except ValueError:
+                raise ValueError(f"Cannot parse expression: {expression}")
+        else:
+            try:
+                values = [int(a) for a in args]
+            except ValueError:
+                raise ValueError(
+                    f"Arguments contain values that cannot be converted to integers: {args}"
+                )
+        if not values:
+            return "0"
+        result = values[0]
+        for val in values[1:]:
+            result = self._multi_add(result, val)
+        return str(result)
+    def sub(self, *args: Union[str, int]) -> str:
+        values = []
+        if len(args) == 0:
+            raise ValueError("At least one argument is required")
+        if len(args) == 1 and isinstance(args[0], str) and "-" in args[0].lstrip("-"):
+            expression = args[0].replace(" ", "").replace("=", "")
+            if expression.startswith("-"):
+                temp_expr = expression[1:]
+                parts = temp_expr.split("-")
+                values = [-int(parts[0])] + [int(p) for p in parts[1:]]
+            else:
+                parts = expression.split("-")
+                values = [int(p) for p in parts]
+        else:
+            try:
+                values = [int(a) for a in args]
+            except ValueError:
+                raise ValueError(
+                    f"Arguments contain values that cannot be converted to integers: {args}"
+                )
+        if not values:
+            return "0"
+        result = values[0]
+        for val in values[1:]:
+            result = self._multi_sub(result, val)
+        return str(result)
+    def mul(self, *args: Union[str, int]) -> str:
+        values = []
+        if len(args) == 0:
+            raise ValueError("At least one argument is required")
+        if (
+            len(args) == 1
+            and isinstance(args[0], str)
+            and any(op in args[0] for op in ["*", "×"])
+        ):
+            expression = args[0].replace(" ", "").replace("=", "").replace("×", "*")
+            parts = expression.split("*")
+            try:
+                values = [int(p) for p in parts]
+            except ValueError:
+                raise ValueError(f"Cannot parse expression: {expression}")
+        else:
+            try:
+                values = [int(a) for a in args]
+            except ValueError:
+                raise ValueError(
+                    f"Arguments contain values that cannot be converted to integers: {args}"
+                )
+        if not values:
+            return "0"
+        result = values[0]
+        for val in values[1:]:
+            result = self._multi_mul(result, val)
+        return str(result)
+    def div(self, *args: Union[str, int]) -> str:
+        values = []
+        if len(args) == 0:
+            raise ValueError("At least one argument is required")
+        if (
+            len(args) == 1
+            and isinstance(args[0], str)
+            and any(op in args[0] for op in ["/", "÷"])
+        ):
+            expression = args[0].replace(" ", "").replace("=", "").replace("÷", "/")
+            parts = expression.split("/")
+            try:
+                values = [int(p) for p in parts]
+            except ValueError:
+                raise ValueError(f"Cannot parse expression: {expression}")
+        else:
+            try:
+                values = [int(a) for a in args]
+            except ValueError:
+                raise ValueError(
+                    f"Arguments contain values that cannot be converted to integers: {args}"
+                )
+        if not values:
+            return "0"
+        result_q = values[0]
+        result_r = 0
+        for val in values[1:]:
+            result_q, result_r = self._multi_div(result_q, val)
+        if result_r == 0:
+            return str(result_q)
+        else:
+            return f"Q{result_q}R{result_r}"
+    def calculate(
+        self, operation: str, a: Union[int, float, str], b: Union[int, float, str]
+    ) -> str:
+        a_int = int(a)
+        b_int = int(b)
+        if operation == "add":
+            result = self._multi_add(a_int, b_int)
+            return str(result)
+        elif operation == "sub":
+            result = self._multi_sub(a_int, b_int)
+            return str(result)
+        elif operation == "mul":
+            result = self._multi_mul(a_int, b_int)
+            return str(result)
+        elif operation == "div":
+            quotient, remainder = self._multi_div(a_int, b_int)
+            if remainder == 0:
+                return str(quotient)
+            else:
+                return f"Q{quotient}R{remainder}"
+        else:
+            raise ValueError(f"Unknown operation type: {operation}")
+    def batch_predict(
+        self,
+        operation: str,
+        expressions: List[str],
+    ) -> List[str]:
+        results = []
+        for expr in expressions:
+            if operation == "add":
+                results.append(self.add(expr))
+            elif operation == "sub":
+                results.append(self.sub(expr))
+            elif operation == "mul":
+                results.append(self.mul(expr))
+            elif operation == "div":
+                results.append(self.div(expr))
+            else:
+                raise ValueError(f"Unknown operation type: {operation}")
+        return results
+    def get_model_info(self) -> Dict[str, Any]:
+        return {
+            op: {
+                "path": str(model.model_path),
+                "loaded": model.is_loaded,
+                "device": model.device,
+            }
+            for op, model in self.models.items()
+        }
+    def __enter__(self) -> "MathFormerAPI":
+        """Enter the ``with`` block; nothing is loaded here."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Unload all models when leaving the ``with`` block; exceptions are not suppressed."""
+        self.unload_all()

mathformer-1.0.0/src/mathformer/tokenizer.py ADDED Viewed

@@ -0,0 +1,101 @@
+import os
+import json
+from typing import List, Dict, Union, Optional
+import torch
+class MathTokenizer:
+    def __init__(self, model_max_length: int = 64):
+        self.chars = [
+            "<pad>", "<s>", "</s>", "<unk>",
+            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+            "+", "-", "*", "/", "=", ".", "(", ")", "^", "%", " ",
+            "Q", "R",
+        ]
+        self.token_to_id = {c: i for i, c in enumerate(self.chars)}
+        self.id_to_token = {i: c for i, c in enumerate(self.chars)}
+        self.pad_token_id = self.token_to_id["<pad>"]
+        self.eos_token_id = self.token_to_id["</s>"]
+        self.bos_token_id = self.token_to_id["<s>"]
+        self.unk_token_id = self.token_to_id["<unk>"]
+        self.padding_side = "left"
+        self.model_max_length = model_max_length
+    @classmethod
+    def from_pretrained(cls, path: str) -> "MathTokenizer":
+        config_path = os.path.join(path, "tokenizer_config.json")
+        vocab_path = os.path.join(path, "vocab.json")
+        tokenizer = cls()
+        if os.path.exists(config_path):
+            with open(config_path, "r", encoding="utf-8") as f:
+                config = json.load(f)
+                tokenizer.model_max_length = config.get("model_max_length", 64)
+                tokenizer.padding_side = config.get("padding_side", "left")
+        if os.path.exists(vocab_path):
+            with open(vocab_path, "r", encoding="utf-8") as f:
+                vocab = json.load(f)
+                tokenizer.token_to_id = vocab
+                tokenizer.id_to_token = {int(v): k for k, v in vocab.items()}
+        return tokenizer
+    def __call__(
+        self,
+        texts: Union[str, List[str]],
+        return_tensors: Optional[str] = None,
+        padding: bool = True,
+    ) -> Dict:
+        if isinstance(texts, str):
+            texts = [texts]
+        input_ids_list = []
+        attention_mask_list = []
+        for text in texts:
+            ids = [self.token_to_id.get(c, self.unk_token_id) for c in text]
+            input_ids_list.append(ids)
+            attention_mask_list.append([1] * len(ids))
+        if return_tensors == "pt":
+            max_len = max(len(x) for x in input_ids_list)
+            padded_ids = []
+            padded_mask = []
+            for ids, mask in zip(input_ids_list, attention_mask_list):
+                pad_len = max_len - len(ids)
+                if self.padding_side == "left":
+                    ids = [self.pad_token_id] * pad_len + ids
+                    mask = [0] * pad_len + mask
+                else:
+                    ids = ids + [self.pad_token_id] * pad_len
+                    mask = mask + [0] * pad_len
+                padded_ids.append(ids)
+                padded_mask.append(mask)
+            return {
+                "input_ids": torch.tensor(padded_ids, dtype=torch.long),
+                "attention_mask": torch.tensor(padded_mask, dtype=torch.long),
+            }
+        return {"input_ids": input_ids_list, "attention_mask": attention_mask_list}
+    def decode(self, token_ids: Union[List[int], torch.Tensor], skip_special_tokens: bool = False) -> str:
+        result = ""
+        if isinstance(token_ids, torch.Tensor):
+            token_ids = token_ids.tolist()
+        for idx in token_ids:
+            char = self.id_to_token.get(idx, "<unk>")
+            if skip_special_tokens and char in ["<pad>", "<s>", "</s>"]:
+                continue
+            result += char
+        return result
+    def batch_decode(self, sequences: List, skip_special_tokens: bool = False) -> List[str]:
+        return [self.decode(seq, skip_special_tokens=skip_special_tokens) for seq in sequences]
+    def __len__(self) -> int:
+        return len(self.chars)

mathformer-1.0.0/src/mathformer.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,40 @@
+Metadata-Version: 2.4
+Name: mathformer
+Version: 1.0.0
+Summary: A transformer-based math library
+Author-email: JeremySu0818 <xinghong.su0818@gmail.com>
+Project-URL: Homepage, https://github.com/JeremySu0818/MathFormer-API
+Project-URL: Bug Tracker, https://github.com/JeremySu0818/MathFormer-API/issues
+Project-URL: Repository, https://github.com/JeremySu0818/MathFormer-API
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=2.0.0
+Requires-Dist: transformers>=4.30.0
+Requires-Dist: safetensors>=0.3.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Dynamic: license-file
+# MathFormer
+MathFormer is a Python library for mathematical operations using transformer architectures.
+## Installation
+```bash
+pip install mathformer
+```
+## Usage
+```python
+import mathformer
+# Example usage
+# mathformer.do_something()
+```

mathformer-1.0.0/src/mathformer.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+LICENSE
+README.md
+pyproject.toml
+src/mathformer/__init__.py
+src/mathformer/api.py
+src/mathformer/tokenizer.py
+src/mathformer.egg-info/PKG-INFO
+src/mathformer.egg-info/SOURCES.txt
+src/mathformer.egg-info/dependency_links.txt
+src/mathformer.egg-info/requires.txt
+src/mathformer.egg-info/top_level.txt
+tests/test_api.py

mathformer-1.0.0/src/mathformer.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

mathformer-1.0.0/src/mathformer.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,7 @@
+torch>=2.0.0
+transformers>=4.30.0
+safetensors>=0.3.0
+[dev]
+pytest>=7.0.0
+pytest-cov>=4.0.0

mathformer-1.0.0/src/mathformer.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+mathformer

mathformer-1.0.0/tests/test_api.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""
+Unit tests for MathFormer API
+"""
+import pytest
+import mathformer
+class TestAddition:
+    """Test cases for addition operations"""
+    def test_add_two_integers(self):
+        """Test adding two integers"""
+        result = mathformer.add(1, 2)
+        assert result == "3"
+    def test_add_multiple_integers(self):
+        """Test adding multiple integers"""
+        result = mathformer.add(1, 2, 3)
+        assert result == "6"
+    def test_add_with_strings(self):
+        """Test adding numbers passed as strings"""
+        result = mathformer.add("10", "20")
+        assert result == "30"
+class TestSubtraction:
+    """Test cases for subtraction operations"""
+    def test_sub_two_integers(self):
+        """Test subtracting two integers"""
+        result = mathformer.sub(5, 3)
+        assert result == "2"
+    def test_sub_multiple_integers(self):
+        """Test subtracting multiple integers"""
+        result = mathformer.sub(10, 3, 2)
+        assert result == "5"
+class TestMultiplication:
+    """Test cases for multiplication operations"""
+    def test_mul_two_integers(self):
+        """Test multiplying two integers"""
+        result = mathformer.mul(3, 4)
+        assert result == "12"
+    def test_mul_multiple_integers(self):
+        """Test multiplying multiple integers"""
+        result = mathformer.mul(2, 3, 4)
+        assert result == "24"
+class TestDivision:
+    """Test cases for division operations"""
+    def test_div_two_integers(self):
+        """Test dividing two integers"""
+        result = mathformer.div(10, 2)
+        assert result == "5"
+class TestCalculate:
+    """Test cases for the calculate function"""
+    def test_calculate_add(self):
+        """Test calculate with add operation"""
+        result = mathformer.calculate("add", 5, 3)
+        assert result == "8"
+    def test_calculate_sub(self):
+        """Test calculate with sub operation"""
+        result = mathformer.calculate("sub", 10, 4)
+        assert result == "6"
+    def test_calculate_mul(self):
+        """Test calculate with mul operation"""
+        result = mathformer.calculate("mul", 6, 7)
+        assert result == "42"
+    def test_calculate_div(self):
+        """Test calculate with div operation"""
+        result = mathformer.calculate("div", 20, 5)
+        assert result == "4"
+class TestModuleExports:
+    """Test that all expected exports are available"""
+    def test_mathformerapi_exists(self):
+        """Test MathFormerAPI class is exported"""
+        assert hasattr(mathformer, "MathFormerAPI")
+    def test_mathformer_exists(self):
+        """Test MathFormer class is exported"""
+        assert hasattr(mathformer, "MathFormer")
+    def test_tokenizer_exists(self):
+        """Test MathTokenizer class is exported"""
+        assert hasattr(mathformer, "MathTokenizer")
+    def test_version_exists(self):
+        """Test version is defined"""
+        assert hasattr(mathformer, "__version__")
+        assert mathformer.__version__ == "1.0.0"