smart-translate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.1
2
+ Name: smart-translate
3
+ Version: 0.1.0
4
+ Summary: Rule-based and selective smart translation library
5
+ Author-email: Onkar Nanavare <onkarnanavare007@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: transformers
10
+ Requires-Dist: torch
File without changes
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "smart-translate"
7
+ version = "0.1.0"
8
+ description = "Rule-based and selective smart translation library"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Onkar Nanavare", email = "onkarnanavare007@gmail.com" }
14
+ ]
15
+
16
+ dependencies = [
17
+ "transformers",
18
+ "torch"
19
+ ]
20
+
21
+ [tool.setuptools]
22
+ packages = ["smart_translate", "smart_translate.rules"]
23
+
24
+ [tool.setuptools.package-data]
25
+ "smart_translate.rules" = ["*.json"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
# Package entry point: expose the translator class at top level so users
# can write `from smart_translate import SmartTranslator`.
from .translator import SmartTranslator

__all__ = ["SmartTranslator"]
@@ -0,0 +1,14 @@
1
# smart_translate/config.py
#
# Central configuration for the library. (A previous draft kept defaults
# for source/target languages and max generation length here as dead,
# commented-out code; it duplicated the live constant below and has been
# removed.)

# Hugging Face model id used for translation (NLLB-200, distilled, 600M).
MODEL_NAME = "facebook/nllb-200-distilled-600M"
@@ -0,0 +1,14 @@
1
+ import re
2
+
3
def protect_words(text, keep_words):
    """Shield each word in *keep_words* behind a unique placeholder.

    Whole-word occurrences (``\\b``-bounded) of every kept word are
    replaced by ``__KEEP_<i>__`` tokens. Returns the rewritten text and
    a placeholder -> original-word mapping for ``restore_words``.
    """
    mapping = {}
    for index, kept in enumerate(keep_words):
        token = f"__KEEP_{index}__"
        mapping[token] = kept
        pattern = re.compile(rf"\b{re.escape(kept)}\b")
        text = pattern.sub(token, text)
    return text, mapping
10
+
11
def restore_words(text, mapping):
    """Swap every placeholder produced by ``protect_words`` back to its word."""
    restored = text
    for token, original in mapping.items():
        restored = restored.replace(token, original)
    return restored
@@ -0,0 +1,4 @@
1
+ {
2
+ "मेरा": "माझा",
3
+ "Anger": "onkar"
4
+ }
@@ -0,0 +1,12 @@
1
+ from importlib import resources
2
+ import json
3
+
4
def load_custom_rules():
    """Read the packaged custom replacement rules (JSON) into a dict."""
    rules_resource = resources.files("smart_translate.rules").joinpath("custom_rules.json")
    with rules_resource.open("r") as handle:
        return json.load(handle)
7
+
8
+
9
def apply_custom_rules(text, rules=None):
    """Apply plain source -> target string replacements to *text*.

    Args:
        text: Text to post-process (typically model output).
        rules: Mapping of substrings to replacements. When ``None``, the
            packaged ``custom_rules.json`` is loaded lazily.

    Returns:
        The text with every rule applied, in dict iteration order.
    """
    # Bug fix: the default was the undefined name CUSTOM_REPLACEMENTS,
    # which raised NameError the moment this module was imported
    # (defaults are evaluated at function-definition time). Use a None
    # sentinel and load the packaged rules only when actually needed.
    if rules is None:
        rules = load_custom_rules()
    for source, target in rules.items():
        text = text.replace(source, target)
    return text
@@ -0,0 +1,10 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
# Bug fix: the packaged data file is named "custom_rules.json" (see the
# package-data glob and smart_translate/rules.py); the old path
# "custom.rules.json" never exists, so load_rules() always fell through
# to the empty dict.
RULES_FILE = Path(__file__).parent / "rules" / "custom_rules.json"

def load_rules():
    """Return the custom rule mapping, or an empty dict if the file is missing."""
    if RULES_FILE.exists():
        with open(RULES_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}
@@ -0,0 +1,47 @@
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from .protector import protect_words, restore_words
4
+ from .rules import apply_custom_rules
5
+ from .config import MODEL_NAME
6
+
7
class SmartTranslator:
    """Selective translator built on the NLLB seq2seq model from config.

    Words listed in ``keep_words`` are shielded from translation with
    placeholders, restored after decoding, and finally the packaged
    custom replacement rules are applied.
    """

    def __init__(self, src_lang, tgt_lang):
        # src_lang / tgt_lang are expected to be NLLB language tokens,
        # e.g. "tha_Thai" / "eng_Latn" -- not validated here.
        self.src_lang = src_lang
        self.tgt_lang = tgt_lang

        # Detect device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print("Using device:", self.device)

        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

        # Move model to the device
        self.model.to(self.device)
        self.model.eval()

    def translate(self, text, keep_words=None):
        """Translate *text*, leaving every word in *keep_words* untouched.

        Returns the decoded translation after placeholder restoration
        and custom-rule substitution.
        """
        keep_words = keep_words or []

        # Shield keep-words behind __KEEP_n__ placeholders so the model
        # does not translate them.
        protected_text, mapping = protect_words(text, keep_words)

        # Setting src_lang on the tokenizer mutates shared state; this
        # instance is not safe for concurrent translate() calls.
        self.tokenizer.src_lang = self.src_lang
        inputs = self.tokenizer(protected_text, return_tensors="pt")

        # Move input tensors to same device as model
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # NOTE(review): convert_tokens_to_ids returns the unk id when
        # tgt_lang is not a known language token -- confirm codes upstream.
        tgt_lang_id = self.tokenizer.convert_tokens_to_ids(self.tgt_lang)

        # Force the decoder to start with the target-language token;
        # generation is capped at 256 tokens.
        outputs = self.model.generate(
            **inputs,
            forced_bos_token_id=tgt_lang_id,
            max_length=256
        )

        translated = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        translated = restore_words(translated, mapping)
        # Uses apply_custom_rules' default rule set, loaded from the
        # packaged custom_rules.json.
        translated = apply_custom_rules(translated)

        return translated
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.1
2
+ Name: smart-translate
3
+ Version: 0.1.0
4
+ Summary: Rule-based and selective smart translation library
5
+ Author-email: Onkar Nanavare <onkarnanavare007@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: transformers
10
+ Requires-Dist: torch
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ smart_translate/__init__.py
4
+ smart_translate/config.py
5
+ smart_translate/protector.py
6
+ smart_translate/rules.py
7
+ smart_translate/rules_loader.py
8
+ smart_translate/translator.py
9
+ smart_translate.egg-info/PKG-INFO
10
+ smart_translate.egg-info/SOURCES.txt
11
+ smart_translate.egg-info/dependency_links.txt
12
+ smart_translate.egg-info/requires.txt
13
+ smart_translate.egg-info/top_level.txt
14
+ smart_translate/rules/custom_rules.json
@@ -0,0 +1,2 @@
1
+ transformers
2
+ torch
@@ -0,0 +1 @@
1
+ smart_translate