PyPI - langchain-maritaca - Versions diffs - 0.2.2__py3-none-any.whl - Mend

langchain-maritaca 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

langchain_maritaca/__init__.py +14 -0
langchain_maritaca/chat_models.py +794 -0
langchain_maritaca/embeddings.py +289 -0
langchain_maritaca/py.typed +0 -0
langchain_maritaca/version.py +7 -0
langchain_maritaca-0.2.2.dist-info/METADATA +274 -0
langchain_maritaca-0.2.2.dist-info/RECORD +9 -0
langchain_maritaca-0.2.2.dist-info/WHEEL +4 -0
langchain_maritaca-0.2.2.dist-info/licenses/LICENSE +21 -0

langchain_maritaca/embeddings.py ADDED Viewed

@@ -0,0 +1,289 @@
+"""DeepInfra Embeddings for use with Maritaca AI workflows.
+Maritaca AI recommends DeepInfra's multilingual-e5-large model for embeddings
+in RAG (Retrieval-Augmented Generation) workflows with Sabiá models.
+Author: Anderson Henrique da Silva
+Location: Minas Gerais, Brasil
+GitHub: https://github.com/anderson-ufrj
+"""
+from __future__ import annotations
+from typing import Any
+import httpx
+from langchain_core.embeddings import Embeddings
+from langchain_core.utils import from_env, secret_from_env
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
+from typing_extensions import Self
+from langchain_maritaca.version import __version__
+# Default embedding model recommended by Maritaca AI
+DEFAULT_MODEL = "intfloat/multilingual-e5-large"
+DEFAULT_API_BASE = "https://api.deepinfra.com/v1/openai"
+class DeepInfraEmbeddings(BaseModel, Embeddings):
+    """DeepInfra embeddings integration for Maritaca AI workflows.
+    DeepInfra provides the multilingual-e5-large model, which is recommended
+    by Maritaca AI for RAG workflows with their Sabiá models. This model
+    supports 100 languages including Portuguese.
+    To use, you should have the environment variable `DEEPINFRA_API_KEY`
+    set with your API key, or pass it as a named parameter to the constructor.
+    Setup:
+        Install `langchain-maritaca` and set environment variable
+        `DEEPINFRA_API_KEY`.
+        ```bash
+        pip install -U langchain-maritaca
+        export DEEPINFRA_API_KEY="your-api-key"
+        ```
+    Key init args:
+        model:
+            Name of embedding model to use. Default is
+            `intfloat/multilingual-e5-large`.
+        api_key:
+            DeepInfra API key. If not passed in will be read from
+            env var `DEEPINFRA_API_KEY`.
+    Instantiate:
+        ```python
+        from langchain_maritaca import DeepInfraEmbeddings
+        embeddings = DeepInfraEmbeddings()
+        ```
+    Embed single text:
+        ```python
+        vector = embeddings.embed_query("Olá, como vai você?")
+        print(len(vector))  # 1024
+        ```
+    Embed multiple texts:
+        ```python
+        vectors = embeddings.embed_documents([
+            "Primeiro documento",
+            "Segundo documento",
+        ])
+        print(len(vectors))  # 2
+        print(len(vectors[0]))  # 1024
+        ```
+    Use with Maritaca for RAG:
+        ```python
+        from langchain_maritaca import ChatMaritaca, DeepInfraEmbeddings
+        from langchain_community.vectorstores import FAISS
+        embeddings = DeepInfraEmbeddings()
+        vectorstore = FAISS.from_texts(documents, embeddings)
+        llm = ChatMaritaca()
+        # ... build your RAG chain
+        ```
+    """
+    client: Any = Field(default=None, exclude=True)
+    """Sync HTTP client."""
+    async_client: Any = Field(default=None, exclude=True)
+    """Async HTTP client."""
+    model: str = Field(default=DEFAULT_MODEL)
+    """Model name to use.
+    Default is `intfloat/multilingual-e5-large`, which is optimized for
+    multilingual text including Portuguese. Supports 100 languages.
+    Embedding dimension: 1024, max tokens: 512.
+    """
+    deepinfra_api_key: SecretStr | None = Field(
+        alias="api_key",
+        default_factory=secret_from_env("DEEPINFRA_API_KEY", default=None),
+    )
+    """DeepInfra API key. Automatically inferred from env var `DEEPINFRA_API_KEY`."""
+    deepinfra_api_base: str = Field(
+        alias="base_url",
+        default_factory=from_env("DEEPINFRA_API_BASE", default=DEFAULT_API_BASE),
+    )
+    """Base URL for DeepInfra API."""
+    request_timeout: float | None = Field(default=60.0, alias="timeout")
+    """Timeout for requests in seconds."""
+    max_retries: int = 2
+    """Maximum number of retries."""
+    batch_size: int = 32
+    """Maximum number of texts to embed in a single request."""
+    model_config = ConfigDict(
+        populate_by_name=True,
+    )
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
+        """Create the default sync and async HTTP clients after field validation.
+        A missing API key is NOT rejected here: when `deepinfra_api_key` is
+        unset, the clients are configured with an empty bearer token.
+        Returns:
+            This instance, with `client` and `async_client` populated.
+        """
+        api_key = (
+            self.deepinfra_api_key.get_secret_value()
+            if self.deepinfra_api_key
+            else ""
+        )
+        # The same header set is used for both the sync and the async client.
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": f"langchain-maritaca/{__version__}",
+        }
+        # A client passed in by the caller is kept; one is built only if absent.
+        if not self.client:
+            self.client = httpx.Client(
+                base_url=self.deepinfra_api_base,
+                headers=headers,
+                timeout=httpx.Timeout(self.request_timeout),
+            )
+        if not self.async_client:
+            self.async_client = httpx.AsyncClient(
+                base_url=self.deepinfra_api_base,
+                headers=headers,
+                timeout=httpx.Timeout(self.request_timeout),
+            )
+        return self
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Embed a list of documents using DeepInfra.
+        Args:
+            texts: The list of texts to embed.
+        Returns:
+            List of embeddings, one for each text.
+        """
+        embeddings: list[list[float]] = []
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i : i + self.batch_size]
+            batch_embeddings = self._embed_batch(batch)
+            embeddings.extend(batch_embeddings)
+        return embeddings
+    def embed_query(self, text: str) -> list[float]:
+        """Embed a single query text.
+        Args:
+            text: The text to embed.
+        Returns:
+            Embedding for the text.
+        """
+        return self.embed_documents([text])[0]
+    async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
+        """Async embed a list of documents using DeepInfra.
+        Args:
+            texts: The list of texts to embed.
+        Returns:
+            List of embeddings, one for each text.
+        """
+        embeddings: list[list[float]] = []
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i : i + self.batch_size]
+            batch_embeddings = await self._aembed_batch(batch)
+            embeddings.extend(batch_embeddings)
+        return embeddings
+    async def aembed_query(self, text: str) -> list[float]:
+        """Async embed a single query text.
+        Args:
+            text: The text to embed.
+        Returns:
+            Embedding for the text.
+        """
+        result = await self.aembed_documents([text])
+        return result[0]
+    def _embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Embed a batch of texts synchronously.
+        Args:
+            texts: List of texts to embed.
+        Returns:
+            List of embeddings.
+        """
+        payload = {
+            "input": texts,
+            "model": self.model,
+            "encoding_format": "float",
+        }
+        for attempt in range(self.max_retries + 1):
+            try:
+                response = self.client.post("/embeddings", json=payload)
+                response.raise_for_status()
+                data = response.json()
+                # Sort by index to ensure correct order
+                sorted_data = sorted(data["data"], key=lambda x: x["index"])
+                return [item["embedding"] for item in sorted_data]
+            except httpx.TimeoutException:
+                if attempt < self.max_retries:
+                    continue
+                raise
+            except httpx.HTTPStatusError:
+                if attempt < self.max_retries:
+                    continue
+                raise
+        msg = f"Failed after {self.max_retries + 1} attempts"
+        raise RuntimeError(msg)
+    async def _aembed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Embed a batch of texts asynchronously.
+        Args:
+            texts: List of texts to embed.
+        Returns:
+            List of embeddings.
+        """
+        payload = {
+            "input": texts,
+            "model": self.model,
+            "encoding_format": "float",
+        }
+        for attempt in range(self.max_retries + 1):
+            try:
+                response = await self.async_client.post("/embeddings", json=payload)
+                response.raise_for_status()
+                data = response.json()
+                # Sort by index to ensure correct order
+                sorted_data = sorted(data["data"], key=lambda x: x["index"])
+                return [item["embedding"] for item in sorted_data]
+            except httpx.TimeoutException:
+                if attempt < self.max_retries:
+                    continue
+                raise
+            except httpx.HTTPStatusError:
+                if attempt < self.max_retries:
+                    continue
+                raise
+        msg = f"Failed after {self.max_retries + 1} attempts"
+        raise RuntimeError(msg)

langchain_maritaca/py.typed ADDED Viewed

File without changes

langchain_maritaca/version.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Version information for langchain-maritaca.
+Author: Anderson Henrique da Silva
+Location: Minas Gerais, Brasil
+"""
+__version__ = "0.2.2"

langchain_maritaca-0.2.2.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,274 @@
+Metadata-Version: 2.4
+Name: langchain-maritaca
+Version: 0.2.2
+Summary: An integration package connecting Maritaca AI and LangChain for Brazilian Portuguese language models
+Project-URL: Homepage, https://github.com/anderson-ufrj/langchain-maritaca
+Project-URL: Documentation, https://github.com/anderson-ufrj/langchain-maritaca#readme
+Project-URL: Repository, https://github.com/anderson-ufrj/langchain-maritaca
+Project-URL: Issues, https://github.com/anderson-ufrj/langchain-maritaca/issues
+Project-URL: Changelog, https://github.com/anderson-ufrj/langchain-maritaca/blob/main/CHANGELOG.md
+Project-URL: Maritaca AI, https://www.maritaca.ai/
+Author-email: Anderson Henrique da Silva <andersonhs27@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: ai,brazilian,chatbot,langchain,llm,maritaca,nlp,portuguese,sabia
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: <4.0,>=3.10
+Requires-Dist: httpx<1.0.0,>=0.25.0
+Requires-Dist: langchain-core<1.0.0,>=0.3.0
+Provides-Extra: dev
+Requires-Dist: mypy<2.0.0,>=1.10.0; extra == 'dev'
+Requires-Dist: pre-commit<5.0.0,>=3.0.0; extra == 'dev'
+Requires-Dist: pytest-asyncio<1.0.0,>=0.21.1; extra == 'dev'
+Requires-Dist: pytest-cov<6.0.0,>=4.0.0; extra == 'dev'
+Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'dev'
+Requires-Dist: pytest<9.0.0,>=7.3.0; extra == 'dev'
+Requires-Dist: respx<1.0.0,>=0.20.0; extra == 'dev'
+Requires-Dist: ruff<1.0.0,>=0.8.0; extra == 'dev'
+Provides-Extra: test
+Requires-Dist: pytest-asyncio<1.0.0,>=0.21.1; extra == 'test'
+Requires-Dist: pytest-cov<6.0.0,>=4.0.0; extra == 'test'
+Requires-Dist: pytest-mock<4.0.0,>=3.10.0; extra == 'test'
+Requires-Dist: pytest<9.0.0,>=7.3.0; extra == 'test'
+Requires-Dist: respx<1.0.0,>=0.20.0; extra == 'test'
+Description-Content-Type: text/markdown
+# langchain-maritaca
+[![PyPI version](https://img.shields.io/pypi/v/langchain-maritaca.svg)](https://pypi.org/project/langchain-maritaca/)
+[![Python](https://img.shields.io/pypi/pyversions/langchain-maritaca.svg)](https://pypi.org/project/langchain-maritaca/)
+[![Downloads](https://img.shields.io/pypi/dm/langchain-maritaca.svg)](https://pypi.org/project/langchain-maritaca/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![CI](https://github.com/anderson-ufrj/langchain-maritaca/actions/workflows/ci.yml/badge.svg)](https://github.com/anderson-ufrj/langchain-maritaca/actions/workflows/ci.yml)
+An integration package connecting [Maritaca AI](https://www.maritaca.ai/) and [LangChain](https://langchain.com/) for Brazilian Portuguese language models.
+**Author:** Anderson Henrique da Silva
+**Location:** Minas Gerais, Brasil
+**GitHub:** [anderson-ufrj](https://github.com/anderson-ufrj)
+## Overview
+Maritaca AI provides state-of-the-art Brazilian Portuguese language models, including the Sabiá family of models. This integration allows you to use Maritaca's models seamlessly within the LangChain ecosystem.
+### Available Models
+| Model | Description | Pricing (per 1M tokens) |
+|-------|-------------|------------------------|
+| `sabia-3.1` | Most capable model, best for complex tasks | Check [Maritaca AI](https://www.maritaca.ai/) for pricing |
+| `sabiazinho-3.1` | Fast and economical, great for simple tasks | Check [Maritaca AI](https://www.maritaca.ai/) for pricing |
+## Installation
+```bash
+pip install langchain-maritaca
+```
+## Setup
+Set your Maritaca API key as an environment variable:
+```bash
+export MARITACA_API_KEY="your-api-key"
+```
+Or pass it directly to the model:
+```python
+from langchain_maritaca import ChatMaritaca
+model = ChatMaritaca(api_key="your-api-key")
+```
+## Usage
+### Basic Usage
+```python
+from langchain_maritaca import ChatMaritaca
+model = ChatMaritaca(
+    model="sabia-3.1",
+    temperature=0.7,
+)
+messages = [
+    ("system", "Você é um assistente prestativo especializado em cultura brasileira."),
+    ("human", "Quais são as principais festas populares do Brasil?"),
+]
+response = model.invoke(messages)
+print(response.content)
+```
+### Streaming
+```python
+from langchain_maritaca import ChatMaritaca
+model = ChatMaritaca(model="sabia-3.1", streaming=True)
+for chunk in model.stream("Conte uma história sobre o folclore brasileiro"):
+    print(chunk.content, end="", flush=True)
+```
+### Async Usage
+```python
+import asyncio
+from langchain_maritaca import ChatMaritaca
+async def main():
+    model = ChatMaritaca(model="sabia-3.1")
+    response = await model.ainvoke("Qual é a receita de pão de queijo?")
+    print(response.content)
+asyncio.run(main())
+```
+### With LangChain Expression Language (LCEL)
+```python
+from langchain_maritaca import ChatMaritaca
+from langchain_core.prompts import ChatPromptTemplate
+model = ChatMaritaca(model="sabia-3.1")
+prompt = ChatPromptTemplate.from_messages([
+    ("system", "Você é um especialista em {topic}."),
+    ("human", "{question}"),
+])
+chain = prompt | model
+response = chain.invoke({
+    "topic": "história do Brasil",
+    "question": "Quem foi Tiradentes?"
+})
+print(response.content)
+```
+### With Tool Calling (Function Calling)
+```python
+from langchain_maritaca import ChatMaritaca
+from langchain_core.tools import tool
+@tool
+def get_weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return f"O clima em {city} está ensolarado, 25°C"
+model = ChatMaritaca(model="sabia-3.1")
+model_with_tools = model.bind_tools([get_weather])
+response = model_with_tools.invoke("Como está o tempo em São Paulo?")
+print(response)
+```
+## Why Maritaca AI?
+Maritaca AI models are specifically trained for Brazilian Portuguese, offering:
+- **Native Portuguese Understanding**: Better comprehension of Brazilian idioms, expressions, and cultural context
+- **Local Data Training**: Trained on diverse Brazilian Portuguese data sources
+- **Cost-Effective**: Competitive pricing for Portuguese language tasks
+- **Low Latency**: Servers located in Brazil for faster response times
+## Used in Production
+**[Cidadão.AI](https://cidadao-ai-frontend.vercel.app/pt)** - Brazilian government transparency platform powered by AI agents, handling 331K+ requests/month.
+- Frontend: [github.com/anderson-ufrj/cidadao.ai-frontend](https://github.com/anderson-ufrj/cidadao.ai-frontend)
+- Backend: [github.com/anderson-ufrj/cidadao.ai-backend](https://github.com/anderson-ufrj/cidadao.ai-backend)
+> *Using this package in production? [Open an issue](https://github.com/anderson-ufrj/langchain-maritaca/issues) to get featured!*
+## API Reference
+### ChatMaritaca
+Main class for interacting with Maritaca AI models.
+**Parameters:**
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `model` | str | `"sabia-3.1"` | Model name to use |
+| `temperature` | float | `0.7` | Sampling temperature (0.0-2.0) |
+| `max_tokens` | int | None | Maximum tokens to generate |
+| `top_p` | float | `0.9` | Top-p sampling parameter |
+| `api_key` | str | None | Maritaca API key (or use env var) |
+| `base_url` | str | `"https://chat.maritaca.ai/api"` | API base URL |
+| `timeout` | float | `60.0` | Request timeout in seconds |
+| `max_retries` | int | `2` | Maximum retry attempts |
+| `streaming` | bool | `False` | Enable streaming responses |
+## Development
+### Setup
+```bash
+# Clone the repository
+git clone https://github.com/anderson-ufrj/langchain-maritaca.git
+cd langchain-maritaca
+# Install dependencies
+pip install -e ".[dev]"
+# Run tests
+pytest
+# Run linting
+ruff check .
+ruff format .
+# Run type checking
+mypy langchain_maritaca
+```
+### Running Tests
+```bash
+# Unit tests only
+pytest tests/unit_tests/
+# Integration tests (requires MARITACA_API_KEY)
+pytest tests/integration_tests/
+# With coverage
+pytest --cov=langchain_maritaca --cov-report=html
+```
+## Contributing
+Contributions are welcome! Please feel free to submit a Pull Request.
+1. Fork the repository
+2. Create your feature branch (`git checkout -b feature/amazing-feature`)
+3. Commit your changes (`git commit -m 'feat: add amazing feature'`)
+4. Push to the branch (`git push origin feature/amazing-feature`)
+5. Open a Pull Request
+## Changelog
+See [CHANGELOG.md](CHANGELOG.md) for a list of changes.
+## License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## Related Projects
+- [LangChain](https://github.com/langchain-ai/langchain) - Building applications with LLMs through composability
+- [Maritaca AI](https://www.maritaca.ai/) - Brazilian Portuguese language models

langchain_maritaca-0.2.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+langchain_maritaca/__init__.py,sha256=0YAmJnHRe5NI-9ryokPZESwenDawi131C-RGAs0WAv4,460
+langchain_maritaca/chat_models.py,sha256=40sIcM57rlvIS_oW_In-U31EVHGOJid9SBXT-m4fMQU,28242
+langchain_maritaca/embeddings.py,sha256=0gFcqYciTOMvgKrHEdKAcA1ikCQ_HyfikJRpesSG3_k,9003
+langchain_maritaca/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_maritaca/version.py,sha256=9FDRquReMRUwtyEEI5Of7v2AnOUhBF8AsSwmJMqQRAU,141
+langchain_maritaca-0.2.2.dist-info/METADATA,sha256=qsNgbWoUCHF3dBgvL_qdkglZl4M1PjidUb3rwmb-Hu8,9042
+langchain_maritaca-0.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+langchain_maritaca-0.2.2.dist-info/licenses/LICENSE,sha256=lvL7E7mqeANCoUEkZDWPeqW6YckPG0FVi920ibuZlA0,1083
+langchain_maritaca-0.2.2.dist-info/RECORD,,

langchain_maritaca-0.2.2.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

langchain_maritaca-0.2.2.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Anderson Henrique da Silva
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.