arcade-google-docs 2.0.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcade_google_docs-2.0.0rc1/.gitignore +175 -0
- arcade_google_docs-2.0.0rc1/.pre-commit-config.yaml +18 -0
- arcade_google_docs-2.0.0rc1/.ruff.toml +46 -0
- arcade_google_docs-2.0.0rc1/Makefile +55 -0
- arcade_google_docs-2.0.0rc1/PKG-INFO +23 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/__init__.py +17 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/decorators.py +24 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/doc_to_html.py +99 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/doc_to_markdown.py +64 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/enum.py +116 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/file_picker.py +49 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/templates.py +5 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/tools/__init__.py +19 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/tools/create.py +82 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/tools/get.py +35 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/tools/search.py +219 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/tools/update.py +60 -0
- arcade_google_docs-2.0.0rc1/arcade_google_docs/utils.py +119 -0
- arcade_google_docs-2.0.0rc1/conftest.py +967 -0
- arcade_google_docs-2.0.0rc1/evals/eval_google_docs.py +384 -0
- arcade_google_docs-2.0.0rc1/pyproject.toml +62 -0
- arcade_google_docs-2.0.0rc1/tests/__init__.py +0 -0
- arcade_google_docs-2.0.0rc1/tests/test_doc_to_markdown.py +10 -0
- arcade_google_docs-2.0.0rc1/tests/test_google_docs.py +179 -0
- arcade_google_docs-2.0.0rc1/tests/test_search.py +276 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
.DS_Store
|
|
2
|
+
credentials.yaml
|
|
3
|
+
docker/credentials.yaml
|
|
4
|
+
|
|
5
|
+
*.lock
|
|
6
|
+
|
|
7
|
+
# example data
|
|
8
|
+
examples/data
|
|
9
|
+
scratch
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
docs/source
|
|
13
|
+
|
|
14
|
+
# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
|
|
15
|
+
|
|
16
|
+
# Byte-compiled / optimized / DLL files
|
|
17
|
+
__pycache__/
|
|
18
|
+
*.py[cod]
|
|
19
|
+
*$py.class
|
|
20
|
+
|
|
21
|
+
# C extensions
|
|
22
|
+
*.so
|
|
23
|
+
|
|
24
|
+
# Distribution / packaging
|
|
25
|
+
.Python
|
|
26
|
+
build/
|
|
27
|
+
develop-eggs/
|
|
28
|
+
dist/
|
|
29
|
+
downloads/
|
|
30
|
+
eggs/
|
|
31
|
+
.eggs/
|
|
32
|
+
lib/
|
|
33
|
+
lib64/
|
|
34
|
+
parts/
|
|
35
|
+
sdist/
|
|
36
|
+
var/
|
|
37
|
+
wheels/
|
|
38
|
+
share/python-wheels/
|
|
39
|
+
*.egg-info/
|
|
40
|
+
.installed.cfg
|
|
41
|
+
*.egg
|
|
42
|
+
MANIFEST
|
|
43
|
+
|
|
44
|
+
# PyInstaller
|
|
45
|
+
# Usually these files are written by a python script from a template
|
|
46
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
47
|
+
*.manifest
|
|
48
|
+
*.spec
|
|
49
|
+
|
|
50
|
+
# Installer logs
|
|
51
|
+
pip-log.txt
|
|
52
|
+
pip-delete-this-directory.txt
|
|
53
|
+
|
|
54
|
+
# Unit test / coverage reports
|
|
55
|
+
htmlcov/
|
|
56
|
+
.tox/
|
|
57
|
+
.nox/
|
|
58
|
+
.coverage
|
|
59
|
+
.coverage.*
|
|
60
|
+
.cache
|
|
61
|
+
nosetests.xml
|
|
62
|
+
coverage.xml
|
|
63
|
+
*.cover
|
|
64
|
+
*.py,cover
|
|
65
|
+
.hypothesis/
|
|
66
|
+
.pytest_cache/
|
|
67
|
+
cover/
|
|
68
|
+
|
|
69
|
+
# Translations
|
|
70
|
+
*.mo
|
|
71
|
+
*.pot
|
|
72
|
+
|
|
73
|
+
# Django stuff:
|
|
74
|
+
*.log
|
|
75
|
+
local_settings.py
|
|
76
|
+
db.sqlite3
|
|
77
|
+
db.sqlite3-journal
|
|
78
|
+
|
|
79
|
+
# Flask stuff:
|
|
80
|
+
instance/
|
|
81
|
+
.webassets-cache
|
|
82
|
+
|
|
83
|
+
# Scrapy stuff:
|
|
84
|
+
.scrapy
|
|
85
|
+
|
|
86
|
+
# Sphinx documentation
|
|
87
|
+
docs/_build/
|
|
88
|
+
|
|
89
|
+
# PyBuilder
|
|
90
|
+
.pybuilder/
|
|
91
|
+
target/
|
|
92
|
+
|
|
93
|
+
# Jupyter Notebook
|
|
94
|
+
.ipynb_checkpoints
|
|
95
|
+
|
|
96
|
+
# IPython
|
|
97
|
+
profile_default/
|
|
98
|
+
ipython_config.py
|
|
99
|
+
|
|
100
|
+
# pyenv
|
|
101
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
102
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
103
|
+
# .python-version
|
|
104
|
+
|
|
105
|
+
# pipenv
|
|
106
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
107
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
108
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
109
|
+
# install all needed dependencies.
|
|
110
|
+
#Pipfile.lock
|
|
111
|
+
|
|
112
|
+
# poetry
|
|
113
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
114
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
115
|
+
# commonly ignored for libraries.
|
|
116
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
117
|
+
poetry.lock
|
|
118
|
+
|
|
119
|
+
# pdm
|
|
120
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
121
|
+
#pdm.lock
|
|
122
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
123
|
+
# in version control.
|
|
124
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
125
|
+
.pdm.toml
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.venv
|
|
140
|
+
env/
|
|
141
|
+
venv/
|
|
142
|
+
ENV/
|
|
143
|
+
env.bak/
|
|
144
|
+
venv.bak/
|
|
145
|
+
|
|
146
|
+
# Spyder project settings
|
|
147
|
+
.spyderproject
|
|
148
|
+
.spyproject
|
|
149
|
+
|
|
150
|
+
# Rope project settings
|
|
151
|
+
.ropeproject
|
|
152
|
+
|
|
153
|
+
# mkdocs documentation
|
|
154
|
+
/site
|
|
155
|
+
|
|
156
|
+
# mypy
|
|
157
|
+
.mypy_cache/
|
|
158
|
+
.dmypy.json
|
|
159
|
+
dmypy.json
|
|
160
|
+
|
|
161
|
+
# Pyre type checker
|
|
162
|
+
.pyre/
|
|
163
|
+
|
|
164
|
+
# pytype static type analyzer
|
|
165
|
+
.pytype/
|
|
166
|
+
|
|
167
|
+
# Cython debug symbols
|
|
168
|
+
cython_debug/
|
|
169
|
+
|
|
170
|
+
# PyCharm
|
|
171
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
172
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
173
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
174
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
175
|
+
#.idea/
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
files: ^.*/google_docs/.*
|
|
2
|
+
repos:
|
|
3
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
4
|
+
rev: "v4.4.0"
|
|
5
|
+
hooks:
|
|
6
|
+
- id: check-case-conflict
|
|
7
|
+
- id: check-merge-conflict
|
|
8
|
+
- id: check-toml
|
|
9
|
+
- id: check-yaml
|
|
10
|
+
- id: end-of-file-fixer
|
|
11
|
+
- id: trailing-whitespace
|
|
12
|
+
|
|
13
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
14
|
+
rev: v0.6.7
|
|
15
|
+
hooks:
|
|
16
|
+
- id: ruff
|
|
17
|
+
args: [--fix]
|
|
18
|
+
- id: ruff-format
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
target-version = "py310"
|
|
2
|
+
line-length = 100
|
|
3
|
+
fix = true
|
|
4
|
+
|
|
5
|
+
[lint]
|
|
6
|
+
select = [
|
|
7
|
+
# flake8-2020
|
|
8
|
+
"YTT",
|
|
9
|
+
# flake8-bandit
|
|
10
|
+
"S",
|
|
11
|
+
# flake8-bugbear
|
|
12
|
+
"B",
|
|
13
|
+
# flake8-builtins
|
|
14
|
+
"A",
|
|
15
|
+
# flake8-comprehensions
|
|
16
|
+
"C4",
|
|
17
|
+
# flake8-debugger
|
|
18
|
+
"T10",
|
|
19
|
+
# flake8-simplify
|
|
20
|
+
"SIM",
|
|
21
|
+
# isort
|
|
22
|
+
"I",
|
|
23
|
+
# mccabe
|
|
24
|
+
"C90",
|
|
25
|
+
# pycodestyle
|
|
26
|
+
"E", "W",
|
|
27
|
+
# pyflakes
|
|
28
|
+
"F",
|
|
29
|
+
# pygrep-hooks
|
|
30
|
+
"PGH",
|
|
31
|
+
# pyupgrade
|
|
32
|
+
"UP",
|
|
33
|
+
# ruff
|
|
34
|
+
"RUF",
|
|
35
|
+
# tryceratops
|
|
36
|
+
"TRY",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[lint.per-file-ignores]
|
|
40
|
+
"*" = ["TRY003", "B904"]
|
|
41
|
+
"**/tests/*" = ["S101", "E501"]
|
|
42
|
+
"**/evals/*" = ["S101", "E501"]
|
|
43
|
+
|
|
44
|
+
[format]
|
|
45
|
+
preview = true
|
|
46
|
+
skip-magic-trailing-comma = false
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
.PHONY: help
|
|
2
|
+
|
|
3
|
+
help:
|
|
4
|
+
	@echo "🛠️ google_docs Commands:\n"
|
|
5
|
+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
|
6
|
+
|
|
7
|
+
.PHONY: install
|
|
8
|
+
install: ## Install the uv environment and install all packages with dependencies
|
|
9
|
+
@echo "🚀 Creating virtual environment and installing all packages using uv"
|
|
10
|
+
@uv sync --active --all-extras --no-sources
|
|
11
|
+
@if [ -f .pre-commit-config.yaml ]; then uv run --no-sources pre-commit install; fi
|
|
12
|
+
@echo "✅ All packages and dependencies installed via uv"
|
|
13
|
+
|
|
14
|
+
.PHONY: install-local
|
|
15
|
+
install-local: ## Install the uv environment and install all packages with dependencies with local Arcade sources
|
|
16
|
+
@echo "🚀 Creating virtual environment and installing all packages using uv"
|
|
17
|
+
@uv sync --active --all-extras
|
|
18
|
+
@if [ -f .pre-commit-config.yaml ]; then uv run pre-commit install; fi
|
|
19
|
+
@echo "✅ All packages and dependencies installed via uv"
|
|
20
|
+
|
|
21
|
+
.PHONY: build
|
|
22
|
+
build: clean-build ## Build wheel file using uv
|
|
23
|
+
@echo "🚀 Creating wheel file"
|
|
24
|
+
uv build
|
|
25
|
+
|
|
26
|
+
.PHONY: clean-build
|
|
27
|
+
clean-build: ## clean build artifacts
|
|
28
|
+
@echo "🗑️ Cleaning dist directory"
|
|
29
|
+
rm -rf dist
|
|
30
|
+
|
|
31
|
+
.PHONY: test
|
|
32
|
+
test: ## Test the code with pytest
|
|
33
|
+
@echo "🚀 Testing code: Running pytest"
|
|
34
|
+
@uv run --no-sources pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml
|
|
35
|
+
|
|
36
|
+
.PHONY: coverage
|
|
37
|
+
coverage: ## Generate coverage report
|
|
38
|
+
@echo "coverage report"
|
|
39
|
+
@uv run --no-sources coverage report
|
|
40
|
+
@echo "Generating coverage report"
|
|
41
|
+
@uv run --no-sources coverage html
|
|
42
|
+
|
|
43
|
+
.PHONY: bump-version
|
|
44
|
+
bump-version: ## Bump the version in the pyproject.toml file by a patch version
|
|
45
|
+
@echo "🚀 Bumping version in pyproject.toml"
|
|
46
|
+
uv version --no-sources --bump patch
|
|
47
|
+
|
|
48
|
+
.PHONY: check
|
|
49
|
+
check: ## Run code quality tools.
|
|
50
|
+
@if [ -f .pre-commit-config.yaml ]; then\
|
|
51
|
+
echo "🚀 Linting code: Running pre-commit";\
|
|
52
|
+
uv run --no-sources pre-commit run -a;\
|
|
53
|
+
fi
|
|
54
|
+
@echo "🚀 Static type checking: Running mypy"
|
|
55
|
+
@uv run --no-sources mypy --config-file=pyproject.toml
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arcade_google_docs
|
|
3
|
+
Version: 2.0.0rc1
|
|
4
|
+
Summary: Arcade.dev LLM tools for Google Docs
|
|
5
|
+
Author-email: Arcade <dev@arcade.dev>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: arcade-tdk<3.0.0,>=2.0.0
|
|
8
|
+
Requires-Dist: google-api-core<3.0.0,>=2.19.1
|
|
9
|
+
Requires-Dist: google-api-python-client<3.0.0,>=2.137.0
|
|
10
|
+
Requires-Dist: google-auth-httplib2<1.0.0,>=0.2.0
|
|
11
|
+
Requires-Dist: google-auth<3.0.0,>=2.32.0
|
|
12
|
+
Requires-Dist: googleapis-common-protos<2.0.0,>=1.63.2
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: arcade-ai[evals]<3.0.0,>=2.0.4; extra == 'dev'
|
|
15
|
+
Requires-Dist: arcade-serve<3.0.0,>=2.0.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: mypy<1.6.0,>=1.5.1; extra == 'dev'
|
|
17
|
+
Requires-Dist: pre-commit<3.5.0,>=3.4.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-asyncio<0.25.0,>=0.24.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest-cov<4.1.0,>=4.0.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-mock<3.12.0,>=3.11.1; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest<8.4.0,>=8.3.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff<0.8.0,>=0.7.4; extra == 'dev'
|
|
23
|
+
Requires-Dist: tox<4.12.0,>=4.11.1; extra == 'dev'
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from arcade_google_docs.tools import (
|
|
2
|
+
create_blank_document,
|
|
3
|
+
create_document_from_text,
|
|
4
|
+
get_document_by_id,
|
|
5
|
+
insert_text_at_end_of_document,
|
|
6
|
+
search_and_retrieve_documents,
|
|
7
|
+
search_documents,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
# Public API of the arcade_google_docs package: the Google Docs tools
# re-exported at package level (imported above from arcade_google_docs.tools).
__all__ = [
    "create_blank_document",
    "create_document_from_text",
    "get_document_by_id",
    "insert_text_at_end_of_document",
    "search_and_retrieve_documents",
    "search_documents",
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from arcade_tdk import ToolContext
|
|
6
|
+
from googleapiclient.errors import HttpError
|
|
7
|
+
|
|
8
|
+
from arcade_google_docs.file_picker import generate_google_file_picker_url
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def with_filepicker_fallback(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorate an async tool so Google API access errors fall back to the file picker.

    Wraps an async tool function whose first argument is a ``ToolContext``. If
    the wrapped call raises an ``HttpError`` with status code 403 or 404, the
    wrapper returns a Google File Picker URL response instead, so the user can
    select the file and grant access, then retry the tool. Any other
    ``HttpError`` is re-raised unchanged.

    Args:
        func: The async tool function to wrap.

    Returns:
        An async wrapper preserving ``func``'s name and docstring
        (via ``functools.wraps``).
    """

    @functools.wraps(func)
    async def async_wrapper(context: ToolContext, *args: Any, **kwargs: Any) -> Any:
        try:
            return await func(context, *args, **kwargs)
        except HttpError as e:
            # 403/404 typically mean the app cannot see or access the file;
            # hand the user a picker URL so they can authorize it.
            if e.status_code in [403, 404]:
                return generate_google_file_picker_url(context)
            raise

    return async_wrapper
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
def convert_document_to_html(document: dict) -> str:
    """Render a Google Docs API document object as a single HTML string.

    The document title and documentId are embedded in the ``<head>``; each
    structural element of the body is converted in order.
    """
    parts = [
        "<html><head>",
        f"<title>{document['title']}</title>",
        f'<meta name="documentId" content="{document["documentId"]}">',
        "</head><body>",
    ]
    parts.extend(
        convert_structural_element(element) for element in document["body"]["content"]
    )
    parts.append("</body></html>")
    return "".join(parts)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> str:
    """Convert one structural element of a Google Docs body to HTML.

    Section breaks and tables of contents produce no output. Paragraphs are
    wrapped in tags derived from their named style (suppressed when
    ``wrap_paragraphs`` is False, e.g. inside table cells); tables are
    rendered cell by cell.

    Raises:
        ValueError: If the element is not a recognized structural type.
    """
    if "sectionBreak" in element or "tableOfContents" in element:
        return ""

    if "paragraph" in element:
        paragraph = element["paragraph"]
        prepend, append = get_paragraph_style_tags(
            style=paragraph["paragraphStyle"],
            wrap_paragraphs=wrap_paragraphs,
        )
        # Only textRun items carry text; other paragraph elements are skipped.
        body = "".join(
            extract_paragraph_content(item["textRun"])
            for item in paragraph["elements"]
            if "textRun" in item
        )
        if not body:
            return ""
        return f"{prepend}{body.strip()}{append}"

    if "table" in element:
        rows = [
            [
                "".join(
                    convert_structural_element(element=cell_element, wrap_paragraphs=False)
                    for cell_element in cell["content"]
                )
                for cell in row["tableCells"]
            ]
            for row in element["table"]["tableRows"]
        ]
        return table_list_to_html(rows)

    raise ValueError(f"Unknown document body element type: {element}")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def extract_paragraph_content(text_run: dict) -> str:
    """Return a textRun's content with its character style applied as HTML."""
    return apply_text_style(text_run["content"], text_run["textStyle"])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def apply_text_style(content: str, style: dict) -> str:
    """Apply HTML emphasis tags to *content* according to *style*.

    Trailing newlines are removed, interior newlines become ``<br>``, and the
    text is wrapped in ``<i>`` and/or ``<b>`` when the style requests it
    (bold is the outermost tag).
    """
    text = content.rstrip("\n").replace("\n", "<br>")
    if style.get("italic", False):
        text = f"<i>{text}</i>"
    if style.get("bold", False):
        text = f"<b>{text}</b>"
    return text
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple[str, str]:
    """Return the (opening, closing) HTML tag pair for a paragraph's named style.

    TITLE maps to ``<h1>``, SUBTITLE to ``<h2>``, ``HEADING_n`` to ``<hn>``;
    everything else (including NORMAL_TEXT and unparseable heading suffixes)
    falls back to ``<p>`` tags, or empty strings when ``wrap_paragraphs`` is
    False.
    """
    fallback = ("<p>", "</p>") if wrap_paragraphs else ("", "")
    named_style = style["namedStyleType"]
    if named_style == "TITLE":
        return "<h1>", "</h1>"
    if named_style == "SUBTITLE":
        return "<h2>", "</h2>"
    if named_style.startswith("HEADING_"):
        try:
            level = int(named_style.split("_")[1])
        except ValueError:
            # Non-numeric heading suffix: treat as a plain paragraph.
            return fallback
        return f"<h{level}>", f"</h{level}>"
    return fallback
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def table_list_to_html(table: list[list[str]]) -> str:
    """Render a 2D list of cell HTML fragments as an HTML ``<table>``.

    A trailing ``<br>`` on a cell is dropped so cells don't end with a
    spurious blank line.
    """
    rows_html = []
    for row in table:
        cells = "".join(f"<td>{cell.removesuffix('<br>')}</td>" for cell in row)
        rows_html.append(f"<tr>{cells}</tr>")
    return "<table>" + "".join(rows_html) + "</table>"
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import arcade_google_docs.doc_to_html as doc_to_html
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def convert_document_to_markdown(document: dict) -> str:
    """Render a Google Docs API document object as a Markdown string.

    Emits a YAML front-matter header with the title and documentId, then
    converts each structural element of the body in order.
    """
    header = f"---\ntitle: {document['title']}\ndocumentId: {document['documentId']}\n---\n"
    body = "".join(
        convert_structural_element(element) for element in document["body"]["content"]
    )
    return header + body
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def convert_structural_element(element: dict) -> str:
    """Convert one structural element of a Google Docs body to Markdown.

    Section breaks and tables of contents produce no output. Tables are
    delegated to the HTML converter (they stay HTML inside the Markdown).

    Raises:
        ValueError: If the element is not a recognized structural type.
    """
    if "sectionBreak" in element or "tableOfContents" in element:
        return ""

    if "paragraph" in element:
        paragraph = element["paragraph"]
        prepend = get_paragraph_style_prepend_str(paragraph["paragraphStyle"])
        # Only textRun items carry text; each gets the style prefix.
        return "".join(
            f"{prepend}{extract_paragraph_content(item['textRun'])}"
            for item in paragraph["elements"]
            if "textRun" in item
        )

    if "table" in element:
        return doc_to_html.convert_structural_element(element)

    raise ValueError(f"Unknown document body element type: {element}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def extract_paragraph_content(text_run: dict) -> str:
    """Return a textRun's content with its character style applied as Markdown."""
    return apply_text_style(text_run["content"], text_run["textStyle"])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def apply_text_style(content: str, style: dict) -> str:
    """Apply Markdown emphasis to *content* according to *style*.

    Italic wraps in underscores, bold in double asterisks (bold outermost).
    If the input ended with a newline, a single newline is re-appended
    outside the emphasis markers.
    """
    trailing = "\n" if content.endswith("\n") else ""
    text = content.rstrip("\n")
    if style.get("italic", False):
        text = f"_{text}_"
    if style.get("bold", False):
        text = f"**{text}**"
    return text + trailing
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_paragraph_style_prepend_str(style: dict) -> str:
    """Return the Markdown heading prefix for a paragraph's named style.

    TITLE maps to ``"# "``, SUBTITLE to ``"## "``, ``HEADING_n`` to ``n``
    hash marks; everything else (including NORMAL_TEXT and unparseable
    heading suffixes) gets no prefix.
    """
    named_style = style["namedStyleType"]
    if named_style == "TITLE":
        return "# "
    if named_style == "SUBTITLE":
        return "## "
    if named_style.startswith("HEADING_"):
        try:
            level = int(named_style.split("_")[1])
        except ValueError:
            # Non-numeric heading suffix: treat as plain text.
            return ""
        return f"{'#' * level} "
    return ""
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Corpora(str, Enum):
    """
    Bodies of items (files/documents) to which the query applies.
    Prefer 'user' or 'drive' to 'allDrives' for efficiency.
    By default, corpora is set to 'user'.
    """

    USER = "user"
    DOMAIN = "domain"
    DRIVE = "drive"
    # NOTE: per the class docstring, prefer USER or DRIVE over ALL_DRIVES
    # for efficiency.
    ALL_DRIVES = "allDrives"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DocumentFormat(str, Enum):
    """Supported output representations for a Google Docs document."""

    MARKDOWN = "markdown"
    HTML = "html"
    # Raw document object as returned by the Google Docs API, unconverted.
    GOOGLE_API_JSON = "google_api_json"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OrderBy(str, Enum):
    """
    Sort keys for ordering files in Google Drive.
    Each key has both ascending and descending options.
    """

    CREATED_TIME = "createdTime"  # When the file was created (ascending)
    CREATED_TIME_DESC = "createdTime desc"  # When the file was created (descending)
    FOLDER = "folder"  # The folder ID, sorted using alphabetical ordering (ascending)
    FOLDER_DESC = "folder desc"  # The folder ID, sorted using alphabetical ordering (descending)
    MODIFIED_BY_ME_TIME = "modifiedByMeTime"  # Last modified by the user (ascending)
    MODIFIED_BY_ME_TIME_DESC = "modifiedByMeTime desc"  # Last modified by the user (descending)
    MODIFIED_TIME = "modifiedTime"  # Last modified by anyone (ascending)
    MODIFIED_TIME_DESC = "modifiedTime desc"  # Last modified by anyone (descending)
    NAME = "name"  # File name, alphabetical ordering (e.g., 1, 12, 2, 22) (ascending)
    NAME_DESC = "name desc"  # File name, alphabetical ordering (e.g., 1, 12, 2, 22) (descending)
    NAME_NATURAL = "name_natural"  # File name, natural sort (e.g., 1, 2, 12, 22) (ascending)
    NAME_NATURAL_DESC = "name_natural desc"  # File name, natural sort (e.g., 1, 2, 12, 22) (descending)
    QUOTA_BYTES_USED = "quotaBytesUsed"  # Storage quota bytes used by the file (ascending)
    QUOTA_BYTES_USED_DESC = "quotaBytesUsed desc"  # Storage quota bytes used by the file (descending)
    RECENCY = "recency"  # Most recent timestamp among the file's date-time fields (ascending)
    RECENCY_DESC = "recency desc"  # Most recent timestamp among the file's date-time fields (descending)
    SHARED_WITH_ME_TIME = "sharedWithMeTime"  # When shared with the user, if applicable (ascending)
    SHARED_WITH_ME_TIME_DESC = "sharedWithMeTime desc"  # When shared with the user, if applicable (descending)
    STARRED = "starred"  # Whether the user has starred the file (ascending)
    STARRED_DESC = "starred desc"  # Whether the user has starred the file (descending)
    VIEWED_BY_ME_TIME = "viewedByMeTime"  # Last viewed by the user (ascending)
    VIEWED_BY_ME_TIME_DESC = "viewedByMeTime desc"  # Last viewed by the user (descending)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from arcade_tdk import ToolContext, ToolMetadataKey
|
|
5
|
+
from arcade_tdk.errors import ToolExecutionError
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate_google_file_picker_url(context: ToolContext) -> dict:
    """Generate a Google File Picker URL for user-driven file selection and authorization.

    Generates a URL that directs the end-user to a Google File Picker interface
    where they can select or upload Google Drive files. Users can grant
    permission to access their Drive files, providing a secure and authorized
    way to interact with their files.

    This is particularly useful when prior tools (e.g., those accessing or
    modifying Google Docs, Google Sheets, etc.) encountered failures due to
    file non-existence (Requested entity was not found) or permission errors.
    Once the user completes the file picker flow, the prior tool can be retried.

    Raises:
        ToolExecutionError: If the configured Google Client ID is empty or blank.

    Returns:
        A dictionary containing the URL and instructions for the llm to instruct the user.
    """
    client_id = context.get_metadata(ToolMetadataKey.CLIENT_ID)
    client_id_parts = client_id.split("-") if client_id else []
    # str.split always returns at least one element, so an emptiness check on
    # the list alone would never fire; also reject a blank first segment.
    if not client_id_parts or not client_id_parts[0]:
        raise ToolExecutionError(
            message="Invalid Google Client ID",
            developer_message=f"Google Client ID '{client_id}' is not valid",
        )
    # The segment before the first '-' is used as the picker app ID.
    app_id = client_id_parts[0]
    # rstrip only: a trailing slash must go, but the URL start stays intact.
    cloud_coordinator_url = context.get_metadata(ToolMetadataKey.COORDINATOR_URL).rstrip("/")

    config = {
        "auth": {
            "client_id": client_id,
            "app_id": app_id,
        },
    }
    config_json = json.dumps(config)
    # URL-safe base64 so the config can travel as a query parameter.
    config_base64 = base64.urlsafe_b64encode(config_json.encode("utf-8")).decode("utf-8")
    url = f"{cloud_coordinator_url}/google/drive_picker?config={config_base64}"

    return {
        "url": url,
        "llm_instructions": (
            "Instruct the user to click the following link to open the Google Drive File Picker. "
            f"This will allow them to select files and grant access permissions: {url}"
        ),
    }
|