PyPI - groundx - Versions diffs - 2.4.5__py3-none-any.whl → 2.4.9__py3-none-any.whl - Mend

groundx 2.4.5-py3-none-any.whl → 2.4.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of groundx might be problematic. Click here for more details.

Files changed (36) hide show

groundx/core/client_wrapper.py +2 -2
groundx/extract/__init__.py +38 -0
groundx/extract/agents/__init__.py +7 -0
groundx/extract/agents/agent.py +202 -0
groundx/extract/classes/__init__.py +27 -0
groundx/extract/classes/agent.py +22 -0
groundx/extract/classes/api.py +15 -0
groundx/extract/classes/document.py +311 -0
groundx/extract/classes/field.py +88 -0
groundx/extract/classes/groundx.py +123 -0
groundx/extract/classes/post_process.py +33 -0
groundx/extract/classes/prompt.py +36 -0
groundx/extract/classes/settings.py +169 -0
groundx/extract/classes/test_document.py +126 -0
groundx/extract/classes/test_field.py +43 -0
groundx/extract/classes/test_groundx.py +188 -0
groundx/extract/classes/test_prompt.py +68 -0
groundx/extract/classes/test_settings.py +515 -0
groundx/extract/classes/test_utility.py +81 -0
groundx/extract/classes/utility.py +193 -0
groundx/extract/services/.DS_Store +0 -0
groundx/extract/services/__init__.py +14 -0
groundx/extract/services/csv.py +76 -0
groundx/extract/services/logger.py +127 -0
groundx/extract/services/logging_cfg.py +55 -0
groundx/extract/services/ratelimit.py +104 -0
groundx/extract/services/sheets_client.py +160 -0
groundx/extract/services/status.py +197 -0
groundx/extract/services/upload.py +73 -0
groundx/extract/services/upload_minio.py +122 -0
groundx/extract/services/upload_s3.py +84 -0
groundx/extract/services/utility.py +52 -0
{groundx-2.4.5.dist-info → groundx-2.4.9.dist-info}/METADATA +1 -1
{groundx-2.4.5.dist-info → groundx-2.4.9.dist-info}/RECORD +36 -5
{groundx-2.4.5.dist-info → groundx-2.4.9.dist-info}/LICENSE +0 -0
{groundx-2.4.5.dist-info → groundx-2.4.9.dist-info}/WHEEL +0 -0

groundx/extract/classes/utility.py ADDED Viewed

@@ -0,0 +1,193 @@
+import typing
+if typing.TYPE_CHECKING:
+    from .prompt import Prompt
+def class_fields(cls: typing.Any) -> typing.Set[str]:
+    """Return the field names declared on ``cls``.
+
+    The names are the keys of the ``model_fields`` mapping when the class has
+    one, otherwise the keys of ``__fields__``. A class exposing neither
+    attribute yields an empty set.
+    """
+    # Checked in priority order: the first attribute found wins.
+    for attr in ("model_fields", "__fields__"):
+        if hasattr(cls, attr):
+            return set(getattr(cls, attr).keys())
+    return set()
+def clean_json(txt: str) -> str:
+    """Strip a Markdown code fence (and a ``json`` language tag) from ``txt``.
+
+    Surrounding whitespace is removed first so that a leading newline or a
+    trailing newline after the closing fence does not hide the fence markers.
+
+    Args:
+        txt: Raw text that may wrap a JSON payload in a fenced block.
+
+    Returns:
+        The payload with the fence markers and outer whitespace removed.
+    """
+    txt = txt.strip()
+    # Prefixes are tried in order and each match is removed, so a fence
+    # followed by a separate language line is fully stripped.
+    for p in ("json```\n", "```json\n", "```\n", "json\n"):
+        if txt.startswith(p):
+            txt = txt[len(p) :]
+    if txt.endswith("```"):
+        txt = txt[:-3]
+    return txt.strip()
+def coerce_numeric_string(
+    value: typing.Any,
+    et: typing.Union[str, typing.List[str]],
+) -> typing.Union[typing.Any, typing.List[typing.Any]]:
+    """Coerce ``value`` toward the numeric type(s) named by ``et``.
+
+    Args:
+        value: The raw value; strings may contain thousands separators (",").
+        et: One type name or a list of type names, resolved through
+            ``str_to_type_sequence``.
+
+    Returns:
+        * When ``int`` or ``float`` is expected: a numeric string is parsed
+          (commas ignored) and returned as ``float`` if ``float`` is expected,
+          otherwise truncated to ``int``. Values that cannot be converted are
+          returned exactly as they were passed in.
+        * When only ``str`` is expected and ``value`` is the string ``"0"``:
+          ``None``.
+        * Otherwise ``value`` unchanged.
+    """
+    expected_types = str_to_type_sequence(et)
+    if any(t in (int, float) for t in expected_types):
+        original = value
+        if isinstance(value, str):
+            try:
+                value = float(value.replace(",", ""))
+            except ValueError:
+                # Not numeric: hand back the caller's string untouched rather
+                # than a copy with its commas removed.
+                return original
+        if float in expected_types:
+            return value
+        try:
+            return int(value)
+        except (TypeError, ValueError, OverflowError):
+            # Lists, dicts, None, NaN and infinities cannot become an int;
+            # leave them for the caller's own type validation.
+            return original
+    if str in expected_types and isinstance(value, str) and value == "0":
+        return None
+    return value
+def from_attr_name(
+    name: str, prompts: typing.Sequence[typing.Mapping[str, "Prompt"]]
+) -> typing.Tuple[typing.Optional[str], typing.Optional[typing.Any]]:
+    """Find the first prompt whose ``attr_name`` attribute equals ``name``.
+
+    The mappings in ``prompts`` are scanned in order. Returns the matching
+    ``(key, prompt)`` pair, or ``(None, None)`` when nothing matches. Prompts
+    without an ``attr_name`` attribute are skipped.
+    """
+    hits = (
+        (label, candidate)
+        for group in prompts
+        for label, candidate in group.items()
+        if getattr(candidate, "attr_name", None) == name
+    )
+    return next(hits, (None, None))
+def from_key(
+    name: str,
+    prompts: typing.Sequence[typing.Mapping[str, "Prompt"]],
+) -> typing.Tuple[typing.Optional[str], typing.Optional[typing.Any]]:
+    """Look ``name`` up as a mapping key first, then as a prompt ``attr_name``.
+
+    Returns the ``(key, prompt)`` pair of the first mapping that contains
+    ``name`` as a key. If none does, the result of ``from_attr_name`` is used,
+    provided the prompt it found is truthy. Otherwise ``(None, None)``.
+    """
+    for group in prompts:
+        if name in group:
+            return name, group[name]
+    attr_key, attr_prompt = from_attr_name(name, prompts)
+    return (attr_key, attr_prompt) if attr_prompt else (None, None)
+def str_to_type_sequence(
+    ty: typing.Union[str, typing.List[str]],
+) -> typing.Sequence[typing.Type[typing.Any]]:
+    """Resolve one type name, or a list of them, into a list of types.
+
+    Each name is converted with ``str_to_type``; a single name produces a
+    one-element list.
+    """
+    labels = ty if isinstance(ty, list) else [ty]
+    return [str_to_type(label) for label in labels]
+def str_to_type(
+    ty: str,
+) -> typing.Type[typing.Any]:
+    """Map a type name to its Python type.
+
+    Recognises "int", "float", "list" and "dict"; every other name maps to
+    ``str``.
+    """
+    known = (("int", int), ("float", float), ("list", list), ("dict", dict))
+    for label, resolved in known:
+        if ty == label:
+            return resolved
+    return str
+def type_to_str(
+    ty: typing.Union[typing.Type[typing.Any], typing.Sequence[typing.Type[typing.Any]]],
+) -> typing.Union[str, typing.List[str]]:
+    """Convert a type, or a sequence of types, to type-name string(s).
+
+    Args:
+        ty: A single type or a list/tuple of types.
+
+    Returns:
+        For a single type: "int", "float", "list" or "dict", and "str" for
+        anything else. For a list or tuple: a list with one name per element,
+        where a nested sequence is reported as "list".
+    """
+    if isinstance(ty, (list, tuple)):
+        tys: typing.List[str] = []
+        for t in ty:
+            nt = type_to_str(t)
+            # A nested sequence resolves to a list of names; record it as "list".
+            tys.append(nt if isinstance(nt, str) else "list")
+        # Return the collected names instead of falling through to the
+        # scalar comparisons below, which would always yield "str".
+        return tys
+    if ty is int:
+        return "int"
+    if ty is float:
+        return "float"
+    if ty is list:
+        return "list"
+    if ty is dict:
+        return "dict"
+    return "str"
+def validate_confidence(
+    key: str,
+    key_data: typing.Any,
+    fields: typing.Set[str],
+    value: typing.Any,
+    errors: typing.Dict[str, str],
+) -> typing.Tuple[
+    typing.Union[typing.Any, typing.List[typing.Any]],
+    typing.Optional[str],
+    typing.Optional[str],
+]:
+    """Validate a ``{"value": ..., "confidence": ...}`` payload for one field.
+
+    Args:
+        key: Label used in the "unexpected type" error message.
+        key_data: Object providing ``attr_name``, ``type`` and ``valid_value``.
+        fields: Attribute names that are allowed.
+        value: The payload to validate, or ``None``.
+        errors: Not read or written by this function.
+
+    Returns:
+        ``(final_value, confidence, error)``. ``error`` is ``None`` on success
+        or when the payload or its "value" entry is ``None``. On failure it
+        holds a message, and ``final_value`` is populated only once the value
+        itself has been coerced.
+    """
+    attr = key_data.attr_name
+    if attr not in fields:
+        return None, None, f"unexpected attribute [{attr}]"
+    if value is None:
+        return None, None, None
+    if not isinstance(value, dict):
+        problem = f"unexpected value type [{attr}] [{type(value)}] [{key_data.type}]\n[{value}]"
+        return None, None, problem
+    if "value" not in value:
+        return None, None, f'value is missing "value" key [{attr}]\n[{value}]'
+    raw = value["value"]
+    if raw is None:
+        return None, None, None
+    final_value = coerce_numeric_string(raw, key_data.type)
+    if not key_data.valid_value(final_value):
+        problem = f"unexpected type for statement [{key}] value [{type(final_value)}]\n\n{final_value}"
+        return final_value, None, problem
+    if "confidence" not in value:
+        return final_value, None, f'value is missing "confidence" key [{attr}]\n[{value}]'
+    confidence = value["confidence"]
+    if not isinstance(confidence, str):
+        return final_value, None, f"confidence is not type str [{attr}]\n[{value}]"
+    if confidence not in ("low", "medium", "high"):
+        problem = f"confidence value is unsupported value [{attr}]\n[{confidence}]"
+        return final_value, None, problem
+    return final_value, confidence, None

groundx/extract/services/.DS_Store ADDED Viewed

Binary file

groundx/extract/services/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+from .logger import Logger
+from .sheets_client import SheetsClient
+from .ratelimit import RateLimit
+from .status import Status
+from .upload import Upload
+__all__ = [
+    "Logger",
+    "RateLimit",
+    "SheetsClient",
+    "Status",
+    "Upload",
+]

groundx/extract/services/csv.py ADDED Viewed

@@ -0,0 +1,76 @@
+import csv, typing
+from pathlib import Path
+def append_row(
+    csv_path: Path,
+    headers: typing.List[str],
+    row: typing.Dict[str, str],
+) -> None:
+    """Append ``row`` to the CSV file at ``csv_path``.
+
+    ``headers`` fixes the column order. No header line is written.
+    """
+    with csv_path.open(mode="a", newline="") as handle:
+        csv.DictWriter(handle, fieldnames=headers).writerow(row)
+def extraction_row(
+    record: typing.Mapping[str, typing.Any], keys_in_order: typing.Sequence[str]
+) -> typing.List[typing.Any]:
+    """Return the values of ``record`` in ``keys_in_order`` order.
+
+    A key missing from ``record`` contributes an empty string.
+    """
+    cells: typing.List[typing.Any] = []
+    for column in keys_in_order:
+        cells.append(record.get(column, ""))
+    return cells
+def find_rows(
+    query: typing.Dict[str, str],
+    csv_path: str,
+) -> typing.List[typing.Dict[str, str]]:
+    """Return every row of the UTF-8 CSV at ``csv_path`` that matches ``query``.
+
+    A row matches when, for each query column, the string form of the row's
+    cell equals the string form of the query value. An empty query matches
+    every row.
+    """
+    with open(csv_path, newline="", encoding="utf-8") as handle:
+        return [
+            record
+            for record in csv.DictReader(handle)
+            if all(str(record.get(col)) == str(want) for col, want in query.items())
+        ]
+def load_row(
+    key: str,
+    match: typing.List[str],
+    csv_path: typing.Optional[Path] = None,
+    rows: typing.Optional[typing.List[typing.Dict[str, str]]] = None,
+) -> typing.Optional[typing.Dict[str, str]]:
+    """Return the first row whose ``key`` cell is one of ``match``.
+
+    Rows come from ``rows`` when given, otherwise they are loaded from
+    ``csv_path`` with ``load_rows``.
+
+    Returns:
+        The first matching row, or ``None`` when no row matches.
+
+    Raises:
+        Exception: If neither source is supplied, or if no rows are available
+            (including an empty list).
+    """
+    if csv_path is None and rows is None:
+        raise Exception("csv_path and rows are None")
+    candidates = rows
+    if candidates is None and csv_path:
+        candidates = load_rows(csv_path)
+    if not candidates:
+        raise Exception("rows are None")
+    for candidate in candidates:
+        if candidate.get(key) in match:
+            return candidate
+    return None
+def load_rows(csv_path: Path) -> typing.List[typing.Dict[str, str]]:
+    """Read the CSV at ``csv_path`` and return its rows as dicts keyed by header."""
+    with csv_path.open("r", newline="") as handle:
+        return list(csv.DictReader(handle))
+def save_rows(
+    csv_path: Path, headers: typing.List[str], rows: typing.List[typing.Dict[str, str]]
+) -> None:
+    """Overwrite ``csv_path`` with a header line followed by ``rows``.
+
+    ``headers`` supplies both the header line and the column order.
+    """
+    with csv_path.open("w", newline="") as handle:
+        out = csv.DictWriter(handle, fieldnames=headers)
+        out.writeheader()
+        out.writerows(rows)

groundx/extract/services/logger.py ADDED Viewed

@@ -0,0 +1,127 @@
+import logging, logging.config, typing
+from .logging_cfg import logging_config
+class Logger:
+    """Wrapper around a named ``logging`` logger with prefixed messages and HTTP callbacks."""
+    def __init__(
+        self,
+        name: str,
+        level: str,
+    ) -> None:
+        """Apply the dict config built by ``logging_config`` and bind the named logger."""
+        logging.config.dictConfig(logging_config(name, level))
+        self.logger = logging.getLogger(name)
+    def debug_msg(
+        self,
+        msg: str,
+        name: typing.Optional[str] = None,
+        document_id: typing.Optional[str] = None,
+        task_id: typing.Optional[str] = None,
+    ) -> None:
+        """Emit ``msg`` at DEBUG level."""
+        self.print_msg("DEBUG", msg, name, document_id, task_id)
+    def error_msg(
+        self,
+        msg: str,
+        name: typing.Optional[str] = None,
+        document_id: typing.Optional[str] = None,
+        task_id: typing.Optional[str] = None,
+    ) -> None:
+        """Emit ``msg`` at ERROR level."""
+        self.print_msg("ERROR", msg, name, document_id, task_id)
+    def info_msg(
+        self,
+        msg: str,
+        name: typing.Optional[str] = None,
+        document_id: typing.Optional[str] = None,
+        task_id: typing.Optional[str] = None,
+    ) -> None:
+        """Emit ``msg`` at INFO level."""
+        self.print_msg("INFO", msg, name, document_id, task_id)
+    def report_error(
+        self,
+        api_key: str,
+        callback_url: str,
+        req: typing.Optional[typing.Dict[str, typing.Any]],
+        msg: str,
+    ) -> None:
+        """Log ``msg`` as an error and POST ``req`` to ``callback_url``.
+
+        The POST is skipped when ``req`` is ``None`` or ``callback_url`` is
+        empty. ``api_key`` is sent in the ``X-API-Key`` header.
+        """
+        import requests
+        self.error_msg(msg)
+        nothing_to_send = req is None or callback_url == ""
+        if nothing_to_send:
+            return
+        auth_headers = {"X-API-Key": api_key}
+        requests.post(callback_url, json=req, headers=auth_headers)
+    def report_result(
+        self,
+        api_key: str,
+        callback_url: str,
+        result_url: str,
+        req: typing.Dict[str, typing.Any],
+    ):
+        """POST ``req`` to ``callback_url``; does nothing when the URL is empty.
+
+        ``api_key`` is sent in the ``X-API-Key`` header. ``result_url`` is
+        accepted but not used here.
+        """
+        import requests
+        if callback_url == "":
+            return
+        self.info_msg("calling back to [%s]" % (callback_url))
+        auth_headers = {"X-API-Key": api_key}
+        requests.post(callback_url, json=req, headers=auth_headers)
+    def warning_msg(
+        self,
+        msg: str,
+        name: typing.Optional[str] = None,
+        document_id: typing.Optional[str] = None,
+        task_id: typing.Optional[str] = None,
+    ) -> None:
+        """Emit ``msg`` at WARNING level."""
+        self.print_msg("WARNING", msg, name, document_id, task_id)
+    def print_msg(
+        self,
+        level: str,
+        msg: str,
+        name: typing.Optional[str] = None,
+        document_id: typing.Optional[str] = None,
+        task_id: typing.Optional[str] = None,
+    ) -> None:
+        """Format ``msg`` with an optional context prefix and log it at ``level``.
+
+        The prefix joins, in order, ``[name]``, ``d [document_id]`` and
+        ``t [task_id]`` for whichever are truthy. ``level`` is matched
+        case-insensitively; anything other than ERROR, WARN/WARNING or INFO is
+        logged at debug level.
+        """
+        tags: typing.List[str] = []
+        if name:
+            tags.append(f"[{name}]")
+        if document_id:
+            tags.append(f"d [{document_id}]")
+        if task_id:
+            tags.append(f"t [{task_id}]")
+        prefix = " ".join(tags)
+        text = f"{prefix} " if prefix != "" else ""
+        text += f"\n\n\t>> {msg}\n"
+        # Map the normalised level onto the logger method to call.
+        method_by_level = {
+            "ERROR": "error",
+            "WARN": "warning",
+            "WARNING": "warning",
+            "INFO": "info",
+        }
+        emit = getattr(self.logger, method_by_level.get(level.upper(), "debug"))
+        emit(text)

groundx/extract/services/logging_cfg.py ADDED Viewed

@@ -0,0 +1,55 @@
+import typing
+def logging_config(name: str, level: str) -> typing.Dict[str, typing.Any]:
+    """Build a ``logging.config.dictConfig`` dictionary for the service.
+
+    Args:
+        name: Name of the application logger to configure.
+        level: Level applied to the application logger and to the shared
+            stdout handler.
+
+    Returns:
+        A version-1 config with one stdout ``StreamHandler`` named "default",
+        the root logger at WARNING, the ``name`` logger at ``level``, and
+        fixed levels for the gunicorn and uvicorn loggers. Every non-root
+        logger has propagation disabled.
+    """
+    def _isolated(lvl: str) -> typing.Dict[str, typing.Any]:
+        # Non-propagating logger entry bound to the shared stdout handler.
+        return {"level": lvl, "handlers": ["default"], "propagate": False}
+    return {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "default": {
+                "format": "%(asctime)s - [%(process)d] - %(levelname)s - %(message)s",
+            },
+        },
+        "handlers": {
+            "default": {
+                "level": level,
+                "formatter": "default",
+                "class": "logging.StreamHandler",
+                "stream": "ext://sys.stdout",
+            },
+        },
+        "loggers": {
+            "": {
+                "handlers": ["default"],
+                "level": "WARNING",
+            },
+            # Key order is kept: a ``name`` equal to a later key is overridden by it.
+            name: _isolated(level),
+            "gunicorn.error": _isolated("INFO"),
+            "gunicorn.access": _isolated("WARNING"),
+            "uvicorn": _isolated("INFO"),
+            "uvicorn.error": _isolated("INFO"),
+            "uvicorn.access": _isolated("WARNING"),
+        },
+    }

groundx/extract/services/ratelimit.py ADDED Viewed

@@ -0,0 +1,104 @@
+import typing
+from dataclasses import asdict
+from fastapi import Request, HTTPException
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import JSONResponse, Response
+from starlette.types import ASGIApp
+from ..classes.api import ProcessResponse
+from ..classes.settings import ContainerSettings
+from .logger import Logger
+from .status import Status
+from .utility import get_gunicorn_threads, get_thread_id, get_worker_id
+class RateLimit(BaseHTTPMiddleware):
+    """HTTP middleware that checks API keys and records this worker's availability in ``Status``."""
+    def __init__(
+        self,
+        app: ASGIApp,
+        settings: ContainerSettings,
+        logger: Logger,
+    ) -> None:
+        """Create the ``Status`` tracker and mark this worker as available.
+
+        Args:
+            app: The wrapped ASGI application.
+            settings: Container settings; supplies the valid API keys and the
+                service name used in log output.
+            logger: Logger used by this middleware and passed to ``Status``.
+        """
+        super().__init__(app)
+        self.worker_id = get_worker_id()
+        # The adjusted thread count is only reported in the init log line below.
+        num_threads = get_gunicorn_threads()
+        if num_threads > 1:
+            num_threads = num_threads - 1
+        self.status = Status(
+            settings,
+            logger,
+        )
+        self.settings = settings
+        self.logger = logger
+        self.thread_ids: typing.Dict[str, typing.Any] = {}
+        self.status.set_worker_available(self.worker_id)
+        self.logger.info_msg(
+            f"[{self.settings.service}] ratelimit init [{num_threads}]"
+        )
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: typing.Callable[[Request], typing.Awaitable[Response]],
+    ) -> Response:
+        """Authenticate the request and mark the worker busy while it is handled.
+
+        ``/health`` requests skip authentication; the worker entry is refreshed
+        and the response headers are set through ``Status.set_headers``. Other
+        requests must carry a valid key in ``X-API-Key`` or ``Authorization``
+        (an optional "Bearer " prefix is removed). The worker is marked
+        unavailable for the duration of ``call_next`` and rate-limit headers
+        are added to the response.
+
+        Returns:
+            The downstream response, or a JSON error response built from an
+            ``HTTPException`` (403 for a missing or invalid key).
+        """
+        thread_id, self.thread_ids = get_thread_id(self.thread_ids)
+        # True only while this request holds the worker in the unavailable state.
+        marked_busy = False
+        try:
+            if request.url.path == "/health":
+                response = await call_next(request)
+                self.status.refresh_worker(self.worker_id)
+                available, total = self.status.get_worker_state(self.worker_id)
+                response = self.status.set_headers(
+                    response, self.worker_id, available, total
+                )
+                return response
+            api_key = request.headers.get("X-API-Key") or request.headers.get(
+                "Authorization"
+            )
+            bearer = "Bearer "
+            if api_key and api_key.startswith(bearer):
+                # Drop only the leading scheme so the rest of the key is kept intact.
+                api_key = api_key[len(bearer) :]
+            if not api_key or api_key not in self.settings.get_valid_api_keys():
+                raise HTTPException(status_code=403, detail="Invalid API key")
+            request.state.api_key = api_key
+            marked_busy = True
+            self.status.set_worker_unavailable(self.worker_id)
+            response = await call_next(request)
+            marked_busy = False
+            self.status.set_worker_available(self.worker_id)
+            available, total = self.status.get_service_state()
+            response.headers.update(
+                {
+                    "X-RateLimit-Limit-Requests": str(total),
+                    "X-RateLimit-Remaining-Requests": str(max(0, available)),
+                    "X-Worker-ID": f"{self.worker_id}:{thread_id}",
+                }
+            )
+            return response
+        except HTTPException as exc:
+            return JSONResponse(
+                status_code=exc.status_code,
+                content=asdict(ProcessResponse(message=exc.detail)),
+            )
+        finally:
+            # Release the worker on every exit path, including exceptions other
+            # than HTTPException, so a failed request cannot leave it stuck busy.
+            if marked_busy:
+                self.status.set_worker_available(self.worker_id)

groundx 2.4.5__py3-none-any.whl → 2.4.9__py3-none-any.whl

Potentially problematic release.

groundx 2.4.5-py3-none-any.whl → 2.4.9-py3-none-any.whl