PyPI - GdoczAI - Versions diffs - 0.1.0__tar.gz - Mend

GdoczAI 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

gdoczai-0.1.0/GdoczAI.egg-info/PKG-INFO +52 -0
gdoczai-0.1.0/GdoczAI.egg-info/SOURCES.txt +16 -0
gdoczai-0.1.0/GdoczAI.egg-info/dependency_links.txt +1 -0
gdoczai-0.1.0/GdoczAI.egg-info/requires.txt +1 -0
gdoczai-0.1.0/GdoczAI.egg-info/top_level.txt +1 -0
gdoczai-0.1.0/PKG-INFO +52 -0
gdoczai-0.1.0/README.md +38 -0
gdoczai-0.1.0/gdocz_sdk/__init__.py +13 -0
gdoczai-0.1.0/gdocz_sdk/client.py +147 -0
gdoczai-0.1.0/gdocz_sdk/convert_options.py +5 -0
gdoczai-0.1.0/gdocz_sdk/convert_result.py +22 -0
gdoczai-0.1.0/gdocz_sdk/exceptions.py +14 -0
gdoczai-0.1.0/gdocz_sdk/extract_options.py +9 -0
gdoczai-0.1.0/gdocz_sdk/extract_result.py +13 -0
gdoczai-0.1.0/gdocz_sdk/segment_options.py +9 -0
gdoczai-0.1.0/gdocz_sdk/segment_result.py +17 -0
gdoczai-0.1.0/pyproject.toml +26 -0
gdoczai-0.1.0/setup.cfg +4 -0

gdoczai-0.1.0/GdoczAI.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,52 @@
+Metadata-Version: 2.4
+Name: GdoczAI
+Version: 0.1.0
+Summary: Python SDK for GdoczAI — OCR, extract, and segment documents
+Author-email: Ramkumar <ramkumarlpm4@gmail.com>
+Project-URL: Homepage, https://github.com/GramosoftAI/GdoczAI
+Keywords: ocr,document,pdf,extract,gdocz,ai
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: requests>=2.28.0
+# gdoczai
+Official Python SDK for [GdoczAI](https://gdocz.gramopro.ai) — OCR, extract fields,
+and segment documents with ease.
+## Installation
+```bash
+pip install GdoczAI
+```
+## Quick Start
+```python
+from gdocz_sdk import GdoczaiClient
+client = GdoczaiClient(api_key="your-api-key")
+# Convert PDF to Markdown
+result = client.convert("document.pdf")
+print(result.markdown)
+# Extract fields
+from gdocz_sdk import ExtractOptions
+result = client.extract("document.pdf", ExtractOptions(fields=["name", "date"]))
+print(result.extracted_data)
+# Segment document
+result = client.segment("document.pdf")
+print(result.segments)
+```
+## API Key
+Set your API key as an environment variable:
+```bash
+export GDOCZAI_API_KEY="your-api-key"
+```
+Or pass it directly to the client.

gdoczai-0.1.0/GdoczAI.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,16 @@
+README.md
+pyproject.toml
+GdoczAI.egg-info/PKG-INFO
+GdoczAI.egg-info/SOURCES.txt
+GdoczAI.egg-info/dependency_links.txt
+GdoczAI.egg-info/requires.txt
+GdoczAI.egg-info/top_level.txt
+gdocz_sdk/__init__.py
+gdocz_sdk/client.py
+gdocz_sdk/convert_options.py
+gdocz_sdk/convert_result.py
+gdocz_sdk/exceptions.py
+gdocz_sdk/extract_options.py
+gdocz_sdk/extract_result.py
+gdocz_sdk/segment_options.py
+gdocz_sdk/segment_result.py

gdoczai-0.1.0/GdoczAI.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

gdoczai-0.1.0/GdoczAI.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ requests>=2.28.0

gdoczai-0.1.0/GdoczAI.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ gdocz_sdk

gdoczai-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,52 @@
+Metadata-Version: 2.4
+Name: GdoczAI
+Version: 0.1.0
+Summary: Python SDK for GdoczAI — OCR, extract, and segment documents
+Author-email: Ramkumar <ramkumarlpm4@gmail.com>
+Project-URL: Homepage, https://github.com/GramosoftAI/GdoczAI
+Keywords: ocr,document,pdf,extract,gdocz,ai
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: requests>=2.28.0
+# gdoczai
+Official Python SDK for [GdoczAI](https://gdocz.gramopro.ai) — OCR, extract fields,
+and segment documents with ease.
+## Installation
+```bash
+pip install GdoczAI
+```
+## Quick Start
+```python
+from gdocz_sdk import GdoczaiClient
+client = GdoczaiClient(api_key="your-api-key")
+# Convert PDF to Markdown
+result = client.convert("document.pdf")
+print(result.markdown)
+# Extract fields
+from gdocz_sdk import ExtractOptions
+result = client.extract("document.pdf", ExtractOptions(fields=["name", "date"]))
+print(result.extracted_data)
+# Segment document
+result = client.segment("document.pdf")
+print(result.segments)
+```
+## API Key
+Set your API key as an environment variable:
+```bash
+export GDOCZAI_API_KEY="your-api-key"
+```
+Or pass it directly to the client.

gdoczai-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,38 @@
+# gdoczai
+Official Python SDK for [GdoczAI](https://gdocz.gramopro.ai) — OCR, extract fields,
+and segment documents with ease.
+## Installation
+```bash
+pip install GdoczAI
+```
+## Quick Start
+```python
+from gdocz_sdk import GdoczaiClient
+client = GdoczaiClient(api_key="your-api-key")
+# Convert PDF to Markdown
+result = client.convert("document.pdf")
+print(result.markdown)
+# Extract fields
+from gdocz_sdk import ExtractOptions
+result = client.extract("document.pdf", ExtractOptions(fields=["name", "date"]))
+print(result.extracted_data)
+# Segment document
+result = client.segment("document.pdf")
+print(result.segments)
+```
+## API Key
+Set your API key as an environment variable:
+```bash
+export GDOCZAI_API_KEY="your-api-key"
+```
+Or pass it directly to the client.

gdoczai-0.1.0/gdocz_sdk/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .client import GdoczaiClient
+from .convert_options import ConvertOptions
+from .extract_options import ExtractOptions
+from .segment_options import SegmentOptions
+from .convert_result import ConvertResult
+from .extract_result import ExtractResult
+from .segment_result import SegmentResult
+from .exceptions import (
+    GdoczAPIError,
+    GdoczTimeoutError,
+    GdoczFileError,
+    GdoczValidationError,
+)

gdoczai-0.1.0/gdocz_sdk/client.py ADDED Viewed

@@ -0,0 +1,147 @@
+import requests
+import os
+import json
+from .convert_result import ConvertResult
+from .extract_result import ExtractResult
+from .segment_result import SegmentResult
+from .exceptions import GdoczAPIError, GdoczTimeoutError, GdoczFileError, GdoczValidationError
+class GdoczaiClient:
+    def __init__(self, api_key=None, convert_base_url=None, extract_base_url=None, segment_base_url=None, timeout=300):
+        self.api_key = api_key or os.getenv("GDOCZAI_API_KEY")
+        if not self.api_key:
+            raise GdoczValidationError("API key required")
+        self.convert_base_url = convert_base_url or "https://gdocz.gramopro.ai/ocr"
+        self.extract_base_url = extract_base_url or "https://gdocz.gramopro.ai/ocr"
+        self.segment_base_url = segment_base_url or "https://gdocz.gramopro.ai/ocr"
+        self.timeout = timeout
+    def convert(self, file_path, options=None):
+        if not os.path.exists(file_path):
+            raise GdoczFileError(f"File not found: {file_path}")
+        mode = "chandra"
+        page_range = None
+        if options:
+            mode = options.mode
+            page_range = options.page_range
+        try:
+            with open(file_path, "rb") as f:
+                files = {"file": (os.path.basename(file_path), f)}
+                data = {"model": mode}
+                if page_range:
+                    data["page_range"] = page_range
+                headers = {"X-API-Key": self.api_key}
+                response = requests.post(
+                    f"{self.convert_base_url}/ocr/markdown-only",
+                    files=files,
+                    data=data,
+                    headers=headers,
+                    timeout=self.timeout
+                )
+        except requests.exceptions.Timeout:
+            raise GdoczTimeoutError("Convert request timed out")
+        if response.status_code != 200:
+            raise GdoczAPIError(response.status_code, response.text)
+        return ConvertResult(response.json())
+    def extract(self, file_path, options=None):
+        if not os.path.exists(file_path):
+            raise GdoczFileError(f"File not found: {file_path}")
+        if options and options.fields is not None and not isinstance(options.fields, (list, dict)):
+            raise GdoczValidationError("fields must be a list or schema dict")
+        headers = {
+            "X-API-Key": self.api_key,
+            "Content-Type": "application/json"
+        }
+        try:
+            if options and options.request_id:
+                payload = {
+                    "request_id": options.request_id,
+                    "fields": options.fields
+                }
+            else:
+                convert_result = self.convert(file_path)
+                markdown = convert_result.markdown
+                if not markdown:
+                    raise GdoczValidationError("Markdown extraction failed — empty response from convert")
+                payload = {
+                    "markdown_content": markdown,
+                    "fields": options.fields if options else []
+                }
+            response = requests.post(
+                f"{self.extract_base_url}/ocr/extract/markdown",
+                json=payload,
+                headers=headers,
+                timeout=self.timeout
+            )
+        except requests.exceptions.Timeout:
+            raise GdoczTimeoutError("Extract request timed out")
+        if response.status_code != 200:
+            raise GdoczAPIError(response.status_code, response.text)
+        return ExtractResult(response.json())
+    def segment(self, file_path, options=None):
+        if not os.path.exists(file_path):
+            raise GdoczFileError(f"File not found: {file_path}")
+        mode = "auto"
+        segments = None
+        if options:
+            mode = options.mode
+            segments = options.segments
+        if mode == "guided" and not segments:
+            raise GdoczValidationError("Guided mode requires segments list")
+        try:
+            with open(file_path, "rb") as f:
+                files = {"file": (os.path.basename(file_path), f)}
+                data = {"mode": mode}
+                if mode == "guided" and segments:
+                    import json
+                    data["segments"] = json.dumps(segments)
+                headers = {"X-API-Key": self.api_key}
+                response = requests.post(
+                    f"{self.segment_base_url}/ocr/segment",
+                    files=files,
+                    data=data,
+                    headers=headers,
+                    timeout=self.timeout
+                )
+        except requests.exceptions.Timeout:
+            raise GdoczTimeoutError("Segment request timed out")
+        if response.status_code != 200:
+            raise GdoczAPIError(response.status_code, response.text)
+        return SegmentResult(response.json())

gdoczai-0.1.0/gdocz_sdk/convert_options.py ADDED Viewed

@@ -0,0 +1,5 @@
+class ConvertOptions:
+    def __init__(self, mode="chandra", page_range=None):
+        self.mode = mode
+        self.page_range = page_range

gdoczai-0.1.0/gdocz_sdk/convert_result.py ADDED Viewed

@@ -0,0 +1,22 @@
+import os
+class ConvertResult:
+    def __init__(self, data):
+        self.success = data.get("success", False)
+        self.request_id = data.get("request_id")
+        self.markdown = data.get("markdown")
+        # metadata fields
+        metadata = data.get("metadata", {})
+        self.metadata = metadata
+        self.page_count = metadata.get("page_count")
+        self.processing_time = metadata.get("processing_time_seconds")
+        self.filename = metadata.get("filename")
+        self.model = metadata.get("model")
+    def save_output(self, folder_path):
+        os.makedirs(folder_path, exist_ok=True)
+        with open(os.path.join(folder_path, "output.md"), "w", encoding="utf-8") as f:
+            f.write(self.markdown or "")

gdoczai-0.1.0/gdocz_sdk/exceptions.py ADDED Viewed

@@ -0,0 +1,14 @@
+class GdoczAPIError(Exception):
+    def __init__(self, status_code, response_data):
+        self.status_code = status_code
+        self.response_data = response_data
+        super().__init__(f"API error {status_code}: {response_data}")
+class GdoczTimeoutError(Exception):
+    pass
+class GdoczFileError(Exception):
+    pass
+class GdoczValidationError(Exception):
+    pass

gdoczai-0.1.0/gdocz_sdk/extract_options.py ADDED Viewed

@@ -0,0 +1,9 @@
+class ExtractOptions:
+    def __init__(self, fields=None, request_id=None):
+        """
+        fields     : list of field names e.g. ["total_amount", "tax"]
+        request_id : reuse existing convert request
+        """
+        self.fields = fields or []
+        self.request_id = request_id

gdoczai-0.1.0/gdocz_sdk/extract_result.py ADDED Viewed

@@ -0,0 +1,13 @@
+class ExtractResult:
+    def __init__(self, data):
+        self.success = data.get("success", False)
+        self.extracted_data = data.get("extracted_data", {})
+        # metadata fields
+        metadata = data.get("metadata", {})
+        self.metadata = metadata
+        self.fields_requested = metadata.get("fields_requested")
+        self.fields_extracted = metadata.get("fields_extracted")
+        self.schema_type = metadata.get("schema_type")
+        self.token_usage = metadata.get("token_usage", {})

gdoczai-0.1.0/gdocz_sdk/segment_options.py ADDED Viewed

@@ -0,0 +1,9 @@
+class SegmentOptions:
+    def __init__(self, mode="auto", segments=None):
+        """
+        mode: "auto" or "guided"
+        segments: list of {name, description}
+        """
+        self.mode = mode
+        self.segments = segments or []

gdoczai-0.1.0/gdocz_sdk/segment_result.py ADDED Viewed

@@ -0,0 +1,17 @@
+class SegmentResult:
+    def __init__(self, data):
+        self.success = data.get("success", False)
+        self.segments = data.get("segments", [])
+        self.request_id = data.get("request_id")
+        self.processing_time = data.get("processing_time")
+        # metadata fields
+        metadata = data.get("metadata", {})
+        self.metadata = metadata
+        self.total_pages = metadata.get("total_pages")
+        self.strategy = metadata.get("strategy")
+        self.mode = metadata.get("mode")
+        self.filename = metadata.get("filename")
+        self.ocr_time = metadata.get("ocr_time_seconds")
+        self.segmentation_time = metadata.get("segmentation_time_seconds")

gdoczai-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,26 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "GdoczAI"
+version = "0.1.0"
+description = "Python SDK for GdoczAI — OCR, extract, and segment documents"
+readme = "README.md"
+license = { file = "LICENSE" }
+authors = [
+    { name = "Ramkumar", email = "ramkumarlpm4@gmail.com" }
+]
+keywords = ["ocr", "document", "pdf", "extract", "gdocz", "ai"]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+requires-python = ">=3.8"
+dependencies = [
+    "requests>=2.28.0"
+]
+[project.urls]
+Homepage = "https://github.com/GramosoftAI/GdoczAI"

gdoczai-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0