PyPI - datalab-python-sdk - Versions diffs - 0.1.15__tar.gz → 0.2.0__tar.gz - Mend

datalab-python-sdk 0.1.15 (tar.gz) → 0.2.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datalab-python-sdk
-Version: 0.1.15
+Version: 0.2.0
 Summary: SDK for the Datalab document intelligence API
 Author-email: Datalab Team <hi@datalab.to>
 License-Expression: MIT
@@ -46,10 +46,6 @@ client = DatalabClient() # use env var from above, or pass api_key="your_api_key
 # Convert PDF to markdown
 result = client.convert("document.pdf")
 print(result.markdown)
-# OCR a document
-ocr_result = client.ocr("document.pdf")
-print(ocr_result.pages)  # Get all text as string
 ```
 ## Workflows

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/README.md RENAMED Viewed

@@ -30,10 +30,6 @@ client = DatalabClient() # use env var from above, or pass api_key="your_api_key
 # Convert PDF to markdown
 result = client.convert("document.pdf")
 print(result.markdown)
-# OCR a document
-ocr_result = client.ocr("document.pdf")
-print(ocr_result.pages)  # Get all text as string
 ```
 ## Workflows

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/datalab_sdk/__init__.py RENAMED Viewed

@@ -12,6 +12,8 @@ from .models import (
     OCRResult,
     ConvertOptions,
     OCROptions,
+    FormFillingOptions,
+    FormFillingResult,
     Workflow,
     WorkflowStep,
     WorkflowExecution,
@@ -31,6 +33,8 @@ __all__ = [
     "OCRResult",
     "ConvertOptions",
     "OCROptions",
+    "FormFillingOptions",
+    "FormFillingResult",
     "Workflow",
     "WorkflowStep",
     "WorkflowExecution",

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/datalab_sdk/cli.py RENAMED Viewed

@@ -67,28 +67,16 @@ def marker_options(func):
         type=click.Choice(["markdown", "html", "json", "chunks"]),
         help="Output format",
     )(func)
-    func = click.option("--force_ocr", is_flag=True, help="Force OCR on every page")(
-        func
-    )
-    func = click.option(
-        "--format_lines", is_flag=True, help="Partially OCR lines for better formatting"
-    )(func)
     func = click.option(
         "--paginate", is_flag=True, help="Add page delimiters to output"
     )(func)
-    func = click.option("--use_llm", is_flag=True, help="Use LLM to enhance accuracy")(
-        func
-    )
-    func = click.option(
-        "--strip_existing_ocr",
-        is_flag=True,
-        help="Remove existing OCR text and redo OCR",
-    )(func)
     func = click.option(
         "--disable_image_extraction", is_flag=True, help="Disable extraction of images"
     )(func)
     func = click.option(
-        "--block_correction_prompt", help="Custom prompt for block correction"
+        "--disable_image_captions",
+        is_flag=True,
+        help="Disable synthetic image captions/descriptions in output",
     )(func)
     func = click.option(
         "--page_schema", help="Schema to set to do structured extraction"
@@ -96,6 +84,12 @@ def marker_options(func):
     func = click.option(
         "--add_block_ids", is_flag=True, help="Add block IDs to HTML output"
     )(func)
+    func = click.option(
+        "--mode",
+        type=click.Choice(["fast", "balanced", "accurate"]),
+        default="balanced",
+        help="OCR mode",
+    )(func)
     return func
@@ -248,15 +242,12 @@ def process_documents(
     poll_interval: int,
     # Convert-specific options
     output_format: Optional[str] = None,
-    force_ocr: bool = False,
-    format_lines: bool = False,
     paginate: bool = False,
-    use_llm: bool = False,
-    strip_existing_ocr: bool = False,
     disable_image_extraction: bool = False,
-    block_correction_prompt: Optional[str] = None,
+    disable_image_captions: bool = False,
     page_schema: Optional[str] = None,
     add_block_ids: bool = False,
+    mode: str = "balanced",
 ):
     """Unified document processing function"""
     try:
@@ -290,17 +281,14 @@ def process_documents(
             options = ConvertOptions(
                 output_format=output_format,
                 max_pages=max_pages,
-                force_ocr=force_ocr,
-                format_lines=format_lines,
                 paginate=paginate,
-                use_llm=use_llm,
-                strip_existing_ocr=strip_existing_ocr,
                 disable_image_extraction=disable_image_extraction,
+                disable_image_captions=disable_image_captions,
                 page_range=page_range,
-                block_correction_prompt=block_correction_prompt,
                 skip_cache=skip_cache,
                 page_schema=page_schema,
                 add_block_ids=add_block_ids,
+                mode=mode,
             )
         else:  # method == "ocr"
             options = OCROptions(
@@ -355,15 +343,12 @@ def convert(
     max_polls: int,
     poll_interval: int,
     output_format: str,
-    force_ocr: bool,
-    format_lines: bool,
     paginate: bool,
-    use_llm: bool,
-    strip_existing_ocr: bool,
     disable_image_extraction: bool,
-    block_correction_prompt: Optional[str],
+    disable_image_captions: bool,
     page_schema: Optional[str],
     add_block_ids: bool,
+    mode: str,
 ):
     """Convert documents to markdown, HTML, or JSON"""
     process_documents(
@@ -380,48 +365,12 @@ def convert(
         max_polls=max_polls,
         poll_interval=poll_interval,
         output_format=output_format,
-        force_ocr=force_ocr,
-        format_lines=format_lines,
         paginate=paginate,
-        use_llm=use_llm,
-        strip_existing_ocr=strip_existing_ocr,
         disable_image_extraction=disable_image_extraction,
-        block_correction_prompt=block_correction_prompt,
+        disable_image_captions=disable_image_captions,
         page_schema=page_schema,
         add_block_ids=add_block_ids,
-    )
-@click.command()
-@click.argument("path", type=click.Path(exists=True))
-@common_options
-def ocr(
-    path: str,
-    api_key: str,
-    output_dir: str,
-    max_pages: Optional[int],
-    extensions: Optional[str],
-    max_concurrent: int,
-    base_url: str,
-    page_range: Optional[str],
-    skip_cache: bool,
-    max_polls: int,
-    poll_interval: int,
-):
-    """Perform OCR on documents"""
-    process_documents(
-        path=path,
-        method="ocr",
-        api_key=api_key,
-        output_dir=output_dir,
-        max_pages=max_pages,
-        extensions=extensions,
-        max_concurrent=max_concurrent,
-        base_url=base_url,
-        page_range=page_range,
-        skip_cache=skip_cache,
-        max_polls=max_polls,
-        poll_interval=poll_interval,
+        mode=mode,
     )
@@ -905,7 +854,6 @@ def _render_dag_simple(layers, children, step_map):
 # Add commands to CLI group
 cli.add_command(convert)
-cli.add_command(ocr)
 cli.add_command(create_workflow)
 cli.add_command(get_workflow)
 cli.add_command(get_step_types)

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/datalab_sdk/client.py RENAMED Viewed

@@ -27,6 +27,8 @@ from datalab_sdk.models import (
     ProcessingOptions,
     ConvertOptions,
     OCROptions,
+    FormFillingOptions,
+    FormFillingResult,
     Workflow,
     WorkflowStep,
     WorkflowExecution,
@@ -345,6 +347,70 @@ class AsyncDatalabClient:
         return result
+    async def fill(
+        self,
+        file_path: Optional[Union[str, Path]] = None,
+        file_url: Optional[str] = None,
+        options: Optional[FormFillingOptions] = None,
+        save_output: Optional[Union[str, Path]] = None,
+        max_polls: int = 300,
+        poll_interval: int = 1,
+    ) -> FormFillingResult:
+        """
+        Fill PDF or image forms with provided field data
+        Args:
+            file_path: Path to the file to fill
+            file_url: URL of the file to fill
+            options: Form filling options (must include field_data)
+            save_output: Optional path to save output files
+            max_polls: Maximum number of polling attempts
+            poll_interval: Seconds between polling attempts
+        """
+        if options is None:
+            raise ValueError("options must be provided with field_data")
+        initial_data = await self._make_request(
+            "POST",
+            "/api/v1/fill",
+            data=self.get_form_params(
+                file_path=file_path, file_url=file_url, options=options
+            ),
+        )
+        if not initial_data.get("success"):
+            raise DatalabAPIError(
+                f"Request failed: {initial_data.get('error', 'Unknown error')}"
+            )
+        result_data = await self._poll_result(
+            initial_data["request_check_url"],
+            max_polls=max_polls,
+            poll_interval=poll_interval,
+        )
+        result = FormFillingResult(
+            status=result_data.get("status", "complete"),
+            success=result_data.get("success"),
+            error=result_data.get("error"),
+            output_format=result_data.get("output_format"),
+            output_base64=result_data.get("output_base64"),
+            fields_filled=result_data.get("fields_filled"),
+            fields_not_found=result_data.get("fields_not_found"),
+            runtime=result_data.get("runtime"),
+            page_count=result_data.get("page_count"),
+            cost_breakdown=result_data.get("cost_breakdown"),
+            versions=result_data.get("versions"),
+        )
+        # Save output if requested
+        if save_output and result.success and result.output_base64:
+            output_path = Path(save_output)
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            result.save_output(output_path)
+        return result
     # Workflow methods
     async def create_workflow(
         self,
@@ -1016,6 +1082,37 @@ class DatalabClient:
             )
         )
+    def fill(
+        self,
+        file_path: Optional[Union[str, Path]] = None,
+        file_url: Optional[str] = None,
+        options: Optional[FormFillingOptions] = None,
+        save_output: Optional[Union[str, Path]] = None,
+        max_polls: int = 300,
+        poll_interval: int = 1,
+    ) -> FormFillingResult:
+        """
+        Fill PDF or image forms with provided field data (sync version)
+        Args:
+            file_path: Path to the file to fill
+            file_url: URL of the file to fill
+            options: Form filling options (must include field_data)
+            save_output: Optional path to save output files
+            max_polls: Maximum number of polling attempts
+            poll_interval: Seconds between polling attempts
+        """
+        return self._run_async(
+            self._async_client.fill(
+                file_path=file_path,
+                file_url=file_url,
+                options=options,
+                save_output=save_output,
+                max_polls=max_polls,
+                poll_interval=poll_interval,
+            )
+        )
     # Workflow methods (sync)
     def create_workflow(
         self,

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/datalab_sdk/models.py RENAMED Viewed

@@ -38,14 +38,9 @@ class ConvertOptions(ProcessingOptions):
     """Options for marker conversion"""
     # Marker specific options
-    force_ocr: bool = False
-    format_lines: bool = False
     paginate: bool = False
-    use_llm: bool = False
-    strip_existing_ocr: bool = False
     disable_image_extraction: bool = False
-    disable_ocr_math: bool = False
-    block_correction_prompt: Optional[str] = None
+    disable_image_captions: bool = False
     additional_config: Optional[Dict[str, Any]] = None
     page_schema: Optional[Dict[str, Any]] = None
     segmentation_schema: Optional[str] = None  # JSON string for document segmentation
@@ -54,7 +49,7 @@ class ConvertOptions(ProcessingOptions):
         None  # Comma-separated list: 'track_changes', 'chart_understanding'
     )
     output_format: str = "markdown"  # markdown, json, html, chunks
-    mode: str = "fast"  # fast, balanced, accurate
+    mode: str = "balanced"  # fast, balanced, accurate
     keep_spreadsheet_formatting: bool = False
     webhook_url: Optional[str] = None
     extras: Optional[str] = None  # comma-separated extras
@@ -85,6 +80,32 @@ class OCROptions(ProcessingOptions):
     pass
+@dataclass
+class FormFillingOptions(ProcessingOptions):
+    """Options for form filling"""
+    field_data: Dict[str, Dict[str, str]] = field(default_factory=dict)
+    context: Optional[str] = None  # Optional context to guide form filling
+    confidence_threshold: float = 0.5  # Minimum confidence for field matching (0.0-1.0)
+    def to_form_data(self) -> Dict[str, Any]:
+        """Convert to form data format for API requests"""
+        # Start with parent's form data
+        form_data = super().to_form_data()
+        # field_data must be JSON string
+        form_data["field_data"] = (None, json.dumps(self.field_data))
+        # Add context if provided
+        if self.context is not None:
+            form_data["context"] = (None, self.context)
+        # Add confidence_threshold
+        form_data["confidence_threshold"] = (None, str(self.confidence_threshold))
+        return form_data
 @dataclass
 class ConversionResult:
     """Result from document conversion (marker endpoint)"""
@@ -345,3 +366,63 @@ class OCRResult:
                 f,
                 indent=2,
             )
+@dataclass
+class FormFillingResult:
+    """Result from form filling"""
+    status: str
+    success: Optional[bool] = None
+    error: Optional[str] = None
+    output_format: Optional[str] = None  # "pdf" or "png"
+    output_base64: Optional[str] = None  # Base64-encoded filled form
+    fields_filled: Optional[List[str]] = (
+        None  # List of field keys that were successfully filled
+    )
+    fields_not_found: Optional[List[str]] = (
+        None  # List of field keys that couldn't be matched
+    )
+    runtime: Optional[float] = None
+    page_count: Optional[int] = None
+    cost_breakdown: Optional[Dict[str, Any]] = None
+    versions: Optional[Union[Dict[str, Any], str]] = None
+    def save_output(self, output_path: Union[str, Path]) -> None:
+        """Save the filled form to a file"""
+        output_path = Path(output_path)
+        if not self.output_base64:
+            raise ValueError("No output data available to save")
+        # Determine file extension based on output_format
+        if self.output_format == "png":
+            output_path = output_path.with_suffix(".png")
+        elif self.output_format == "pdf":
+            output_path = output_path.with_suffix(".pdf")
+        else:
+            # Default to PDF if format is unknown
+            output_path = output_path.with_suffix(".pdf")
+        # Decode and save base64 data
+        with open(output_path, "wb") as f:
+            f.write(base64.b64decode(self.output_base64))
+        # Save metadata if available
+        metadata = {
+            "status": self.status,
+            "success": self.success,
+            "error": self.error,
+            "output_format": self.output_format,
+            "fields_filled": self.fields_filled,
+            "fields_not_found": self.fields_not_found,
+            "runtime": self.runtime,
+            "page_count": self.page_count,
+            "cost_breakdown": self.cost_breakdown,
+            "versions": self.versions,
+        }
+        with open(
+            output_path.with_suffix(".metadata.json"), "w", encoding="utf-8"
+        ) as f:
+            json.dump(metadata, f, indent=2)

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/integration/test_readme_examples.py RENAMED Viewed

@@ -98,9 +98,7 @@ class TestAPIMethodExamples:
         # With options
         options = ConvertOptions(
-            force_ocr=True,
             output_format="html",
-            use_llm=False,  # Keep false for cost reasons
             max_pages=1,
         )
         result = client.convert(DATA_DIR / "adversarial.pdf", options=options)
@@ -294,9 +292,7 @@ class TestProcessingOptionsVariations:
         from datalab_sdk import ConvertOptions
         options = ConvertOptions()
-        assert options.force_ocr is False
         assert options.output_format == "markdown"
-        assert options.use_llm is False
         assert options.max_pages is None
     def test_processing_options_custom_values(self):
@@ -304,9 +300,7 @@ class TestProcessingOptionsVariations:
         from datalab_sdk import ConvertOptions
         options = ConvertOptions(
-            force_ocr=True,
             output_format="html",
-            use_llm=False,  # Keep false for cost reasons
             max_pages=1,
         )

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ readme = "README.md"
 license = "MIT"
 repository = "https://github.com/datalab-to/sdk"
 keywords = ["datalab", "sdk", "document-intelligence", "api"]
-version = "0.1.15"
+version = "0.2.0"
 description = "SDK for the Datalab document intelligence API"
 requires-python = ">=3.10"
 dependencies = [

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/recipes/workflows/workflow_api_tutorial/3_create_workflow.py RENAMED Viewed

@@ -29,7 +29,7 @@ from datalab_sdk import DatalabClient, WorkflowStep
 def load_workflow_definition(definition_path: str) -> dict:
     """Load workflow definition from JSON file"""
-    with open(definition_path, 'r') as f:
+    with open(definition_path, "r") as f:
         return json.load(f)
@@ -40,15 +40,12 @@ def create_workflow_from_definition(client: DatalabClient, workflow_def: dict):
             step_key=step["step_key"],
             unique_name=step["unique_name"],
             settings=step["settings"],
-            depends_on=step.get("depends_on", [])
+            depends_on=step.get("depends_on", []),
         )
         for step in workflow_def["steps"]
     ]
-    return client.create_workflow(
-        name=workflow_def["name"],
-        steps=steps
-    )
+    return client.create_workflow(name=workflow_def["name"], steps=steps)
 def create_simple_workflow(client: DatalabClient):
@@ -60,9 +57,8 @@ def create_simple_workflow(client: DatalabClient):
             settings={
                 "max_pages": 10,
                 "output_format": "json",
-                "force_ocr": False
             },
-            depends_on=[]
+            depends_on=[],
         ),
         WorkflowStep(
             step_key="marker_extract",
@@ -72,16 +68,15 @@ def create_simple_workflow(client: DatalabClient):
                     "title": {"type": "string"},
                     "author": {"type": "string"},
                     "date": {"type": "string"},
-                    "summary": {"type": "string"}
+                    "summary": {"type": "string"},
                 }
             },
-            depends_on=["parse_document"]
-        )
+            depends_on=["parse_document"],
+        ),
     ]
     return client.create_workflow(
-        name="Document Parser with Metadata Extraction",
-        steps=steps
+        name="Document Parser with Metadata Extraction", steps=steps
     )
@@ -89,16 +84,13 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a workflow from JSON definition or hardcoded example"
     )
-    parser.add_argument(
-        "--definition",
-        help="Path to workflow definition JSON file"
-    )
+    parser.add_argument("--definition", help="Path to workflow definition JSON file")
     parser.add_argument(
         "--replace",
         action="append",
         nargs=2,
         metavar=("TOKEN", "VALUE"),
-        help="Replace tokens in definition (e.g., --replace YOUR_API_KEY abc123)"
+        help="Replace tokens in definition (e.g., --replace YOUR_API_KEY abc123)",
     )
     args = parser.parse_args()
@@ -132,7 +124,7 @@ def main():
     # Display results
     print("✅ Workflow created successfully!\n")
-    print(f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
+    print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
     print(f"ID:         {workflow.id}")
     print(f"Name:       {workflow.name}")
     print(f"Team ID:    {workflow.team_id}")
@@ -148,13 +140,13 @@ def main():
             print(f"     Depends on: {', '.join(step.depends_on)}")
     print()
-    print(f"💡 Next steps:")
-    print(f"   1. Execute this workflow:")
-    print(f"      python recipes/workflows/tutorial/execute_workflow.py \\")
+    print("💡 Next steps:")
+    print("   1. Execute this workflow:")
+    print("      python recipes/workflows/tutorial/execute_workflow.py \\")
     print(f"          --workflow_id {workflow.id} \\")
-    print(f"          --file_url https://example.com/doc.pdf")
+    print("          --file_url https://example.com/doc.pdf")
     print()
-    print(f"   2. Or use the CLI:")
+    print("   2. Or use the CLI:")
     print(f"      datalab execute-workflow --workflow_id {workflow.id} \\")
     json_example = '{"file_urls": ["https://example.com/doc.pdf"]}'
     print(f"          --input_config '{json_example}'")

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/tests/conftest.py RENAMED Viewed

@@ -143,9 +143,7 @@ async def mock_async_client(mock_server):
 @pytest.fixture
 def processing_options():
     """Create sample processing options"""
-    return ConvertOptions(
-        force_ocr=True, output_format="markdown", use_llm=False, max_pages=10
-    )
+    return ConvertOptions(output_format="markdown", max_pages=10)
 @pytest.fixture

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/tests/test_cli_simple.py RENAMED Viewed

@@ -104,94 +104,3 @@ class TestConvertCommand:
             finally:
                 os.unlink(tmp_file.name)
-class TestOCRCommand:
-    """Test the OCR command"""
-    @patch("datalab_sdk.cli.asyncio.run")
-    def test_ocr_successful_single_file(self, mock_client_class):
-        """Test successful OCR of a single file"""
-        mock_client_class.return_value = ASYNC_RETURN_VALUE
-        runner = CliRunner()
-        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
-            try:
-                result = runner.invoke(
-                    cli,
-                    [
-                        "ocr",
-                        tmp_file.name,
-                        "--api_key",
-                        "test-key",
-                        "--output_dir",
-                        "/tmp/output",
-                    ],
-                )
-                assert result.exit_code == 0
-                assert "Successfully processed: 2 files" in result.output
-                # Verify client was called correctly
-                mock_client_class.assert_called_once()
-            finally:
-                os.unlink(tmp_file.name)
-    @patch("datalab_sdk.cli.asyncio.run")
-    def test_ocr_with_max_pages(self, mock_asyncio_run):
-        """Test OCR command with max_pages option"""
-        # Mock the client
-        mock_asyncio_run.return_value = ASYNC_RETURN_VALUE
-        runner = CliRunner()
-        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
-            try:
-                result = runner.invoke(
-                    cli,
-                    [
-                        "ocr",
-                        tmp_file.name,
-                        "--api_key",
-                        "test-key",
-                        "--output_dir",
-                        "/tmp/output",
-                        "--max_pages",
-                        "5",
-                    ],
-                )
-                assert result.exit_code == 0
-                assert "Successfully processed: 2 files" in result.output
-            finally:
-                os.unlink(tmp_file.name)
-    @patch("datalab_sdk.cli.asyncio.run")
-    def test_ocr_multiple_files(self, mock_asyncio_run):
-        """Test OCR of multiple files"""
-        # Mock async processing results
-        mock_asyncio_run.return_value = ASYNC_RETURN_VALUE
-        runner = CliRunner()
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            with open(os.path.join(tmp_dir, "test1.pdf"), "w") as f:
-                f.write("Dummy content for test1.pdf")
-            with open(os.path.join(tmp_dir, "test2.pdf"), "w") as f:
-                f.write("Dummy content for test2.pdf")
-            result = runner.invoke(
-                cli,
-                [
-                    "ocr",
-                    tmp_dir,
-                    "--api_key",
-                    "test-key",
-                    "--output_dir",
-                    "/tmp/output",
-                ],
-            )
-            assert result.exit_code == 0
-            assert "OCR Summary:" in result.output
-            assert "Successfully processed: 2 files" in result.output

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/tests/test_client_methods.py RENAMED Viewed

@@ -123,7 +123,9 @@ class TestConvertMethod:
                     )
                     assert (output_path.with_suffix(".chunks.json")).exists()
-                    saved_chunks = json.loads((output_path.with_suffix(".chunks.json")).read_text())
+                    saved_chunks = json.loads(
+                        (output_path.with_suffix(".chunks.json")).read_text()
+                    )
                     assert saved_chunks == {"some_content": True}
     def test_convert_sync_with_processing_options(self, temp_dir):
@@ -133,9 +135,7 @@ class TestConvertMethod:
         pdf_file.write_bytes(b"%PDF-1.4\n%Test PDF content\n%%EOF\n")
         # Create processing options
-        options = ConvertOptions(
-            force_ocr=True, output_format="html", use_llm=True, max_pages=5
-        )
+        options = ConvertOptions(output_format="html", max_pages=5)
         # Mock API responses
         mock_initial_response = {

{datalab_python_sdk-0.1.15 → datalab_python_sdk-0.2.0}/uv.lock RENAMED Viewed

@@ -212,7 +212,7 @@ wheels = [
 [[package]]
 name = "datalab-python-sdk"
-version = "0.1.15"
+version = "0.2.0"
 source = { editable = "." }
 dependencies = [
     { name = "aiohttp" },