retab 0.0.35-py3-none-any.whl → 0.0.37-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (134)
  1. {uiform → retab}/_utils/ai_models.py +2 -2
  2. {uiform → retab}/_utils/benchmarking.py +15 -16
  3. {uiform → retab}/_utils/chat.py +9 -14
  4. {uiform → retab}/_utils/display.py +0 -3
  5. {uiform → retab}/_utils/json_schema.py +9 -14
  6. {uiform → retab}/_utils/mime.py +11 -14
  7. {uiform → retab}/_utils/responses.py +9 -3
  8. {uiform → retab}/_utils/stream_context_managers.py +1 -1
  9. {uiform → retab}/_utils/usage/usage.py +28 -28
  10. {uiform → retab}/client.py +32 -31
  11. {uiform → retab}/resources/consensus/client.py +17 -36
  12. {uiform → retab}/resources/consensus/completions.py +24 -47
  13. {uiform → retab}/resources/consensus/completions_stream.py +26 -38
  14. {uiform → retab}/resources/consensus/responses.py +31 -80
  15. {uiform → retab}/resources/consensus/responses_stream.py +31 -79
  16. {uiform → retab}/resources/documents/client.py +59 -45
  17. {uiform → retab}/resources/documents/extractions.py +181 -90
  18. {uiform → retab}/resources/evals.py +56 -43
  19. retab/resources/evaluations/__init__.py +3 -0
  20. retab/resources/evaluations/client.py +301 -0
  21. retab/resources/evaluations/documents.py +233 -0
  22. retab/resources/evaluations/iterations.py +452 -0
  23. {uiform → retab}/resources/files.py +2 -2
  24. {uiform → retab}/resources/jsonlUtils.py +220 -216
  25. retab/resources/models.py +73 -0
  26. retab/resources/processors/automations/client.py +244 -0
  27. {uiform → retab}/resources/processors/automations/endpoints.py +77 -118
  28. retab/resources/processors/automations/links.py +294 -0
  29. {uiform → retab}/resources/processors/automations/logs.py +30 -19
  30. {uiform → retab}/resources/processors/automations/mailboxes.py +136 -174
  31. retab/resources/processors/automations/outlook.py +337 -0
  32. {uiform → retab}/resources/processors/automations/tests.py +22 -25
  33. {uiform → retab}/resources/processors/client.py +179 -164
  34. {uiform → retab}/resources/schemas.py +78 -66
  35. {uiform → retab}/resources/secrets/external_api_keys.py +1 -5
  36. retab/resources/secrets/webhook.py +64 -0
  37. {uiform → retab}/resources/usage.py +39 -2
  38. {uiform → retab}/types/ai_models.py +13 -13
  39. {uiform → retab}/types/automations/cron.py +19 -12
  40. {uiform → retab}/types/automations/endpoints.py +7 -4
  41. {uiform → retab}/types/automations/links.py +7 -3
  42. {uiform → retab}/types/automations/mailboxes.py +9 -9
  43. {uiform → retab}/types/automations/outlook.py +15 -11
  44. retab/types/browser_canvas.py +3 -0
  45. {uiform → retab}/types/chat.py +2 -2
  46. {uiform → retab}/types/completions.py +9 -12
  47. retab/types/consensus.py +19 -0
  48. {uiform → retab}/types/db/annotations.py +3 -3
  49. {uiform → retab}/types/db/files.py +8 -6
  50. {uiform → retab}/types/documents/create_messages.py +18 -20
  51. {uiform → retab}/types/documents/extractions.py +69 -24
  52. {uiform → retab}/types/evals.py +5 -5
  53. retab/types/evaluations/__init__.py +31 -0
  54. retab/types/evaluations/documents.py +30 -0
  55. retab/types/evaluations/iterations.py +112 -0
  56. retab/types/evaluations/model.py +73 -0
  57. retab/types/events.py +79 -0
  58. {uiform → retab}/types/extractions.py +33 -10
  59. retab/types/inference_settings.py +15 -0
  60. retab/types/jobs/base.py +54 -0
  61. retab/types/jobs/batch_annotation.py +12 -0
  62. {uiform → retab}/types/jobs/evaluation.py +1 -2
  63. {uiform → retab}/types/logs.py +37 -34
  64. retab/types/metrics.py +32 -0
  65. {uiform → retab}/types/mime.py +22 -20
  66. {uiform → retab}/types/modalities.py +10 -10
  67. retab/types/predictions.py +19 -0
  68. {uiform → retab}/types/schemas/enhance.py +4 -2
  69. {uiform → retab}/types/schemas/evaluate.py +7 -4
  70. {uiform → retab}/types/schemas/generate.py +6 -3
  71. {uiform → retab}/types/schemas/layout.py +1 -1
  72. {uiform → retab}/types/schemas/object.py +13 -14
  73. {uiform → retab}/types/schemas/templates.py +1 -3
  74. {uiform → retab}/types/secrets/external_api_keys.py +0 -1
  75. {uiform → retab}/types/standards.py +18 -1
  76. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/METADATA +7 -6
  77. retab-0.0.37.dist-info/RECORD +107 -0
  78. retab-0.0.37.dist-info/top_level.txt +1 -0
  79. retab-0.0.35.dist-info/RECORD +0 -111
  80. retab-0.0.35.dist-info/top_level.txt +0 -1
  81. uiform/_utils/benchmarking copy.py +0 -588
  82. uiform/resources/deployments/__init__.py +0 -9
  83. uiform/resources/deployments/client.py +0 -78
  84. uiform/resources/deployments/endpoints.py +0 -322
  85. uiform/resources/deployments/links.py +0 -452
  86. uiform/resources/deployments/logs.py +0 -211
  87. uiform/resources/deployments/mailboxes.py +0 -496
  88. uiform/resources/deployments/outlook.py +0 -531
  89. uiform/resources/deployments/tests.py +0 -158
  90. uiform/resources/models.py +0 -45
  91. uiform/resources/processors/automations/client.py +0 -78
  92. uiform/resources/processors/automations/links.py +0 -356
  93. uiform/resources/processors/automations/outlook.py +0 -444
  94. uiform/resources/secrets/webhook.py +0 -62
  95. uiform/types/consensus.py +0 -10
  96. uiform/types/deployments/cron.py +0 -59
  97. uiform/types/deployments/endpoints.py +0 -28
  98. uiform/types/deployments/links.py +0 -36
  99. uiform/types/deployments/mailboxes.py +0 -67
  100. uiform/types/deployments/outlook.py +0 -76
  101. uiform/types/deployments/webhooks.py +0 -21
  102. uiform/types/events.py +0 -76
  103. uiform/types/jobs/base.py +0 -150
  104. uiform/types/jobs/batch_annotation.py +0 -22
  105. uiform/types/secrets/__init__.py +0 -0
  106. {uiform → retab}/__init__.py +0 -0
  107. {uiform → retab}/_resource.py +0 -0
  108. {uiform → retab}/_utils/__init__.py +0 -0
  109. {uiform → retab}/_utils/usage/__init__.py +0 -0
  110. {uiform → retab}/py.typed +0 -0
  111. {uiform → retab}/resources/__init__.py +0 -0
  112. {uiform → retab}/resources/consensus/__init__.py +0 -0
  113. {uiform → retab}/resources/documents/__init__.py +0 -0
  114. {uiform → retab}/resources/finetuning.py +0 -0
  115. {uiform → retab}/resources/openai_example.py +0 -0
  116. {uiform → retab}/resources/processors/__init__.py +0 -0
  117. {uiform → retab}/resources/processors/automations/__init__.py +0 -0
  118. {uiform → retab}/resources/prompt_optimization.py +0 -0
  119. {uiform → retab}/resources/secrets/__init__.py +0 -0
  120. {uiform → retab}/resources/secrets/client.py +0 -0
  121. {uiform → retab}/types/__init__.py +0 -0
  122. {uiform → retab}/types/automations/__init__.py +0 -0
  123. {uiform → retab}/types/automations/webhooks.py +0 -0
  124. {uiform → retab}/types/db/__init__.py +0 -0
  125. {uiform/types/deployments → retab/types/documents}/__init__.py +0 -0
  126. {uiform → retab}/types/documents/correct_orientation.py +0 -0
  127. {uiform/types/documents → retab/types/jobs}/__init__.py +0 -0
  128. {uiform → retab}/types/jobs/finetune.py +0 -0
  129. {uiform → retab}/types/jobs/prompt_optimization.py +0 -0
  130. {uiform → retab}/types/jobs/webcrawl.py +0 -0
  131. {uiform → retab}/types/pagination.py +0 -0
  132. {uiform/types/jobs → retab/types/schemas}/__init__.py +0 -0
  133. {uiform/types/schemas → retab/types/secrets}/__init__.py +0 -0
  134. {retab-0.0.35.dist-info → retab-0.0.37.dist-info}/WHEEL +0 -0
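The dominant change in this release is the rename of the top-level package from uiform to retab (every {uiform → retab} entry above), plus new evaluations, models, automations and jobs modules under the retab tree. A minimal migration sketch, assuming only the package name changed; the module path is taken from the file list above, and Modality is a name that appears in the diff below, but any other import migrates the same way:

    # retab 0.0.35: the wheel installed a top-level package named uiform
    from uiform.types.modalities import Modality

    # retab 0.0.37: the same module now ships under retab
    from retab.types.modalities import Modality

The top_level.txt changes above (a retab entry added, the uiform entry removed) indicate that 0.0.37 no longer installs a uiform package, so imports must be updated rather than aliased. The hunks reproduced below are evidently from item 24, {uiform → retab}/resources/jsonlUtils.py (+220 -216).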
@@ -8,12 +8,13 @@ import time
  from concurrent.futures import ThreadPoolExecutor
  from io import IOBase
  from pathlib import Path
- from typing import IO, Any, Literal, Optional
+ from typing import IO, Any, Optional, TypedDict

  from anthropic import Anthropic
  from openai import OpenAI
  from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
  from pydantic import BaseModel
+ from pydantic_core import PydanticUndefined
  from tqdm import tqdm

  from .._resource import AsyncAPIResource, SyncAPIResource
@@ -24,6 +25,7 @@ from .._utils.json_schema import load_json_schema
  from ..types.chat import ChatCompletionUiformMessage
  from ..types.modalities import Modality
  from ..types.schemas.object import Schema
+ from ..types.browser_canvas import BrowserCanvas


  class FinetuningJSON(BaseModel):
@@ -31,7 +33,6 @@ class FinetuningJSON(BaseModel):


  FinetuningJSONL = list[FinetuningJSON]
- from typing import TypedDict


  class BatchJSONLResponseFormat(TypedDict):
@@ -106,9 +107,9 @@ class BatchJSONLResponse(BaseModel):

  class BaseDatasetsMixin:
  def _dump_training_set(self, training_set: list[dict[str, Any]], dataset_path: Path | str) -> None:
- with open(dataset_path, 'w', encoding='utf-8') as file:
+ with open(dataset_path, "w", encoding="utf-8") as file:
  for entry in training_set:
- file.write(json.dumps(entry) + '\n')
+ file.write(json.dumps(entry) + "\n")


  class Datasets(SyncAPIResource, BaseDatasetsMixin):
@@ -138,8 +139,8 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  json_schema: dict[str, Any] | Path | str,
  document_annotation_pairs_paths: list[dict[str, Path | str]],
  dataset_path: Path | str,
- image_resolution_dpi: int | None = None,
- browser_canvas: Literal['A3', 'A4', 'A5'] | None = None,
+ image_resolution_dpi: int = PydanticUndefined, # type: ignore[assignment]
+ browser_canvas: BrowserCanvas = PydanticUndefined, # type: ignore[assignment]
  modality: Modality = "native",
  ) -> None:
  """Save document-annotation pairs to a JSONL training set.
@@ -153,16 +154,18 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  json_schema = load_json_schema(json_schema)
  schema_obj = Schema(json_schema=json_schema)

- with open(dataset_path, 'w', encoding='utf-8') as file:
+ with open(dataset_path, "w", encoding="utf-8") as file:
  for pair_paths in tqdm(document_annotation_pairs_paths, desc="Processing pairs", position=0):
- document_message = self._client.documents.create_messages(document=pair_paths['document_fpath'], modality=modality, image_resolution_dpi=image_resolution_dpi, browser_canvas=browser_canvas)
+ document_message = self._client.documents.create_messages(
+ document=pair_paths["document_fpath"], modality=modality, image_resolution_dpi=image_resolution_dpi, browser_canvas=browser_canvas
+ )

- with open(pair_paths['annotation_fpath'], 'r') as f:
+ with open(pair_paths["annotation_fpath"], "r") as f:
  annotation = json.loads(f.read())
  assistant_message = {"role": "assistant", "content": json.dumps(annotation, ensure_ascii=False, indent=2)}

  entry = {"messages": schema_obj.messages + document_message.messages + [assistant_message]}
- file.write(json.dumps(entry) + '\n')
+ file.write(json.dumps(entry) + "\n")

  def change_schema(
  self,
@@ -193,8 +196,8 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  assert isinstance(target_path, Path) or isinstance(target_path, str)

  # Use a temporary file to write the updated content
- with tempfile.NamedTemporaryFile('w', delete=False, encoding='utf-8') as temp_file:
- with open(input_dataset_path, 'r', encoding='utf-8') as infile:
+ with tempfile.NamedTemporaryFile("w", delete=False, encoding="utf-8") as temp_file:
+ with open(input_dataset_path, "r", encoding="utf-8") as infile:
  for line in infile:
  entry = json.loads(line)
  messages = entry.get("messages", [])
@@ -208,7 +211,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  updated_entry = {"messages": updated_messages}

  # Write the updated entry to the temporary file
- temp_file.write(json.dumps(updated_entry) + '\n')
+ temp_file.write(json.dumps(updated_entry) + "\n")

  # Replace the original file with the temporary file
  shutil.move(temp_file.name, target_path)
@@ -241,19 +244,19 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  for pair_paths in tqdm(pairs_paths):
  document_messages: list[ChatCompletionUiformMessage] = []

- if isinstance(pair_paths['document_fpath'], str) or isinstance(pair_paths['document_fpath'], Path):
- document_message = self._client.documents.create_messages(document=pair_paths['document_fpath'], modality=modality)
+ if isinstance(pair_paths["document_fpath"], str) or isinstance(pair_paths["document_fpath"], Path):
+ document_message = self._client.documents.create_messages(document=pair_paths["document_fpath"], modality=modality)
  document_messages.extend(document_message.messages)

  else:
- assert isinstance(pair_paths['document_fpath'], list)
- for document_fpath in pair_paths['document_fpath']:
+ assert isinstance(pair_paths["document_fpath"], list)
+ for document_fpath in pair_paths["document_fpath"]:
  document_message = self._client.documents.create_messages(document=document_fpath, modality=modality)
  document_messages.extend(document_message.messages)

  # Use context manager to properly close the file
- assert isinstance(pair_paths['annotation_fpath'], Path) or isinstance(pair_paths['annotation_fpath'], str)
- with open(pair_paths['annotation_fpath'], 'r') as f:
+ assert isinstance(pair_paths["annotation_fpath"], Path) or isinstance(pair_paths["annotation_fpath"], str)
+ with open(pair_paths["annotation_fpath"], "r") as f:
  annotation = json.loads(f.read())
  assistant_message = {"role": "assistant", "content": json.dumps(annotation, ensure_ascii=False, indent=2)}

@@ -389,6 +392,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  raise ValueError(f"Invalid file path: {doc_path}")
  hash_str = hashlib.md5(doc_path.as_posix().encode()).hexdigest()
  elif isinstance(doc, IO):
+ doc_path = Path(doc.name) or "unknown_file"
  file_bytes = doc.read()
  hash_str = hashlib.md5(file_bytes).hexdigest()
  doc.seek(0)
@@ -408,7 +412,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  annotation_path = Path(root_dir) / f"annotations_{hash_str}.json"
  annotation_path.parent.mkdir(parents=True, exist_ok=True)

- with open(annotation_path, 'w', encoding='utf-8') as f:
+ with open(annotation_path, "w", encoding="utf-8") as f:
  json.dump(string_json, f, ensure_ascii=False, indent=2)

  return {"document_fpath": str(doc_path), "annotation_fpath": str(annotation_path)}
@@ -442,176 +446,176 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  # Generate final training set from all results
  self.save(json_schema=json_schema, document_annotation_pairs_paths=pairs_paths, dataset_path=dataset_path)

- def eval(
- self,
- json_schema: dict[str, Any] | Path | str,
- dataset_path: str | Path,
- model: str = "gpt-4o-2024-08-06",
- temperature: float = 0.0,
- batch_size: int = 5,
- max_concurrent: int = 3,
- display: bool = True,
- ) -> ComparisonMetrics:
- """Evaluate model performance on a test dataset.
-
- Args:
- json_schema: JSON schema defining the expected data structure
- dataset_path: Path to the JSONL file containing test examples
- model: The model to use for benchmarking
- temperature: Model temperature setting (0-1)
- batch_size: Number of examples to process in each batch
- max_concurrent: Maximum number of concurrent API calls
- """
-
- json_schema = load_json_schema(json_schema)
- assert_valid_model_extraction(model)
- schema_obj = Schema(json_schema=json_schema)
-
- # Initialize appropriate client
- client, provider = self._initialize_model_client(model)
-
- # Read all lines from the JSONL file
- with open(dataset_path, 'r') as f:
- lines = [json.loads(line) for line in f]
-
- extraction_analyses: list[ExtractionAnalysis] = []
- total_batches = (len(lines) + batch_size - 1) // batch_size
-
- # Create main progress bar for batches
- batch_pbar = tqdm(total=total_batches, desc="Processing batches", position=0)
-
- # Track running metrics
- class RunningMetrics(BaseModel):
- model: str
- accuracy: float
- levenshtein: float
- jaccard: float
- false_positive: float
- mismatched: float
- processed: int
-
- running_metrics: RunningMetrics = RunningMetrics(
- model=model,
- accuracy=0.0,
- levenshtein=0.0,
- jaccard=0.0,
- false_positive=0.0,
- mismatched=0.0,
- processed=0, # number of processed examples - used in the loop to compute the running averages
- )
-
- def update_running_metrics(analysis: ExtractionAnalysis) -> None:
- comparison = normalized_comparison_metrics([analysis])
- running_metrics.processed += 1
- n = running_metrics.processed
- # Update running averages
- running_metrics.accuracy = (running_metrics.accuracy * (n - 1) + comparison.accuracy) / n
- running_metrics.levenshtein = (running_metrics.levenshtein * (n - 1) + comparison.levenshtein_similarity) / n
- running_metrics.jaccard = (running_metrics.jaccard * (n - 1) + comparison.jaccard_similarity) / n
- running_metrics.false_positive = (running_metrics.false_positive * (n - 1) + comparison.false_positive_rate) / n
- running_metrics.mismatched = (running_metrics.mismatched * (n - 1) + comparison.mismatched_value_rate) / n
- # Update progress bar description
- batch_pbar.set_description(
- f"Processing batches | Model: {running_metrics.model} | Acc: {running_metrics.accuracy:.2f} | "
- f"Lev: {running_metrics.levenshtein:.2f} | "
- f"IOU: {running_metrics.jaccard:.2f} | "
- f"FP: {running_metrics.false_positive:.2f} | "
- f"Mism: {running_metrics.mismatched:.2f}"
- )
-
- def process_example(jsonline: dict) -> ExtractionAnalysis | None:
- line_number = jsonline['line_number']
- try:
- messages = jsonline['messages']
- ground_truth = json.loads(messages[-1]['content'])
- inference_messages = messages[:-1]
-
- # Use _get_model_completion instead of duplicating provider-specific logic
- string_json = self._get_model_completion(client=client, provider=provider, model=model, temperature=temperature, messages=inference_messages, schema_obj=schema_obj)
-
- prediction = json.loads(string_json)
- analysis = ExtractionAnalysis(
- ground_truth=ground_truth,
- prediction=prediction,
- )
- update_running_metrics(analysis)
- return analysis
- except Exception as e:
- print(f"\nWarning: Failed to process line number {line_number}: {str(e)}")
- return None
-
- with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
- # Split entries into batches
- for batch_idx in range(0, len(lines), batch_size):
- batch = lines[batch_idx : batch_idx + batch_size]
-
- # Submit and process batch
- futures = [executor.submit(process_example, entry | {"line_number": batch_idx * batch_size + i}) for i, entry in enumerate(batch)]
- for future in futures:
- result = future.result()
- if result is not None:
- extraction_analyses.append(result)
-
- batch_pbar.update(1)
-
- batch_pbar.close()
-
- # Analyze error patterns across all examples
- analysis = normalized_comparison_metrics(extraction_analyses)
-
- if display:
- plot_comparison_metrics(analysis=analysis, top_n=10)
-
- return analysis
-
- def benchmark(
- self,
- json_schema: dict[str, Any] | Path | str,
- dataset_path: str | Path,
- models: list[str],
- temperature: float = 0.0,
- batch_size: int = 5,
- max_concurrent: int = 3,
- print: bool = True,
- verbose: bool = False,
- ) -> list[BenchmarkMetrics]:
- """Benchmark multiple models on a test dataset.
-
- Args:
- json_schema: JSON schema defining the expected data structure
- dataset_path: Path to the JSONL file containing test examples
- models: List of models to benchmark
- temperature: Model temperature setting (0-1)
- batch_size: Number of examples to process in each batch
- max_concurrent: Maximum number of concurrent API calls
- print: Whether to print the metrics
- verbose: Whether to print all the metrics of all the function calls
-
- Returns:
- Dictionary mapping model names to their evaluation metrics
- """
- results: list[BenchmarkMetrics] = []
-
- for model in models:
- metrics: ComparisonMetrics = self.eval(
- json_schema=json_schema, dataset_path=dataset_path, model=model, temperature=temperature, batch_size=batch_size, max_concurrent=max_concurrent, display=verbose
- )
- results.append(
- BenchmarkMetrics(
- ai_model=model,
- accuracy=metrics.accuracy,
- levenshtein_similarity=metrics.levenshtein_similarity,
- jaccard_similarity=metrics.jaccard_similarity,
- false_positive_rate=metrics.false_positive_rate,
- false_negative_rate=metrics.false_negative_rate,
- mismatched_value_rate=metrics.mismatched_value_rate,
- )
- )
-
- if print:
- display_benchmark_metrics(results)
-
- return results
+ # def eval(
+ # self,
+ # json_schema: dict[str, Any] | Path | str,
+ # dataset_path: str | Path,
+ # model: str = "gpt-4o-2024-08-06",
+ # temperature: float = 0.0,
+ # batch_size: int = 5,
+ # max_concurrent: int = 3,
+ # display: bool = True,
+ # ) -> ComparisonMetrics:
+ # """Evaluate model performance on a test dataset.
+
+ # Args:
+ # json_schema: JSON schema defining the expected data structure
+ # dataset_path: Path to the JSONL file containing test examples
+ # model: The model to use for benchmarking
+ # temperature: Model temperature setting (0-1)
+ # batch_size: Number of examples to process in each batch
+ # max_concurrent: Maximum number of concurrent API calls
+ # """
+
+ # json_schema = load_json_schema(json_schema)
+ # assert_valid_model_extraction(model)
+ # schema_obj = Schema(json_schema=json_schema)
+
+ # # Initialize appropriate client
+ # client, provider = self._initialize_model_client(model)
+
+ # # Read all lines from the JSONL file
+ # with open(dataset_path, "r") as f:
+ # lines = [json.loads(line) for line in f]
+
+ # extraction_analyses: list[ExtractionAnalysis] = []
+ # total_batches = (len(lines) + batch_size - 1) // batch_size
+
+ # # Create main progress bar for batches
+ # batch_pbar = tqdm(total=total_batches, desc="Processing batches", position=0)
+
+ # # Track running metrics
+ # class RunningMetrics(BaseModel):
+ # model: str
+ # accuracy: float
+ # levenshtein: float
+ # jaccard: float
+ # false_positive: float
+ # mismatched: float
+ # processed: int
+
+ # running_metrics: RunningMetrics = RunningMetrics(
+ # model=model,
+ # accuracy=0.0,
+ # levenshtein=0.0,
+ # jaccard=0.0,
+ # false_positive=0.0,
+ # mismatched=0.0,
+ # processed=0, # number of processed examples - used in the loop to compute the running averages
+ # )
+
+ # # def update_running_metrics(analysis: ExtractionAnalysis) -> None:
+ # # comparison = normalized_comparison_metrics([analysis])
+ # # running_metrics.processed += 1
+ # # n = running_metrics.processed
+ # # # Update running averages
+ # # running_metrics.accuracy = (running_metrics.accuracy * (n - 1) + comparison.accuracy) / n
+ # # running_metrics.levenshtein = (running_metrics.levenshtein * (n - 1) + comparison.levenshtein_similarity) / n
+ # # running_metrics.jaccard = (running_metrics.jaccard * (n - 1) + comparison.jaccard_similarity) / n
+ # # running_metrics.false_positive = (running_metrics.false_positive * (n - 1) + comparison.false_positive_rate) / n
+ # # running_metrics.mismatched = (running_metrics.mismatched * (n - 1) + comparison.mismatched_value_rate) / n
+ # # # Update progress bar description
+ # # batch_pbar.set_description(
+ # # f"Processing batches | Model: {running_metrics.model} | Acc: {running_metrics.accuracy:.2f} | "
+ # # f"Lev: {running_metrics.levenshtein:.2f} | "
+ # # f"IOU: {running_metrics.jaccard:.2f} | "
+ # # f"FP: {running_metrics.false_positive:.2f} | "
+ # # f"Mism: {running_metrics.mismatched:.2f}"
+ # # )

+ # # def process_example(jsonline: dict) -> ExtractionAnalysis | None:
+ # # line_number = jsonline["line_number"]
+ # # try:
+ # # messages = jsonline["messages"]
+ # # ground_truth = json.loads(messages[-1]["content"])
+ # # inference_messages = messages[:-1]
+
+ # # # Use _get_model_completion instead of duplicating provider-specific logic
+ # # string_json = self._get_model_completion(client=client, provider=provider, model=model, temperature=temperature, messages=inference_messages, schema_obj=schema_obj)
+
+ # # prediction = json.loads(string_json)
+ # # analysis = ExtractionAnalysis(
+ # # ground_truth=ground_truth,
+ # # prediction=prediction,
+ # # )
+ # # update_running_metrics(analysis)
+ # # return analysis
+ # # except Exception as e:
+ # # print(f"\nWarning: Failed to process line number {line_number}: {str(e)}")
+ # # return None
+
+ # # with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
+ # # # Split entries into batches
+ # # for batch_idx in range(0, len(lines), batch_size):
+ # # batch = lines[batch_idx : batch_idx + batch_size]
+
+ # # # Submit and process batch
+ # # futures = [executor.submit(process_example, entry | {"line_number": batch_idx * batch_size + i}) for i, entry in enumerate(batch)]
+ # # for future in futures:
+ # # result = future.result()
+ # # if result is not None:
+ # # extraction_analyses.append(result)
+
+ # # batch_pbar.update(1)
+
+ # # batch_pbar.close()
+
+ # # # Analyze error patterns across all examples
+ # # analysis = normalized_comparison_metrics(extraction_analyses)
+
+ # # if display:
+ # # plot_comparison_metrics(analysis=analysis, top_n=10)
+
+ # # return analysis
+
+ # def benchmark(
+ # self,
+ # json_schema: dict[str, Any] | Path | str,
+ # dataset_path: str | Path,
+ # models: list[str],
+ # temperature: float = 0.0,
+ # batch_size: int = 5,
+ # max_concurrent: int = 3,
+ # print: bool = True,
+ # verbose: bool = False,
+ # ) -> list[BenchmarkMetrics]:
+ # """Benchmark multiple models on a test dataset.
+
+ # Args:
+ # json_schema: JSON schema defining the expected data structure
+ # dataset_path: Path to the JSONL file containing test examples
+ # models: List of models to benchmark
+ # temperature: Model temperature setting (0-1)
+ # batch_size: Number of examples to process in each batch
+ # max_concurrent: Maximum number of concurrent API calls
+ # print: Whether to print the metrics
+ # verbose: Whether to print all the metrics of all the function calls
+
+ # Returns:
+ # Dictionary mapping model names to their evaluation metrics
+ # """
+ # results: list[BenchmarkMetrics] = []
+
+ # for model in models:
+ # metrics: ComparisonMetrics = self.eval(
+ # json_schema=json_schema, dataset_path=dataset_path, model=model, temperature=temperature, batch_size=batch_size, max_concurrent=max_concurrent, display=verbose
+ # )
+ # results.append(
+ # BenchmarkMetrics(
+ # ai_model=model,
+ # accuracy=metrics.accuracy,
+ # levenshtein_similarity=metrics.levenshtein_similarity,
+ # jaccard_similarity=metrics.jaccard_similarity,
+ # false_positive_rate=metrics.false_positive_rate,
+ # false_negative_rate=metrics.false_negative_rate,
+ # mismatched_value_rate=metrics.mismatched_value_rate,
+ # )
+ # )
+
+ # if print:
+ # display_benchmark_metrics(results)
+
+ # return results

  def update_annotations(
  self,
@@ -642,7 +646,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  client, provider = self._initialize_model_client(model)

  # Read all lines from the JSONL file
- with open(old_dataset_path, 'r') as f:
+ with open(old_dataset_path, "r") as f:
  lines = [json.loads(line) for line in f]

  updated_entries = []
@@ -651,13 +655,13 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  batch_pbar = tqdm(total=total_batches, desc="Processing batches", position=0)

  def process_entry(entry: dict) -> dict:
- messages = entry['messages']
+ messages = entry["messages"]
  system_message, user_messages, assistant_messages = separate_messages(messages)
  system_and_user_messages = messages[:-1]

  previous_annotation_message: ChatCompletionUiformMessage = {
  "role": "user",
- "content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + messages[-1]['content'],
+ "content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + messages[-1]["content"],
  }

  string_json = self._get_model_completion(
@@ -691,9 +695,9 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):

  batch_pbar.close()

- with open(new_dataset_path, 'w') as f:
+ with open(new_dataset_path, "w") as f:
  for entry in updated_entries:
- f.write(json.dumps(entry) + '\n')
+ f.write(json.dumps(entry) + "\n")

  #########################
  ##### BATCH METHODS #####
@@ -722,7 +726,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  schema_obj = Schema(json_schema=loaded_json_schema)
  assert_valid_model_extraction(model)

- with open(batch_requests_path, 'w', encoding='utf-8') as f:
+ with open(batch_requests_path, "w", encoding="utf-8") as f:
  for i, doc in tqdm(enumerate(documents)):
  # Create document messages
  doc_msg = self._client.documents.create_messages(
@@ -744,7 +748,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  }

  # Write the request as a JSON line
- f.write(json.dumps(request) + '\n')
+ f.write(json.dumps(request) + "\n")

  def save_batch_update_annotation_requests(
  self,
@@ -768,18 +772,18 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  schema_obj = Schema(json_schema=loaded_json_schema)

  # Read existing annotations
- with open(old_dataset_path, 'r') as f:
+ with open(old_dataset_path, "r") as f:
  entries = [json.loads(line) for line in f]

  # Create new JSONL with update requests
- with open(batch_requests_path, 'w', encoding='utf-8') as f:
+ with open(batch_requests_path, "w", encoding="utf-8") as f:
  for i, entry in enumerate(entries):
- existing_messages = entry['messages']
+ existing_messages = entry["messages"]
  system_and_user_messages = existing_messages[:-1]

  previous_annotation_message: ChatCompletionMessageParam = {
  "role": "user",
- "content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + existing_messages[-1]['content'],
+ "content": "Here is an old annotation using a different schema. Use it as a reference to update the annotation: " + existing_messages[-1]["content"],
  }

  # Construct the request object
@@ -798,7 +802,7 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  request: BatchJSONL = {"custom_id": f"request-{i}", "method": "POST", "url": "/v1/chat/completions", "body": body}

  # Write the request as a JSON line
- f.write(json.dumps(request) + '\n')
+ f.write(json.dumps(request) + "\n")

  def build_dataset_from_batch_results(
  self,
@@ -806,27 +810,27 @@ class Datasets(SyncAPIResource, BaseDatasetsMixin):
  batch_results_path: str | Path,
  dataset_results_path: str | Path,
  ) -> None:
- with open(batch_requests_path, 'r') as f:
+ with open(batch_requests_path, "r") as f:
  input_lines: list[BatchJSONL] = [json.loads(line) for line in f]
- with open(batch_results_path, 'r') as f:
- batch_results_lines: list[BatchJSONLResponse] = [json.loads(line) for line in f]
+ with open(batch_results_path, "r") as f:
+ batch_results_lines: list[BatchJSONLResponse] = [BatchJSONLResponse.model_validate_json(line) for line in f]

  assert len(input_lines) == len(batch_results_lines), "Input and batch results must have the same number of lines"

  for input_line, batch_result in zip(input_lines, batch_results_lines):
- messages = input_line['body']['messages']
+ messages = input_line["body"]["messages"]

  # Filter out messages containing the old annotation reference to remove messages that come from "update annotation"
- if isinstance(messages[-1].get('content'), str):
- if re.search(r'Here is an old annotation using a different schema\. Use it as a reference to update the annotation:', str(messages[-1].get('content', ''))):
+ if isinstance(messages[-1].get("content"), str):
+ if re.search(r"Here is an old annotation using a different schema\. Use it as a reference to update the annotation:", str(messages[-1].get("content", ""))):
  print("found keyword")
- input_line['body']['messages'] = messages[:-1]
+ input_line["body"]["messages"] = messages[:-1]

- input_line['body']['messages'].append(batch_result['response']['body']['choices'][0]['message'])
+ input_line["body"]["messages"].append(batch_result.response.body.choices[0].message)

- with open(dataset_results_path, 'w') as f:
+ with open(dataset_results_path, "w") as f:
  for input_line in input_lines:
- f.write(json.dumps({'messages': input_line['body']['messages']}) + '\n')
+ f.write(json.dumps({"messages": input_line["body"]["messages"]}) + "\n")

  print(f"Dataset saved to {dataset_results_path}")

@@ -849,9 +853,9 @@ class AsyncDatasets(AsyncAPIResource, BaseDatasetsMixin):
  training_set = []

  for pair_paths in tqdm(pairs_paths):
- document_message = await self._client.documents.create_messages(document=pair_paths['document_fpath'], modality=modality)
+ document_message = await self._client.documents.create_messages(document=pair_paths["document_fpath"], modality=modality)

- with open(pair_paths['annotation_fpath'], 'r') as f:
+ with open(pair_paths["annotation_fpath"], "r") as f:
  annotation = json.loads(f.read())
  assistant_message = {"role": "assistant", "content": json.dumps(annotation, ensure_ascii=False, indent=2)}

@@ -917,7 +921,7 @@ class AsyncDatasets(AsyncAPIResource, BaseDatasetsMixin):

  annotation_path.parent.mkdir(parents=True, exist_ok=True)

- with open(annotation_path, 'w', encoding='utf-8') as f:
+ with open(annotation_path, "w", encoding="utf-8") as f:
  json.dump(result.choices[0].message.content, f, ensure_ascii=False, indent=2)

  return {"document_fpath": str(doc_path), "annotation_fpath": str(annotation_path)}
@@ -954,7 +958,7 @@ class AsyncDatasets(AsyncAPIResource, BaseDatasetsMixin):

  annotation_path.parent.mkdir(parents=True, exist_ok=True)

- with open(annotation_path, 'w', encoding='utf-8') as f:
+ with open(annotation_path, "w", encoding="utf-8") as f:
  json.dump(result.choices[0].message.content, f, ensure_ascii=False, indent=2)

  return {