haiku.rag 0.11.0__tar.gz → 0.11.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag might be problematic.

Files changed (101):
  1. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/.gitignore +1 -0
  2. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/PKG-INFO +1 -1
  3. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/pyproject.toml +5 -1
  4. haiku_rag-0.11.2/src/evaluations/benchmark.py +320 -0
  5. haiku_rag-0.11.2/src/evaluations/config.py +46 -0
  6. haiku_rag-0.11.2/src/evaluations/datasets/__init__.py +8 -0
  7. haiku_rag-0.11.2/src/evaluations/datasets/repliqa.py +58 -0
  8. haiku_rag-0.11.2/src/evaluations/datasets/wix.py +81 -0
  9. {haiku_rag-0.11.0/tests → haiku_rag-0.11.2/src/evaluations}/llm_judge.py +2 -1
  10. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/client.py +47 -22
  11. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/config.py +2 -2
  12. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/ollama.py +2 -0
  13. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/openai.py +2 -0
  14. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/vllm.py +2 -0
  15. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/voyageai.py +2 -0
  16. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/monitor.py +2 -2
  17. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reader.py +2 -0
  18. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reranking/__init__.py +3 -0
  19. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/engine.py +2 -1
  20. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/repositories/settings.py +3 -3
  21. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/uv.lock +3 -1
  22. haiku_rag-0.11.0/.github/FUNDING.yml +0 -3
  23. haiku_rag-0.11.0/.github/workflows/build-docs.yml +0 -28
  24. haiku_rag-0.11.0/.github/workflows/build-publish.yml +0 -18
  25. haiku_rag-0.11.0/docs/agents.md +0 -154
  26. haiku_rag-0.11.0/docs/benchmarks.md +0 -36
  27. haiku_rag-0.11.0/docs/cli.md +0 -219
  28. haiku_rag-0.11.0/docs/configuration.md +0 -267
  29. haiku_rag-0.11.0/docs/index.md +0 -65
  30. haiku_rag-0.11.0/docs/installation.md +0 -84
  31. haiku_rag-0.11.0/docs/mcp.md +0 -30
  32. haiku_rag-0.11.0/docs/python.md +0 -214
  33. haiku_rag-0.11.0/docs/server.md +0 -41
  34. haiku_rag-0.11.0/tests/conftest.py +0 -26
  35. haiku_rag-0.11.0/tests/generate_benchmark_db.py +0 -171
  36. haiku_rag-0.11.0/tests/test_app.py +0 -248
  37. haiku_rag-0.11.0/tests/test_chunk.py +0 -195
  38. haiku_rag-0.11.0/tests/test_chunker.py +0 -39
  39. haiku_rag-0.11.0/tests/test_cli.py +0 -235
  40. haiku_rag-0.11.0/tests/test_client.py +0 -796
  41. haiku_rag-0.11.0/tests/test_document.py +0 -107
  42. haiku_rag-0.11.0/tests/test_embedder.py +0 -171
  43. haiku_rag-0.11.0/tests/test_info.py +0 -79
  44. haiku_rag-0.11.0/tests/test_lancedb_connection.py +0 -86
  45. haiku_rag-0.11.0/tests/test_monitor.py +0 -93
  46. haiku_rag-0.11.0/tests/test_preprocessor.py +0 -71
  47. haiku_rag-0.11.0/tests/test_qa.py +0 -106
  48. haiku_rag-0.11.0/tests/test_reader.py +0 -23
  49. haiku_rag-0.11.0/tests/test_rebuild.py +0 -49
  50. haiku_rag-0.11.0/tests/test_reranker.py +0 -89
  51. haiku_rag-0.11.0/tests/test_research_graph.py +0 -25
  52. haiku_rag-0.11.0/tests/test_research_graph_integration.py +0 -138
  53. haiku_rag-0.11.0/tests/test_search.py +0 -208
  54. haiku_rag-0.11.0/tests/test_settings.py +0 -84
  55. haiku_rag-0.11.0/tests/test_utils.py +0 -115
  56. haiku_rag-0.11.0/tests/test_versioning.py +0 -94
  57. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/.pre-commit-config.yaml +0 -0
  58. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/.python-version +0 -0
  59. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/LICENSE +0 -0
  60. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/README.md +0 -0
  61. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/mkdocs.yml +0 -0
  62. {haiku_rag-0.11.0/src/haiku/rag → haiku_rag-0.11.2/src/evaluations}/__init__.py +0 -0
  63. {haiku_rag-0.11.0/tests → haiku_rag-0.11.2/src/haiku/rag}/__init__.py +0 -0
  64. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/app.py +0 -0
  65. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/chunker.py +0 -0
  66. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/cli.py +0 -0
  67. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/__init__.py +0 -0
  68. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/embeddings/base.py +0 -0
  69. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/logging.py +0 -0
  70. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/mcp.py +0 -0
  71. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/migration.py +0 -0
  72. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/qa/__init__.py +0 -0
  73. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/qa/agent.py +0 -0
  74. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/qa/prompts.py +0 -0
  75. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reranking/base.py +0 -0
  76. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reranking/cohere.py +0 -0
  77. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reranking/mxbai.py +0 -0
  78. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/reranking/vllm.py +0 -0
  79. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/__init__.py +0 -0
  80. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/common.py +0 -0
  81. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/dependencies.py +0 -0
  82. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/graph.py +0 -0
  83. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/models.py +0 -0
  84. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/nodes/analysis.py +0 -0
  85. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/nodes/plan.py +0 -0
  86. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/nodes/search.py +0 -0
  87. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/nodes/synthesize.py +0 -0
  88. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/prompts.py +0 -0
  89. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/state.py +0 -0
  90. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/research/stream.py +0 -0
  91. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/__init__.py +0 -0
  92. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/models/__init__.py +0 -0
  93. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/models/chunk.py +0 -0
  94. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/models/document.py +0 -0
  95. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
  96. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
  97. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/repositories/document.py +0 -0
  98. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  99. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  100. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  101. {haiku_rag-0.11.0 → haiku_rag-0.11.2}/src/haiku/rag/utils.py +0 -0
--- haiku_rag-0.11.0/.gitignore
+++ haiku_rag-0.11.2/.gitignore
@@ -11,6 +11,7 @@ wheels/
 
 # tests
 .coverage*
+src/evaluations/data/
 tests/data/
 .pytest_cache/
 .ruff_cache/
--- haiku_rag-0.11.0/PKG-INFO
+++ haiku_rag-0.11.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.0
+Version: 0.11.2
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
--- haiku_rag-0.11.0/pyproject.toml
+++ haiku_rag-0.11.2/pyproject.toml
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.0"
+version = "0.11.2"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -48,6 +48,9 @@ haiku-rag = "haiku.rag.cli:cli"
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
+[tool.hatch.build]
+exclude = ["/docs", "/tests", "/.github"]
+
 [tool.hatch.build.targets.wheel]
 packages = ["src/haiku"]
 
@@ -57,6 +60,7 @@ dev = [
     "logfire>=4.7.0",
     "mkdocs>=1.6.1",
     "mkdocs-material>=9.6.14",
+    "pydantic-evals>=1.0.8",
     "pre-commit>=4.2.0",
     "pyright>=1.1.405",
     "pytest>=8.4.2",
--- /dev/null
+++ haiku_rag-0.11.2/src/evaluations/benchmark.py
@@ -0,0 +1,320 @@
+import asyncio
+from collections.abc import Mapping
+from typing import Any, cast
+
+import logfire
+import typer
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.ollama import OllamaProvider
+from pydantic_evals import Dataset as EvalDataset
+from pydantic_evals.evaluators import IsInstance, LLMJudge
+from pydantic_evals.reporting import ReportCaseFailure
+from rich.console import Console
+from rich.progress import Progress
+
+from evaluations.config import DatasetSpec, RetrievalSample
+from evaluations.datasets import DATASETS
+from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+from haiku.rag import logging  # noqa: F401
+from haiku.rag.client import HaikuRAG
+from haiku.rag.config import Config
+from haiku.rag.logging import configure_cli_logging
+from haiku.rag.qa import get_qa_agent
+
+QA_JUDGE_MODEL = "qwen3"
+
+logfire.configure(send_to_logfire="if-token-present", service_name="evals")
+logfire.instrument_pydantic_ai()
+configure_cli_logging()
+console = Console()
+
+
+async def populate_db(spec: DatasetSpec) -> None:
+    spec.db_path.parent.mkdir(parents=True, exist_ok=True)
+    corpus = spec.document_loader()
+    if spec.document_limit is not None:
+        corpus = corpus.select(range(min(spec.document_limit, len(corpus))))
+
+    with Progress() as progress:
+        task = progress.add_task("[green]Populating database...", total=len(corpus))
+        async with HaikuRAG(spec.db_path) as rag:
+            for doc in corpus:
+                doc_mapping = cast(Mapping[str, Any], doc)
+                payload = spec.document_mapper(doc_mapping)
+                if payload is None:
+                    progress.advance(task)
+                    continue
+
+                existing = await rag.get_document_by_uri(payload.uri)
+                if existing is not None:
+                    assert existing.id
+                    chunks = await rag.chunk_repository.get_by_document_id(existing.id)
+                    if chunks:
+                        progress.advance(task)
+                        continue
+                    await rag.document_repository.delete(existing.id)
+
+                await rag.create_document(
+                    content=payload.content,
+                    uri=payload.uri,
+                    title=payload.title,
+                    metadata=payload.metadata,
+                )
+                progress.advance(task)
+            rag.store.vacuum()
+
+
+def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
+    return retrieved_uri is not None and retrieved_uri in sample.expected_uris
+
+
+async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
+    if spec.retrieval_loader is None or spec.retrieval_mapper is None:
+        console.print("Skipping retrieval benchmark; no retrieval config.")
+        return None
+
+    corpus = spec.retrieval_loader()
+
+    recall_totals = {
+        1: 0.0,
+        3: 0.0,
+        5: 0.0,
+    }
+    total_queries = 0
+
+    with Progress() as progress:
+        task = progress.add_task(
+            "[blue]Running retrieval benchmark...", total=len(corpus)
+        )
+        async with HaikuRAG(spec.db_path) as rag:
+            for doc in corpus:
+                doc_mapping = cast(Mapping[str, Any], doc)
+                sample = spec.retrieval_mapper(doc_mapping)
+                if sample is None or sample.skip:
+                    progress.advance(task)
+                    continue
+
+                matches = await rag.search(query=sample.question, limit=5)
+                if not matches:
+                    progress.advance(task)
+                    continue
+
+                total_queries += 1
+
+                retrieved_uris: list[str] = []
+                for chunk, _ in matches:
+                    if chunk.document_id is None:
+                        continue
+                    retrieved_doc = await rag.get_document_by_id(chunk.document_id)
+                    if retrieved_doc and retrieved_doc.uri:
+                        retrieved_uris.append(retrieved_doc.uri)
+
+                # Compute per-query recall@K by counting how many relevant
+                # documents are retrieved within the first K results and
+                # averaging these fractions across all queries.
+                for cutoff in (1, 3, 5):
+                    top_k = set(retrieved_uris[:cutoff])
+                    relevant = set(sample.expected_uris)
+                    if relevant:
+                        matched = len(top_k & relevant)
+                        recall_totals[cutoff] += matched / len(relevant)
+
+                progress.advance(task)
+
+    if total_queries == 0:
+        console.print("No retrieval cases to evaluate.")
+        return None
+
+    recall_at_1 = recall_totals[1] / total_queries
+    recall_at_3 = recall_totals[3] / total_queries
+    recall_at_5 = recall_totals[5] / total_queries
+
+    console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
+    console.print(f"Total queries: {total_queries}")
+    console.print(f"Recall@1: {recall_at_1:.4f}")
+    console.print(f"Recall@3: {recall_at_3:.4f}")
+    console.print(f"Recall@5: {recall_at_5:.4f}")
+
+    return {
+        "recall@1": recall_at_1,
+        "recall@3": recall_at_3,
+        "recall@5": recall_at_5,
+    }
+
+
+async def run_qa_benchmark(
+    spec: DatasetSpec, qa_limit: int | None = None
+) -> ReportCaseFailure[str, str, dict[str, str]] | None:
+    corpus = spec.qa_loader()
+    if qa_limit is not None:
+        corpus = corpus.select(range(min(qa_limit, len(corpus))))
+
+    cases = [
+        spec.qa_case_builder(index, cast(Mapping[str, Any], doc))
+        for index, doc in enumerate(corpus, start=1)
+    ]
+
+    judge_model = OpenAIChatModel(
+        model_name=QA_JUDGE_MODEL,
+        provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
+    )
+
+    evaluation_dataset = EvalDataset[str, str, dict[str, str]](
+        cases=cases,
+        evaluators=[
+            IsInstance(type_name="str"),
+            LLMJudge(
+                rubric=ANSWER_EQUIVALENCE_RUBRIC,
+                include_input=True,
+                include_expected_output=True,
+                model=judge_model,
+                assertion={
+                    "evaluation_name": "answer_equivalent",
+                    "include_reason": True,
+                },
+            ),
+        ],
+    )
+
+    total_processed = 0
+    passing_cases = 0
+    failures: list[ReportCaseFailure[str, str, dict[str, str]]] = []
+
+    with Progress(console=console) as progress:
+        qa_task = progress.add_task(
+            "[yellow]Evaluating QA cases...",
+            total=len(evaluation_dataset.cases),
+        )
+
+        async with HaikuRAG(spec.db_path) as rag:
+            qa = get_qa_agent(rag)
+
+            async def answer_question(question: str) -> str:
+                return await qa.answer(question)
+
+            for case in evaluation_dataset.cases:
+                progress.console.print(f"\n[bold]Evaluating case:[/bold] {case.name}")
+
+                single_case_dataset = EvalDataset[str, str, dict[str, str]](
+                    cases=[case],
+                    evaluators=evaluation_dataset.evaluators,
+                )
+
+                report = await single_case_dataset.evaluate(
+                    answer_question,
+                    name="qa_answer",
+                    max_concurrency=1,
+                    progress=False,
+                )
+
+                total_processed += 1
+
+                if report.cases:
+                    result_case = report.cases[0]
+
+                    equivalence = result_case.assertions.get("answer_equivalent")
+                    progress.console.print(f"Question: {result_case.inputs}")
+                    progress.console.print(f"Expected: {result_case.expected_output}")
+                    progress.console.print(f"Generated: {result_case.output}")
+                    if equivalence is not None:
+                        progress.console.print(
+                            f"Equivalent: {equivalence.value}"
+                            + (f" — {equivalence.reason}" if equivalence.reason else "")
+                        )
+                        if equivalence.value:
+                            passing_cases += 1
+
+                    progress.console.print("")
+
+                if report.failures:
+                    failures.extend(report.failures)
+                    failure = report.failures[0]
+                    progress.console.print(
+                        "[red]Failure encountered during case evaluation:[/red]"
+                    )
+                    progress.console.print(f"Question: {failure.inputs}")
+                    progress.console.print(f"Error: {failure.error_message}")
+                    progress.console.print("")
+
+                progress.console.print(
+                    f"[green]Accuracy: {(passing_cases / total_processed):.4f} "
+                    f"{passing_cases}/{total_processed}[/green]"
+                )
+                progress.advance(qa_task)
+
+    total_cases = total_processed
+    accuracy = passing_cases / total_cases if total_cases > 0 else 0
+
+    console.print("\n=== QA Benchmark Results ===", style="bold cyan")
+    console.print(f"Total questions: {total_cases}")
+    console.print(f"Correct answers: {passing_cases}")
+    console.print(f"QA Accuracy: {accuracy:.4f} ({accuracy * 100:.2f}%)")
+
+    if failures:
+        console.print("[red]\nSummary of failures:[/red]")
+        for failure in failures:
+            console.print(f"Case: {failure.name}")
+            console.print(f"Question: {failure.inputs}")
+            console.print(f"Error: {failure.error_message}")
+            console.print("")
+
+    return failures[0] if failures else None
+
+
+async def evaluate_dataset(
+    spec: DatasetSpec,
+    skip_db: bool,
+    skip_retrieval: bool,
+    skip_qa: bool,
+    qa_limit: int | None,
+) -> None:
+    if not skip_db:
+        console.print(f"Using dataset: {spec.key}", style="bold magenta")
+        await populate_db(spec)
+
+    if not skip_retrieval:
+        console.print("Running retrieval benchmarks...", style="bold blue")
+        await run_retrieval_benchmark(spec)
+
+    if not skip_qa:
+        console.print("\nRunning QA benchmarks...", style="bold yellow")
+        await run_qa_benchmark(spec, qa_limit=qa_limit)
+
+
+app = typer.Typer(help="Run retrieval and QA benchmarks for configured datasets.")
+
+
+@app.command()
+def run(
+    dataset: str = typer.Argument(..., help="Dataset key to evaluate."),
+    skip_db: bool = typer.Option(
+        False, "--skip-db", help="Skip updating the evaluation db."
+    ),
+    skip_retrieval: bool = typer.Option(
+        False, "--skip-retrieval", help="Skip retrieval benchmark."
+    ),
+    skip_qa: bool = typer.Option(False, "--skip-qa", help="Skip QA benchmark."),
+    qa_limit: int | None = typer.Option(
+        None, "--qa-limit", help="Limit number of QA cases."
+    ),
+) -> None:
+    spec = DATASETS.get(dataset.lower())
+    if spec is None:
+        valid_datasets = ", ".join(sorted(DATASETS))
+        raise typer.BadParameter(
+            f"Unknown dataset '{dataset}'. Choose from: {valid_datasets}"
+        )
+
+    asyncio.run(
+        evaluate_dataset(
+            spec=spec,
+            skip_db=skip_db,
+            skip_retrieval=skip_retrieval,
+            skip_qa=skip_qa,
+            qa_limit=qa_limit,
+        )
+    )
+
+
+if __name__ == "__main__":
+    app()
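
With the typer app above, the new benchmark runner can be invoked as a module once the dev dependencies (including pydantic-evals) are installed. A hedged invocation sketch; the exact command shape is an assumption, based on single-command typer apps running without a subcommand name and on the evaluations package needing src on the import path:

    PYTHONPATH=src python -m evaluations.benchmark repliqa --skip-db --qa-limit 25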
--- /dev/null
+++ haiku_rag-0.11.2/src/evaluations/config.py
@@ -0,0 +1,46 @@
+from collections.abc import Callable, Mapping
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from datasets import Dataset
+from pydantic_evals import Case
+
+
+@dataclass
+class DocumentPayload:
+    uri: str
+    content: str
+    title: str | None = None
+    metadata: dict[str, Any] | None = None
+
+
+@dataclass
+class RetrievalSample:
+    question: str
+    expected_uris: tuple[str, ...]
+    skip: bool = False
+
+
+DocumentLoader = Callable[[], Dataset]
+DocumentMapper = Callable[[Mapping[str, Any]], DocumentPayload | None]
+RetrievalLoader = Callable[[], Dataset]
+RetrievalMapper = Callable[[Mapping[str, Any]], RetrievalSample | None]
+CaseBuilder = Callable[[int, Mapping[str, Any]], Case[str, str, dict[str, str]]]
+
+
+@dataclass
+class DatasetSpec:
+    key: str
+    db_filename: str
+    document_loader: DocumentLoader
+    document_mapper: DocumentMapper
+    qa_loader: DocumentLoader
+    qa_case_builder: CaseBuilder
+    retrieval_loader: RetrievalLoader | None = None
+    retrieval_mapper: RetrievalMapper | None = None
+    document_limit: int | None = None
+
+    @property
+    def db_path(self) -> Path:
+        return Path(__file__).parent / "data" / self.db_filename
--- /dev/null
+++ haiku_rag-0.11.2/src/evaluations/datasets/__init__.py
@@ -0,0 +1,8 @@
+from evaluations.config import DatasetSpec
+
+from .repliqa import REPLIQ_SPEC
+from .wix import WIX_SPEC
+
+DATASETS: dict[str, DatasetSpec] = {spec.key: spec for spec in (REPLIQ_SPEC, WIX_SPEC)}
+
+__all__ = ["DATASETS"]
--- /dev/null
+++ haiku_rag-0.11.2/src/evaluations/datasets/repliqa.py
@@ -0,0 +1,58 @@
+from collections.abc import Mapping
+from typing import Any, cast
+
+from datasets import Dataset, DatasetDict, load_dataset
+from pydantic_evals import Case
+
+from evaluations.config import DatasetSpec, DocumentPayload, RetrievalSample
+
+
+def load_repliqa_corpus() -> Dataset:
+    dataset_dict = cast(DatasetDict, load_dataset("ServiceNow/repliqa"))
+    dataset = cast(Dataset, dataset_dict["repliqa_3"])
+    return dataset.filter(lambda doc: doc["document_topic"] == "News Stories")
+
+
+def map_repliqa_document(doc: Mapping[str, Any]) -> DocumentPayload:
+    return DocumentPayload(
+        uri=str(doc["document_id"]),
+        content=doc["document_extracted"],
+    )
+
+
+def map_repliqa_retrieval(doc: Mapping[str, Any]) -> RetrievalSample | None:
+    expected_answer = doc["answer"]
+    if expected_answer == "The answer is not found in the document.":
+        return None
+    return RetrievalSample(
+        question=doc["question"],
+        expected_uris=(str(doc["document_id"]),),
+    )
+
+
+def build_repliqa_case(
+    index: int, doc: Mapping[str, Any]
+) -> Case[str, str, dict[str, str]]:
+    document_id = doc["document_id"]
+    case_name = f"{index}_{document_id}" if document_id is not None else f"case_{index}"
+    return Case(
+        name=case_name,
+        inputs=doc["question"],
+        expected_output=doc["answer"],
+        metadata={
+            "document_id": str(document_id),
+            "case_index": str(index),
+        },
+    )
+
+
+REPLIQ_SPEC = DatasetSpec(
+    key="repliqa",
+    db_filename="repliqa.lancedb",
+    document_loader=load_repliqa_corpus,
+    document_mapper=map_repliqa_document,
+    qa_loader=load_repliqa_corpus,
+    qa_case_builder=build_repliqa_case,
+    retrieval_loader=load_repliqa_corpus,
+    retrieval_mapper=map_repliqa_retrieval,
+)
--- /dev/null
+++ haiku_rag-0.11.2/src/evaluations/datasets/wix.py
@@ -0,0 +1,81 @@
+import json
+from collections.abc import Iterable, Mapping
+from typing import Any, cast
+
+from datasets import Dataset, DatasetDict, load_dataset
+from pydantic_evals import Case
+
+from evaluations.config import DatasetSpec, DocumentPayload, RetrievalSample
+
+
+def load_wix_corpus() -> Dataset:
+    dataset_dict = cast(DatasetDict, load_dataset("Wix/WixQA", "wix_kb_corpus"))
+    return cast(Dataset, dataset_dict["train"])
+
+
+def map_wix_document(doc: Mapping[str, Any]) -> DocumentPayload:
+    article_id = doc.get("id")
+    url = doc.get("url")
+    uri = str(article_id) if article_id is not None else str(url)
+
+    metadata: dict[str, str] = {}
+    if article_id is not None:
+        metadata["article_id"] = str(article_id)
+    if url:
+        metadata["url"] = str(url)
+
+    return DocumentPayload(
+        uri=uri,
+        content=doc["contents"],
+        title=doc.get("title"),
+        metadata=metadata or None,
+    )
+
+
+def load_wix_qa() -> Dataset:
+    dataset_dict = cast(DatasetDict, load_dataset("Wix/WixQA", "wixqa_expertwritten"))
+    return cast(Dataset, dataset_dict["train"])
+
+
+def map_wix_retrieval(doc: Mapping[str, Any]) -> RetrievalSample | None:
+    article_ids: Iterable[int | str] | None = doc.get("article_ids")
+    if not article_ids:
+        return None
+
+    expected_uris = tuple(str(article_id) for article_id in article_ids)
+    return RetrievalSample(
+        question=doc["question"],
+        expected_uris=expected_uris,
+    )
+
+
+def build_wix_case(
+    index: int, doc: Mapping[str, Any]
+) -> Case[str, str, dict[str, str]]:
+    article_ids = tuple(str(article_id) for article_id in doc.get("article_ids") or [])
+    joined_ids = "-".join(article_ids)
+    case_name = f"{index}_{joined_ids}" if joined_ids else f"case_{index}"
+
+    metadata = {
+        "case_index": str(index),
+        "document_ids": json.dumps(article_ids),
+    }
+
+    return Case(
+        name=case_name,
+        inputs=doc["question"],
+        expected_output=doc["answer"],
+        metadata=metadata,
+    )
+
+
+WIX_SPEC = DatasetSpec(
+    key="wix",
+    db_filename="wix.lancedb",
+    document_loader=load_wix_corpus,
+    document_mapper=map_wix_document,
+    qa_loader=load_wix_qa,
+    qa_case_builder=build_wix_case,
+    retrieval_loader=load_wix_qa,
+    retrieval_mapper=map_wix_retrieval,
+)
--- haiku_rag-0.11.0/tests/llm_judge.py
+++ haiku_rag-0.11.2/src/evaluations/llm_judge.py
@@ -37,7 +37,7 @@ class LLMJudgeResponseSchema(BaseModel):
 class LLMJudge:
     """LLM-as-judge for evaluating answer equivalence using Pydantic AI."""
 
-    def __init__(self, model: str = "qwen3"):
+    def __init__(self, model: str = "gpt-oss"):
         # Create Ollama model
         ollama_model = OpenAIChatModel(
             model_name=model,
@@ -49,6 +49,7 @@
             model=ollama_model,
             output_type=LLMJudgeResponseSchema,
             system_prompt=ANSWER_EQUIVALENCE_RUBRIC,
+            retries=3,
         )
 
     async def judge_answers(
--- haiku_rag-0.11.0/src/haiku/rag/client.py
+++ haiku_rag-0.11.2/src/haiku/rag/client.py
@@ -1,4 +1,5 @@
 import hashlib
+import logging
 import mimetypes
 import tempfile
 from collections.abc import AsyncGenerator
@@ -18,6 +19,8 @@ from haiku.rag.store.repositories.document import DocumentRepository
 from haiku.rag.store.repositories.settings import SettingsRepository
 from haiku.rag.utils import text_to_docling_document
 
+logger = logging.getLogger(__name__)
+
 
 class HaikuRAG:
     """High-level haiku-rag client."""
@@ -538,8 +541,8 @@ class HaikuRAG:
         """Rebuild the database by deleting all chunks and re-indexing all documents.
 
         For documents with URIs:
-        - Deletes the document and re-adds it from source if source exists
-        - Skips documents where source no longer exists
+        - Re-adds from source if source exists
+        - Re-embeds from existing content if source is missing
 
         For documents without URIs:
         - Re-creates chunks from existing content
@@ -559,29 +562,51 @@
         for doc in documents:
             assert doc.id is not None, "Document ID should not be None"
             if doc.uri:
-                # Document has a URI - delete and try to re-add from source
-                try:
-                    # Delete the old document first
-                    await self.delete_document(doc.id)
+                # Document has a URI - check if source is accessible
+                source_accessible = False
+                parsed_url = urlparse(doc.uri)
 
-                    # Try to re-create from source (this creates the document with chunks)
-                    new_doc = await self.create_document_from_source(
-                        source=doc.uri, metadata=doc.metadata or {}
+                try:
+                    if parsed_url.scheme == "file":
+                        # Check if file exists
+                        source_path = Path(parsed_url.path)
+                        source_accessible = source_path.exists()
+                    elif parsed_url.scheme in ("http", "https"):
+                        # For URLs, we'll try to create and catch errors
+                        source_accessible = True
+                    else:
+                        source_accessible = False
+                except Exception:
+                    source_accessible = False
+
+                if source_accessible:
+                    # Source exists - delete and recreate from source
+                    try:
+                        await self.delete_document(doc.id)
+                        new_doc = await self.create_document_from_source(
+                            source=doc.uri, metadata=doc.metadata or {}
+                        )
+                        assert new_doc.id is not None, (
+                            "New document ID should not be None"
+                        )
+                        yield new_doc.id
+                    except Exception as e:
+                        logger.error(
+                            "Error recreating document from source %s: %s",
+                            doc.uri,
+                            e,
+                        )
+                        continue
+                else:
+                    # Source missing - re-embed from existing content
+                    logger.warning(
+                        "Source missing for %s, re-embedding from content", doc.uri
                     )
-
-                    assert new_doc.id is not None, "New document ID should not be None"
-                    yield new_doc.id
-
-                except (FileNotFoundError, ValueError, OSError) as e:
-                    # Source doesn't exist or can't be accessed - document already deleted, skip
-                    print(f"Skipping document with URI {doc.uri}: {e}")
-                    continue
-                except Exception as e:
-                    # Unexpected error - log it and skip
-                    print(
-                        f"Unexpected error processing document with URI {doc.uri}: {e}"
+                    docling_document = text_to_docling_document(doc.content)
+                    await self.chunk_repository.create_chunks_for_document(
+                        doc.id, docling_document
                     )
-                    continue
+                    yield doc.id
             else:
                 # Document without URI - re-create chunks from existing content
                 docling_document = text_to_docling_document(doc.content)
--- haiku_rag-0.11.0/src/haiku/rag/config.py
+++ haiku_rag-0.11.2/src/haiku/rag/config.py
@@ -20,8 +20,8 @@ class AppConfig(BaseModel):
     MONITOR_DIRECTORIES: list[Path] = []
 
     EMBEDDINGS_PROVIDER: str = "ollama"
-    EMBEDDINGS_MODEL: str = "mxbai-embed-large"
-    EMBEDDINGS_VECTOR_DIM: int = 1024
+    EMBEDDINGS_MODEL: str = "qwen3-embedding"
+    EMBEDDINGS_VECTOR_DIM: int = 4096
 
     RERANK_PROVIDER: str = ""
     RERANK_MODEL: str = ""
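
Note that the new embedding defaults are not backward compatible: qwen3-embedding produces 4096-dimensional vectors, while stores built under the old mxbai-embed-large default hold 1024-dimensional ones, so existing databases need a rebuild or the previous settings kept. A hedged sketch for pinning the old defaults, assuming these AppConfig fields can be overridden through same-named environment variables:

    export EMBEDDINGS_MODEL=mxbai-embed-large
    export EMBEDDINGS_VECTOR_DIM=1024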