PyPI - classifyre-cli - Versions diffs - 0.4.10__tar.gz → 0.4.12__tar.gz - Mend

classifyre-cli 0.4.10.tar.gz → 0.4.12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (185) hide show

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/.turbo/turbo-build.log RENAMED Viewed

@@ -1,3 +1,3 @@
 $ uv sync
-Resolved 265 packages in 156ms
+Resolved 267 packages in 181ms
 Checked 50 packages in 1ms

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: classifyre-cli
-Version: 0.4.10
+Version: 0.4.12
 Summary: Classifyre CLI — scan and classify unstructured data sources
 License: MIT
 Keywords: data,ingestion,metadata,pii,secrets,unstructured

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/package.json RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@classifyre/cli",
-  "version": "0.4.10",
+  "version": "0.4.12",
   "private": true,
   "scripts": {
     "build": "uv sync",

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "classifyre-cli"
-version = "0.4.10"
+version = "0.4.12"
 description = "Classifyre CLI — scan and classify unstructured data sources"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -47,7 +47,7 @@ privacy = [
     # mid-run in frozen/venv contexts. 8.x eagerly loads all data at import time,
     # avoiding ModuleNotFoundError during Presidio phone number analysis.
     "phonenumbers>=8.13.0,<10.0.0",
-    "numpy>=1.26.0,<2.0.0",
+    "numpy>=1.26.0,<3.0.0",
 ]
 security = [
     "detect-secrets>=1.5.0",
@@ -93,6 +93,10 @@ regex = [
 ]
 llm = [
     "litellm>=1.86.2",
+    # Pure-wheel PDF renderer (permissive license, no system binaries) used to
+    # rasterise PDF pages to images for vision-capable LLM detectors.
+    "pypdfium2>=4.30.0",
+    "pillow>=12.2.0",
 ]
 detectors = [
     { include-group = "file-processing" },
@@ -270,6 +274,8 @@ module = [
     "setfit",
     "litellm.*",
     "litellm",
+    "pypdfium2.*",
+    "pypdfium2",
     "sklearn.*",
     "sklearn",
     "numpy",

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/detectors/custom/runners/_base.py RENAMED Viewed

@@ -2,6 +2,8 @@
 from __future__ import annotations
+import io
+import logging
 import re
 from abc import ABC, abstractmethod
 from datetime import UTC, datetime
@@ -38,6 +40,32 @@ _IMAGE_CONTENT_TYPES = [
     "image/bmp",
     "image/tiff",
 ]
+# Content types HuggingFace image detectors accept. Non-image renderable files
+# (PDFs) are rasterised page-by-page via render_to_images before classification,
+# mirroring the vision LLM detector's input handling.
+_IMAGE_INPUT_CONTENT_TYPES = [*_IMAGE_CONTENT_TYPES, "application/pdf"]
+logger = logging.getLogger(__name__)
+def _load_input_images(content: bytes, content_type: str, pil: Any) -> list[tuple[int, Any]]:
+    """Return ``(page_index, PIL.Image)`` tuples for an image or renderable file.
+    Image MIME types open directly; PDFs (and any type ``render_to_images`` supports)
+    are rasterised to one image per page. Unsupported types return ``[]``.
+    """
+    from ....utils.file_to_images import render_to_images, supported_mime_type
+    normalized = content_type.split(";", 1)[0].strip().lower()
+    try:
+        if normalized.startswith("image/"):
+            return [(0, pil.open(io.BytesIO(content)))]
+        if supported_mime_type(content_type):
+            pages = render_to_images(content, content_type)
+            return [(idx, pil.open(io.BytesIO(png))) for idx, png in enumerate(pages)]
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.warning("Failed to load input images (%s): %s", normalized, exc)
+    return []
 def _resolve_pipeline_severity(

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/detectors/custom/runners/_image_classification.py RENAMED Viewed

@@ -2,7 +2,6 @@
 from __future__ import annotations
-import io
 import logging
 from typing import Any
@@ -11,8 +10,9 @@ from ....models.generated_single_asset_scan_results import DetectionResult
 from ...dependencies import ensure_torch, require_module
 from ._base import (
     _DEFAULT_IMAGE_CLASSIFICATION_MODEL,
-    _IMAGE_CONTENT_TYPES,
+    _IMAGE_INPUT_CONTENT_TYPES,
     BaseRunner,
+    _load_input_images,
     _resolve_pipeline_severity,
 )
@@ -54,45 +54,55 @@ class ImageClassificationRunner(BaseRunner):
         raise NotImplementedError("ImageClassificationRunner uses detect() directly")
     def detect(self, content: str | bytes, content_type: str) -> list[DetectionResult]:
-        if not content_type.startswith("image/"):
-            return []
         if isinstance(content, str):
             logger.warning("image_classification: received string content, expected bytes")
             return []
+        # image/* opens directly; PDFs are rasterised to one image per page.
+        images = _load_input_images(content, content_type, self._pil)
+        if not images:
+            return []
         schema = self._schema
         threshold = schema.confidence_threshold if schema.confidence_threshold is not None else 0.0
+        multi_page = len(images) > 1
         results: list[DetectionResult] = []
-        try:
-            image = self._pil.open(io.BytesIO(content))
-            predictions: list[dict[str, Any]] = self._pipe(image) or []
-            for pred in predictions:
-                label: str = pred.get("label", "unknown")
-                score: float = float(pred.get("score", 0.0))
-                if score < threshold:
-                    continue
-                severity = _resolve_pipeline_severity(label, schema.severity_map)
-                results.append(
-                    self._make_result(
-                        finding_type=f"classification:{label}",
-                        category="CONTENT",
-                        severity=severity,
-                        confidence=score,
-                        matched_content=f"Image classified as: {label} ({score:.3f})",
-                        location=None,
-                        metadata={
-                            "image_size": f"{image.size[0]}x{image.size[1]}",
-                            "image_mode": image.mode,
-                            "model": self._model_id,
-                        },
+        for page_index, image in images:
+            try:
+                predictions: list[dict[str, Any]] = self._pipe(image) or []
+                for pred in predictions:
+                    label: str = pred.get("label", "unknown")
+                    score: float = float(pred.get("score", 0.0))
+                    if score < threshold:
+                        continue
+                    severity = _resolve_pipeline_severity(label, schema.severity_map)
+                    page_suffix = f" (page {page_index + 1})" if multi_page else ""
+                    metadata: dict[str, Any] = {
+                        "image_size": f"{image.size[0]}x{image.size[1]}",
+                        "image_mode": image.mode,
+                        "model": self._model_id,
+                    }
+                    if multi_page:
+                        metadata["page"] = page_index + 1
+                    results.append(
+                        self._make_result(
+                            finding_type=f"classification:{label}",
+                            category="CONTENT",
+                            severity=severity,
+                            confidence=score,
+                            matched_content=(
+                                f"Image classified as: {label} ({score:.3f}){page_suffix}"
+                            ),
+                            location=None,
+                            metadata=metadata,
+                        )
                     )
+            except Exception as exc:
+                logger.error(
+                    "image_classification error (model=%s): %s", self._model_id, exc, exc_info=True
                 )
-        except Exception as exc:
-            logger.error(
-                "image_classification error (model=%s): %s", self._model_id, exc, exc_info=True
-            )
         results.sort(key=lambda r: r.confidence, reverse=True)
         return results
     def get_supported_content_types(self) -> list[str]:
-        return list(_IMAGE_CONTENT_TYPES)
+        return list(_IMAGE_INPUT_CONTENT_TYPES)

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/detectors/custom/runners/_llm.py RENAMED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import base64
 import json
 import logging
 import os
@@ -17,8 +18,9 @@ from ....models.generated_single_asset_scan_results import (
     DetectionResult,
     DetectorType,
 )
+from ....utils.file_to_images import render_to_images, supported_mime_type
 from ...dependencies import require_module
-from ._base import _TEXT_CONTENT_TYPES, BaseRunner, _resolve_pipeline_severity
+from ._base import _IMAGE_CONTENT_TYPES, _TEXT_CONTENT_TYPES, BaseRunner, _resolve_pipeline_severity
 logger = logging.getLogger(__name__)
@@ -29,6 +31,14 @@ _PROVIDER_PREFIX: dict[str, str] = {
     "OPENAI_COMPATIBLE": "openai",
 }
+# Content types a vision-capable LLM detector renders to images and sends to the
+# model directly. PDFs are rasterised page-by-page; images pass through.
+_VISION_CONTENT_TYPES = [*_IMAGE_CONTENT_TYPES, "application/pdf"]
+# Cap the number of rendered page images sent in a single completion to bound
+# token cost and request size for multi-page PDFs.
+_MAX_VISION_IMAGES = 20
 class LLMRunner(BaseRunner):
     """AI detector — sends content to a configured LLM provider for classification + extraction."""
@@ -60,7 +70,7 @@ class LLMRunner(BaseRunner):
     def detect(self, content: str | bytes, content_type: str) -> list[DetectionResult]:
         if isinstance(content, bytes):
-            return []
+            return self._detect_vision(content, content_type)
         if content_type not in _TEXT_CONTENT_TYPES:
             return []
         text = content.strip()
@@ -75,7 +85,48 @@ class LLMRunner(BaseRunner):
             {"role": "system", "content": self._build_system_prompt()},
             {"role": "user", "content": snippet},
         ]
+        return self._complete_and_parse(messages, snippet)
+    def _detect_vision(self, content: bytes, content_type: str) -> list[DetectionResult]:
+        """Render a binary file (image/PDF) to images and classify via the model."""
+        if not self._vision_enabled():
+            return []
+        if not supported_mime_type(content_type):
+            return []
+        images = render_to_images(
+            content,
+            content_type,
+            max_pages=_MAX_VISION_IMAGES,
+        )
+        if not images:
+            return []
+        image_blocks = [
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/png;base64,{base64.b64encode(png).decode('ascii')}"
+                },
+            }
+            for png in images[:_MAX_VISION_IMAGES]
+        ]
+        messages = [
+            {"role": "system", "content": self._build_system_prompt()},
+            {"role": "user", "content": image_blocks},
+        ]
+        # matched_content fallback descriptor — there is no text snippet for files.
+        descriptor = f"[{content_type}, {len(image_blocks)} page image(s)]"
+        return self._complete_and_parse(messages, descriptor, vision_pages=len(image_blocks))
+    def _complete_and_parse(
+        self,
+        messages: list[dict[str, Any]],
+        snippet: str,
+        *,
+        vision_pages: int | None = None,
+    ) -> list[DetectionResult]:
+        schema = self._schema
         try:
             response = self._litellm.completion(
                 model=self._model_string(),
@@ -98,10 +149,16 @@ class LLMRunner(BaseRunner):
             )
             return []
-        return self._results_from_payload(snippet, parsed)
+        return self._results_from_payload(snippet, parsed, vision_pages=vision_pages)
+    def _vision_enabled(self) -> bool:
+        return bool(getattr(self._runtime, "supports_vision", False))
     def get_supported_content_types(self) -> list[str]:
-        return list(_TEXT_CONTENT_TYPES)
+        types = list(_TEXT_CONTENT_TYPES)
+        if self._vision_enabled():
+            types.extend(_VISION_CONTENT_TYPES)
+        return types
     # ── Internals ────────────────────────────────────────────────────────────
@@ -175,7 +232,13 @@ class LLMRunner(BaseRunner):
                 return {}
         return parsed if isinstance(parsed, dict) else {}
-    def _results_from_payload(self, snippet: str, payload: dict[str, Any]) -> list[DetectionResult]:
+    def _results_from_payload(
+        self,
+        snippet: str,
+        payload: dict[str, Any],
+        *,
+        vision_pages: int | None = None,
+    ) -> list[DetectionResult]:
         schema = self._schema
         threshold = schema.confidence_threshold if schema.confidence_threshold is not None else 0.5
         default_severity = schema.severity or Severity.info
@@ -201,7 +264,7 @@ class LLMRunner(BaseRunner):
             results.append(
                 DetectionResult(
                     detector_type=DetectorType.CUSTOM,
-                    finding_type=f"llm:{label}",
+                    finding_type=label,
                     category="CLASSIFICATION",
                     severity=severity,
                     confidence=min(0.99, confidence),
@@ -216,6 +279,8 @@ class LLMRunner(BaseRunner):
                         "model": self._runtime.model,
                         "label": label,
                         "fields": extracted,
+                        "input": "vision" if vision_pages is not None else "text",
+                        **({"vision_pages": vision_pages} if vision_pages is not None else {}),
                     },
                     extracted_data=extracted or None,
                     extraction_method="LLM",

classifyre_cli-0.4.12/src/detectors/custom/runners/_object_detection.py ADDED Viewed

@@ -0,0 +1,121 @@
+"""Object detection pipeline runner."""
+from __future__ import annotations
+import logging
+from typing import Any
+from ....models.generated_detectors import ObjectDetectionPipelineSchema
+from ....models.generated_single_asset_scan_results import DetectionResult, Location
+from ...dependencies import MissingDependencyError, ensure_torch, require_module
+from ._base import (
+    _IMAGE_INPUT_CONTENT_TYPES,
+    BaseRunner,
+    _load_input_images,
+    _resolve_pipeline_severity,
+)
+logger = logging.getLogger(__name__)
+class ObjectDetectionRunner(BaseRunner):
+    """Object detection via a single HuggingFace object-detection pipeline."""
+    def __init__(
+        self,
+        schema: ObjectDetectionPipelineSchema,
+        detector_key: str = "",
+        detector_name: str = "",
+    ) -> None:
+        self._schema = schema
+        self._detector_key = detector_key
+        self._detector_name = detector_name
+        ensure_torch("object_detection", ["custom", "detectors"])
+        transformers = require_module("transformers", "object_detection", ["custom", "detectors"])
+        self._pil = require_module("PIL.Image", "object_detection", ["custom", "detectors"])
+        pipeline_kwargs: dict[str, Any] = {
+            "model": schema.model,
+            "device": schema.device or "cpu",
+        }
+        if schema.model_revision:
+            pipeline_kwargs["revision"] = schema.model_revision
+        nms = getattr(schema.nms_threshold, "root", schema.nms_threshold)
+        if nms is not None:
+            pipeline_kwargs["threshold"] = nms
+        try:
+            self._pipe: Any = transformers.pipeline("object-detection", **pipeline_kwargs)
+        except ImportError as exc:
+            raise MissingDependencyError(
+                "object_detection",
+                ["custom", "detectors"],
+                f"ObjectDetectionRunner requires additional dependencies: {exc}",
+            ) from exc
+    def run(self, text: str) -> None:  # type: ignore[override]  # pragma: no cover
+        raise NotImplementedError("ObjectDetectionRunner uses detect() directly")
+    def detect(self, content: str | bytes, content_type: str) -> list[DetectionResult]:
+        if isinstance(content, str):
+            logger.warning("object_detection: received string content, expected bytes")
+            return []
+        # image/* opens directly; PDFs are rasterised to one image per page.
+        images = _load_input_images(content, content_type, self._pil)
+        if not images:
+            return []
+        schema = self._schema
+        threshold = schema.confidence_threshold if schema.confidence_threshold is not None else 0.5
+        multi_page = len(images) > 1
+        results: list[DetectionResult] = []
+        for page_index, image in images:
+            try:
+                detections: list[dict[str, Any]] = self._pipe(image) or []
+                for det in detections:
+                    label: str = det.get("label", "unknown")
+                    score: float = float(det.get("score", 0.0))
+                    box: dict[str, int] = det.get("box", {})
+                    if score < threshold:
+                        continue
+                    if schema.min_box_area is not None:
+                        w = max(0, box.get("xmax", 0) - box.get("xmin", 0))
+                        h = max(0, box.get("ymax", 0) - box.get("ymin", 0))
+                        if w * h < schema.min_box_area:
+                            continue
+                    severity = _resolve_pipeline_severity(label, schema.severity_map)
+                    page_prefix = f"page {page_index + 1} " if multi_page else ""
+                    metadata: dict[str, Any] = {
+                        "box": box,
+                        "score": score,
+                        "image_size": f"{image.size[0]}x{image.size[1]}",
+                        "model": schema.model,
+                    }
+                    if multi_page:
+                        metadata["page"] = page_index + 1
+                    results.append(
+                        self._make_result(
+                            finding_type=label,
+                            category="CONTENT",
+                            severity=severity,
+                            confidence=score,
+                            matched_content=label,
+                            location=Location(
+                                description=(
+                                    f"{page_prefix}box xmin={box.get('xmin')} ymin={box.get('ymin')}"
+                                    f" xmax={box.get('xmax')} ymax={box.get('ymax')}"
+                                ),
+                            ),
+                            metadata=metadata,
+                        )
+                    )
+            except Exception as exc:
+                logger.error(
+                    "object_detection error (model=%s): %s", schema.model, exc, exc_info=True
+                )
+        results.sort(key=lambda r: r.confidence, reverse=True)
+        if schema.top_k is not None:
+            results = results[: schema.top_k]
+        return results
+    def get_supported_content_types(self) -> list[str]:
+        return list(_IMAGE_INPUT_CONTENT_TYPES)

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/models/generated_detectors.py RENAMED Viewed

@@ -1027,6 +1027,10 @@ class LLMProviderRuntime(BaseModel):
     context_size: int | None = Field(
         None, description='Optional context window size configured for the provider.'
     )
+    supports_vision: bool | None = Field(
+        False,
+        description='Whether the resolved provider/model accepts image/PDF input. When true the detector renders supported files to images and sends them to the model as multimodal input instead of extracting text.',
+    )
 class Type4(StrEnum):

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/models/generated_input.py RENAMED Viewed

@@ -1078,9 +1078,10 @@ class DatabricksAuthMode(StrEnum):
     PAT_TOKEN = 'PAT_TOKEN'
     SERVICE_PRINCIPAL = 'SERVICE_PRINCIPAL'
+    AZURE_SERVICE_PRINCIPAL = 'AZURE_SERVICE_PRINCIPAL'
-class DatabricksRequiredPat(BaseModel):
+class PersonalAccessToken(BaseModel):
     model_config = ConfigDict(
         extra='forbid',
     )
@@ -1094,7 +1095,7 @@ class DatabricksRequiredPat(BaseModel):
     )
-class DatabricksRequiredServicePrincipal(BaseModel):
+class ServicePrincipalOAuthM2M(BaseModel):
     model_config = ConfigDict(
         extra='forbid',
     )
@@ -1109,6 +1110,24 @@ class DatabricksRequiredServicePrincipal(BaseModel):
     client_id: str = Field(..., description='Databricks service principal client ID')
+class AzureServicePrincipal(BaseModel):
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    auth_mode: Literal['AZURE_SERVICE_PRINCIPAL']
+    workspace_url: AnyUrl = Field(
+        ...,
+        description='Azure Databricks workspace URL (for example, https://adb-1234567890123456.7.azuredatabricks.net)',
+    )
+    warehouse_id: str = Field(
+        ..., description='Databricks SQL warehouse ID used for sampling queries'
+    )
+    client_id: str = Field(
+        ..., description='Azure AD application (client) ID for the service principal'
+    )
+    tenant_id: str = Field(..., description='Azure AD tenant ID')
 class DatabricksMaskedPat(BaseModel):
     model_config = ConfigDict(
         extra='forbid',
@@ -1125,6 +1144,15 @@ class DatabricksMaskedServicePrincipal(BaseModel):
     )
+class DatabricksMaskedAzureServicePrincipal(BaseModel):
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    client_secret: str = Field(
+        ..., description='Azure AD client secret for the service principal'
+    )
 class DatabricksOptionalConnection(BaseModel):
     """
     Databricks API and SQL statement execution tuning options.
@@ -2020,8 +2048,8 @@ class DatabricksInput(CoreInput):
     type: Literal['DATABRICKS'] = Field(
         'DATABRICKS', description='Type of the asset or source'
     )
-    required: DatabricksRequiredPat | DatabricksRequiredServicePrincipal = Field(
-        ..., title='DatabricksRequired'
+    required: PersonalAccessToken | ServicePrincipalOAuthM2M | AzureServicePrincipal = (
+        Field(..., title='DatabricksRequired')
     )
     masked: DatabricksMaskedPat | DatabricksMaskedServicePrincipal = Field(
         ..., title='DatabricksMasked'

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/outputs/rest.py RENAMED Viewed

@@ -131,6 +131,10 @@ class RestOutputSink:
         self.base_url = base_url.rstrip("/")
         self.timeout_sec = timeout_sec
         self.session = requests.Session()
+        # Disable keep-alive so stale pooled connections are never reused after
+        # a pod restart or server-side keep-alive timeout.  Each request opens
+        # a fresh TCP connection, which is cheap enough for our batch cadence.
+        self.session.headers.update({"Connection": "close"})
         adapter = HTTPAdapter(max_retries=_RETRY_POLICY)
         self.session.mount("http://", adapter)
         self.session.mount("https://", adapter)

{classifyre_cli-0.4.10 → classifyre_cli-0.4.12}/src/pipeline/detector_pipeline.py RENAMED Viewed

@@ -410,7 +410,7 @@ class DetectorPipeline:
             )
             return page_findings, page_types, page_errors, page_content, page_num
-        async def _collect_done_and_flush() -> None:
+        async def _collect_done_and_flush(min_findings: int = 1) -> None:
             nonlocal detector_types_run, unflushed_count
             done = {t for t in pending_tasks if t.done()}
             for task in done:
@@ -430,7 +430,7 @@ class DetectorPipeline:
                 )
                 unflushed_count += len(page_findings)
-            if unflushed_count >= findings_flush_size and unflushed_count > 0:
+            if unflushed_count >= min_findings and unflushed_count > 0:
                 logger.debug(
                     "  %s flushing %d findings (%d total)",
                     asset.name,
@@ -449,36 +449,17 @@ class DetectorPipeline:
             if not text_content:
                 continue
+            # Bound the number of detector tasks in flight. While the buffer is
+            # full we batch flushes by ``findings_flush_size`` to avoid hammering
+            # the API when pages pile up faster than detectors can drain them.
             while len(pending_tasks) >= max_pending:
-                done, pending_tasks_set = await asyncio.wait(
-                    pending_tasks,
-                    return_when=asyncio.FIRST_COMPLETED,
-                )
-                pending_tasks = pending_tasks_set
-                for task in done:
-                    page_findings, page_types, page_errors, page_content, _pn = task.result()
-                    for finding in page_findings:
-                        self.content_provider.enrich_finding_location(
-                            finding,
-                            asset,
-                            page_content,
-                        )
-                    findings.extend(page_findings)
-                    errors.extend(page_errors)
-                    detector_types_run = self._merge_detector_types(
-                        detector_types_run,
-                        page_types,
-                    )
-                    unflushed_count += len(page_findings)
-                if unflushed_count >= findings_flush_size and unflushed_count > 0:
-                    logger.info(
-                        "  %s flushing %d findings (%d total)",
-                        asset.name,
-                        unflushed_count,
-                        len(findings),
-                    )
-                    await on_findings_flushed(list(findings))
-                    unflushed_count = 0
+                await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED)
+                await _collect_done_and_flush(findings_flush_size)
+            # Steady state: flush findings from any page that has already
+            # finished as soon as they are available, so real findings stream to
+            # the API per page instead of only once the whole asset is processed.
+            await _collect_done_and_flush()
             task = asyncio.create_task(_detect_page(text_content, page_index))
             pending_tasks.add(task)
@@ -652,7 +633,7 @@ class DetectorPipeline:
         detected_at = datetime.now(UTC)
         for i, (detector, result) in enumerate(zip(runnable_detectors, results, strict=False)):
-            detector_name = detector.__class__.__name__
+            detector_name = self._detector_log_label(detector)
             via = task_via[i]
             loc = f"{asset_name}:{page_tag}" if page_tag else asset_name

classifyre-cli 0.4.10.tar.gz → 0.4.12.tar.gz

classifyre-cli 0.4.10.tar.gz → 0.4.12.tar.gz