moderators-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moderators/__init__.py +1 -0
- moderators/auto_model.py +158 -0
- moderators/cli.py +39 -0
- moderators/integrations/__init__.py +1 -0
- moderators/integrations/base.py +138 -0
- moderators/integrations/transformers_moderator.py +192 -0
- moderators/utils/__init__.py +11 -0
- moderators/utils/callbacks.py +30 -0
- moderators/utils/deps.py +88 -0
- moderators/utils/events.py +169 -0
- moderators/utils/image.py +58 -0
- moderators-0.0.1.dist-info/METADATA +156 -0
- moderators-0.0.1.dist-info/RECORD +16 -0
- moderators-0.0.1.dist-info/WHEEL +4 -0
- moderators-0.0.1.dist-info/entry_points.txt +2 -0
- moderators-0.0.1.dist-info/licenses/LICENSE +201 -0
moderators/__init__.py
ADDED

__version__ = "0.0.1"

moderators/auto_model.py
ADDED

# auto_model.py
"""
AutoModerator Factory.

This module contains the AutoModerator class, a factory that automatically selects and initializes the correct moderator
class based on a model identifier from the Hugging Face Hub.
"""

from __future__ import annotations

import importlib
import json
from pathlib import Path
from typing import Any

# We import BaseModerator only for type hinting.
# This avoids potential circular dependency issues.
from .integrations.base import BaseModerator


def _load_config(identifier: str, *, local_files_only: bool = False) -> dict[str, Any]:
    """Loads a config.json file from a local path or the Hugging Face Hub."""
    p = Path(identifier)
    if p.is_dir():
        cfg_path = p / "config.json"
        if not cfg_path.exists():
            raise FileNotFoundError(f"config.json not found in local folder: {cfg_path}")
        return json.loads(cfg_path.read_text(encoding="utf-8"))

    # We lazy-import huggingface_hub only when needed to reduce initial import time.
    from huggingface_hub import hf_hub_download

    cfg_fp = hf_hub_download(
        repo_id=identifier,
        filename="config.json",
        repo_type="model",
        local_files_only=local_files_only,
    )
    return json.loads(Path(cfg_fp).read_text(encoding="utf-8"))


def _is_transformers_cfg(cfg: dict[str, Any]) -> bool:
    """Checks if the given configuration belongs to a Transformers model."""
    # The `architectures` key alone is not enough; we confirm with other signatures.
    has_tf_sig = any(k in cfg for k in ("transformers_version", "model_type", "id2label", "label2id"))
    has_arch_list = isinstance(cfg.get("architectures"), list)
    return has_arch_list and has_tf_sig


def _infer_task(cfg: dict[str, Any]) -> str | None:
    """Attempts to infer the model's task by inspecting its architecture or problem_type."""
    archs = [str(a).lower() for a in cfg.get("architectures", [])]
    if any("classification" in a for a in archs):
        return "image-classification"

    prob = str(cfg.get("problem_type", "")).lower()
    if "classification" in prob:
        return "image-classification"

    return None


class AutoModerator:
    """
    A factory class that loads the correct moderator using the `from_pretrained` method.

    This class cannot be instantiated directly (its `__init__` method will raise an error).
    Instead, it should be used like:
    `AutoModerator.from_pretrained('username/my-model')`
    """

    def __init__(self, *args, **kwargs) -> None:
        """AutoModerator cannot be instantiated directly."""
        raise OSError(
            "AutoModerator is a factory class and cannot be instantiated directly. "
            "Please use the `AutoModerator.from_pretrained('model_id')` method."
        )

    @classmethod
    def from_pretrained(
        cls,
        model_id: str,
        config: dict | None = None,
        local_files_only: bool = False,
        **kwargs: Any,
    ) -> BaseModerator:
        """
        Loads the appropriate moderator from a model ID on the Hub or a local path.

        This method reads the `config.json` file, determines the model's architecture,
        dynamically loads the corresponding moderator class, and returns an initialized instance of it.

        Args:
            model_id (str): The Hugging Face Hub ID of the model to load or a path to a
                local directory.
            config (dict, optional): If provided, this config will be used instead of
                downloading one from the Hub.
            local_files_only (bool, optional): If True, will not attempt to download files
                and will only look at local cached files. Defaults to False.
            **kwargs: Additional keyword arguments to be passed to the moderator
                class's `__init__` method.

        Returns:
            BaseModerator: A loaded and ready-to-use moderator object.
        """
        # Step 1: Load the configuration
        cfg = dict(config or _load_config(model_id, local_files_only=local_files_only))

        # Step 2: Determine the model architecture
        architecture = cfg.get("architecture")
        if not architecture:
            # If architecture is not specified, try to infer if it's a Transformers model
            if _is_transformers_cfg(cfg):
                cfg["architecture"] = "TransformersModerator"
                # If the task is also not specified, try to infer it
                if not cfg.get("task"):
                    inferred_task = _infer_task(cfg)
                    if inferred_task:
                        cfg["task"] = inferred_task
                    else:
                        raise ValueError(
                            "Could not infer 'task' from the Transformers config. "
                            "Please specify 'task' in the model's config.json "
                            "(e.g. 'image-classification')."
                        )
            else:
                raise ValueError(
                    f"Could not determine 'architecture' from config.json for model '{model_id}'. "
                    "Please specify 'architecture' in the config file."
                )

        architecture = cfg["architecture"]

        # Step 3: Dynamically load the correct moderator class based on the architecture
        # For MVP, only TransformersModerator is implemented
        if architecture != "TransformersModerator":
            raise NotImplementedError(
                f"'{architecture}' is not yet supported in this version of Moderators. "
                "As of now, only 'TransformersModerator' is implemented."
            )

        module_name = architecture.replace("Moderator", "_moderator").lower()
        module_path = f"moderators.integrations.{module_name}"

        try:
            module = importlib.import_module(module_path)
            moderator_class = getattr(module, architecture)
        except (ImportError, AttributeError) as e:
            raise ImportError(
                f"Could not find or import the class '{architecture}'. "
                f"Please ensure it is defined in '{module_path}.py'. Error: {e}"
            )

        # Step 4: Initialize the moderator class and load its model
        instance = moderator_class(model_id=model_id, config=cfg, **kwargs)
        instance.load_model()

        return instance

moderators/cli.py
ADDED

# src/moderators/cli.py
import argparse
import json
from dataclasses import asdict, is_dataclass

from moderators.auto_model import AutoModerator


def _to_jsonable(obj):
    """Convert objects to JSON-serializable format."""
    if is_dataclass(obj):
        return asdict(obj)
    if isinstance(obj, (list, dict, str, int, float)) or obj is None:
        return obj
    return str(obj)


def main():
    """Run the moderators CLI."""
    parser = argparse.ArgumentParser(prog="moderators", description="Moderators CLI")
    parser.add_argument("model", nargs="?", help="Local model folder or HF model id")
    parser.add_argument("input", nargs="?", help="Input text or file path")
    parser.add_argument("--local-files-only", action="store_true", dest="local_files_only", help="Use only local files")
    args = parser.parse_args()

    if not args.model:
        parser.print_help()
        return 0

    mod = AutoModerator.from_pretrained(args.model, local_files_only=args.local_files_only)
    if args.input:
        out = mod(args.input)
        print(json.dumps([_to_jsonable(x) for x in out], ensure_ascii=False, indent=2))
    else:
        print("Model loaded. Provide the 'input' argument to run inference.")


if __name__ == "__main__":
    main()

moderators/integrations/__init__.py
ADDED

# Integration package init

moderators/integrations/base.py
ADDED

from __future__ import annotations

from abc import ABC, abstractmethod  # added
from dataclasses import dataclass, field
from typing import Any

from huggingface_hub import ModelHubMixin


@dataclass
class Box:
    """
    Represents a bounding box detection result.

    Attributes:
        xyxy: Bounding box coordinates as [x1, y1, x2, y2]
        label: Classification label for the detected object
        score: Confidence score for the detection
    """

    # xyxy: [x1, y1, x2, y2]
    xyxy: list[float]
    label: str
    score: float


@dataclass
class PredictionResult:
    """
    Represents the output of a moderation prediction.

    Attributes:
        source_path: Context about the source (file path, URL, etc.)
        classifications: Probability map for classification tasks
        detections: List of bounding box detections
        raw_output: Raw model output specific to the integration
    """

    # Context about the source (file path, URL, etc.)
    source_path: str = ""
    # Probability map for classification
    classifications: dict[str, float] = field(default_factory=dict)
    # Detection results
    detections: list[Box] = field(default_factory=list)
    # Raw output specific to models/integrations
    raw_output: Any = None


class BaseModerator(ABC, ModelHubMixin):
    """
    Base class for all moderator implementations.

    Provides the core prediction flow and callback system for content moderation.
    """

    def __init__(self, config: dict[str, Any], model_id: str, **kwargs: Any) -> None:
        """
        Initialize the moderator.

        Args:
            config: Configuration dictionary for the moderator
            model_id: Model identifier (HuggingFace Hub ID or local path)
            **kwargs: Additional keyword arguments
        """
        self.config: dict[str, Any] = dict(config or {})
        self.model_id: str = model_id
        self.config.setdefault("model_id", self.model_id)
        self.callbacks = self.get_default_callbacks()

    @abstractmethod
    def load_model(self) -> None:
        """Load model/pipeline and any processors if present."""
        pass

    # Inference flow
    def __call__(self, source: Any, **kwargs: Any):
        """
        Execute the full prediction pipeline.

        Args:
            source: Input source (text, image path, PIL Image, etc.)
            **kwargs: Additional keyword arguments

        Returns:
            List of PredictionResult objects
        """
        self.run_callbacks("on_predict_start")
        processed_inputs = self._preprocess(source)
        model_outputs = self._predict(processed_inputs)
        results = self._postprocess(model_outputs)
        self.run_callbacks("on_predict_end")
        return results

    @abstractmethod
    def _preprocess(self, inputs: Any) -> Any:
        """Convert inputs to model-ready format."""
        pass

    @abstractmethod
    def _predict(self, processed_inputs: Any) -> Any:
        """Run model inference."""
        pass

    @abstractmethod
    def _postprocess(self, model_outputs: Any) -> Any:
        """Convert outputs to PredictionResult format."""
        pass

    @abstractmethod
    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """Save model and any processors to the given directory."""
        raise NotImplementedError

    # Callback system (simple MVP)
    def get_default_callbacks(self) -> dict[str, list]:
        """
        Get the default callback configuration.

        Returns:
            Dictionary mapping event names to lists of callback functions
        """
        from moderators.utils.callbacks import DEFAULT_CALLBACKS

        return {k: list(v) for k, v in DEFAULT_CALLBACKS.items()}

    def run_callbacks(self, event_name: str) -> None:
        """
        Execute all callbacks for a given event.

        Args:
            event_name: Name of the event to trigger callbacks for
        """
        for func in self.callbacks.get(event_name, []):
            try:
                func(self)
            except Exception:
                # Do not break inference flow due to a callback failure
                pass

moderators/integrations/transformers_moderator.py
ADDED

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from moderators.utils import (
    auto_install,
    ensure_dl_framework,
    ensure_pillow_for_task,
    ensure_transformers,
    preprocess_image_input,
)

from .base import BaseModerator, PredictionResult


class TransformersModerator(BaseModerator):
    """Moderator implementation using HuggingFace Transformers."""

    def load_model(self) -> None:
        """
        Build a transformers pipeline deterministically:
        - Validate task.
        - Ensure deps (transformers, DL framework, Pillow for image tasks).
        - Try AutoProcessor (if local `preprocessor_config.json` exists).
        - Fallback order: AutoImageProcessor -> AutoFeatureExtractor.
        - Also try AutoTokenizer when relevant.
        - Pass only successfully loaded components (processor / image_processor / feature_extractor / tokenizer).
        """
        task = self.config.get("task")
        if not task:
            raise ValueError("TransformersModerator requires 'task' in config.json")

        # Ensure transformers is available
        try:
            _transformers = ensure_transformers(auto_install)
        except Exception as e:
            raise ImportError(
                "TransformersModerator requires the 'transformers' package. "
                "Install with: uv pip install -e '.[transformers]' or: uv pip install transformers"
            ) from e

        pipeline = getattr(_transformers, "pipeline")

        # Ensure a DL framework (pt/tf/flax)
        framework = ensure_dl_framework(auto_install)

        # Ensure Pillow for image tasks
        ensure_pillow_for_task(task, auto_install)

        model_id = self.model_id

        processor = None
        image_processor = None
        feature_extractor = None
        tokenizer = None

        # Check local preprocessor_config.json
        try:
            p = Path(model_id)
            has_local_preprocessor = p.is_dir() and (p / "preprocessor_config.json").exists()
        except Exception:
            has_local_preprocessor = False

        # AutoProcessor (generic unified) first if local config hints it exists
        if has_local_preprocessor:
            try:
                AutoProcessor = getattr(_transformers, "AutoProcessor", None)
                if AutoProcessor:
                    processor = AutoProcessor.from_pretrained(model_id)
            except Exception:
                processor = None  # soft fallback

        # If no unified processor, attempt vision processors explicitly
        if processor is None:
            # Newer API
            try:
                AutoImageProcessor = getattr(_transformers, "AutoImageProcessor", None)
                if AutoImageProcessor:
                    image_processor = AutoImageProcessor.from_pretrained(model_id)
            except Exception:
                image_processor = None
            # Legacy feature extractor
            if image_processor is None:
                try:
                    AutoFeatureExtractor = getattr(_transformers, "AutoFeatureExtractor", None)
                    if AutoFeatureExtractor:
                        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
                except Exception:
                    feature_extractor = None

        # Tokenizer (independent of vision processors)
        try:
            AutoTokenizer = getattr(_transformers, "AutoTokenizer", None)
            if AutoTokenizer:
                tokenizer = AutoTokenizer.from_pretrained(model_id)
        except Exception:
            tokenizer = None

        pipe_kwargs = {}
        if processor is not None:
            pipe_kwargs["processor"] = processor
        else:
            if image_processor is not None:
                pipe_kwargs["image_processor"] = image_processor
            elif feature_extractor is not None:
                pipe_kwargs["feature_extractor"] = feature_extractor
        if tokenizer is not None:
            pipe_kwargs["tokenizer"] = tokenizer

        self._pipe = pipeline(
            task,
            model=model_id,
            framework=framework,
            **pipe_kwargs,
        )

    def _preprocess(self, inputs: Any) -> Any:
        task = str(self.config.get("task", "")).lower()
        if "image" in task:
            return preprocess_image_input(inputs, min_side=2)
        return inputs

    def _predict(self, processed_inputs: Any) -> Any:
        return self._pipe(processed_inputs)

    def _postprocess(self, model_outputs: Any) -> list[PredictionResult]:
        # Pipelines typically return dict or list[dict]
        outputs = model_outputs
        if isinstance(outputs, dict):
            outputs = [outputs]

        results: list[PredictionResult] = []
        for out in outputs:
            classifications: dict[str, float] = {}
            label = out.get("label")
            score = out.get("score")
            if label is not None and score is not None:
                classifications[str(label)] = float(score)

            results.append(
                PredictionResult(
                    source_path=str(self.config.get("source", "")),
                    classifications=classifications,
                    detections=[],
                    raw_output=out,
                )
            )
        return results

    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """Saves model + tokenizer + (processor / image_processor / feature_extractor) and refreshes/creates a
        config.json with required moderator metadata.
        """
        out_dir = Path(save_directory)
        out_dir.mkdir(parents=True, exist_ok=True)

        pipe = getattr(self, "_pipe", None)

        model = getattr(pipe, "model", None) if pipe is not None else None
        tokenizer = getattr(pipe, "tokenizer", None) if pipe is not None else None

        # Unified vision processor resolution order:
        # processor (generic) -> image_processor (newer HF) -> feature_extractor (legacy)
        vision_processor = None
        if pipe is not None:
            vision_processor = (
                getattr(pipe, "processor", None)
                or getattr(pipe, "image_processor", None)
                or getattr(pipe, "feature_extractor", None)
            )

        if model and hasattr(model, "save_pretrained"):
            model.save_pretrained(out_dir)
        if tokenizer and hasattr(tokenizer, "save_pretrained"):
            tokenizer.save_pretrained(out_dir)
        if vision_processor and hasattr(vision_processor, "save_pretrained"):
            vision_processor.save_pretrained(out_dir)

        cfg_path = out_dir / "config.json"
        try:
            cfg = json.loads(cfg_path.read_text(encoding="utf-8")) if cfg_path.exists() else {}
        except Exception:
            cfg = {}

        cfg["architecture"] = "TransformersModerator"
        if self.config.get("task"):
            cfg["task"] = self.config["task"]

        cfg_path.write_text(json.dumps(cfg, ensure_ascii=False, indent=2), encoding="utf-8")
        return str(out_dir)

moderators/utils/__init__.py
ADDED

# filepath init for utils package
from .deps import auto_install, ensure_dl_framework, ensure_pillow_for_task, ensure_transformers
from .image import preprocess_image_input

__all__ = [
    "auto_install",
    "ensure_transformers",
    "ensure_dl_framework",
    "ensure_pillow_for_task",
    "preprocess_image_input",
]

moderators/utils/callbacks.py
ADDED

from moderators.utils.events import events


def on_predict_start(predictor):
    """
    Callback executed before prediction starts.

    Enqueues analytics event with task and model_id information.
    """
    # Add analytics event
    try:
        cfg = {
            "task": predictor.config.get("task", "unknown_task"),
            "model_id": getattr(predictor, "model_id", None),
        }
        events(cfg)
    except Exception:
        # Callback should not break inference flow
        pass


def on_predict_end(predictor):
    """Callback executed after prediction ends."""
    pass


DEFAULT_CALLBACKS = {
    "on_predict_start": [on_predict_start],
    "on_predict_end": [on_predict_end],
}

moderators/utils/deps.py
ADDED

from __future__ import annotations

import os
import shutil
import subprocess
import sys
from collections.abc import Callable


def auto_install(packages: list[str]) -> bool:
    """
    Try to auto-install required packages using 'uv' if available, otherwise fall back to 'pip'.

    Controlled by env var: MODERATORS_DISABLE_AUTO_INSTALL=1 to disable.
    """
    if str(os.environ.get("MODERATORS_DISABLE_AUTO_INSTALL", "")).lower() in ("1", "true", "yes"):
        return False

    uv = shutil.which("uv")
    cmd = [uv, "pip", "install", *packages] if uv else [sys.executable, "-m", "pip", "install", *packages]

    try:
        subprocess.check_call(cmd)
        return True
    except Exception:
        return False


def ensure_transformers(install_fn: Callable[[list[str]], bool]):
    """Ensure 'transformers' is importable; optionally auto-install and retry."""
    try:
        import transformers as _transformers  # noqa: F401

        return _transformers
    except Exception:
        if not install_fn(["transformers"]):
            raise
        import transformers as _transformers  # type: ignore

        return _transformers


def ensure_dl_framework(install_fn: Callable[[list[str]], bool]) -> str:
    """
    Ensure at least one DL framework is available.

    Preference: PyTorch ('pt'), TensorFlow ('tf'), JAX/Flax ('flax').
    Tries to auto-install torch first.
    """
    try:
        import torch  # noqa: F401

        return "pt"
    except Exception:
        if install_fn(["torch"]):
            try:
                import torch  # noqa: F401

                return "pt"
            except Exception:
                pass
    try:
        import tensorflow  # noqa: F401

        return "tf"
    except Exception:
        pass
    try:
        import jax  # noqa: F401

        return "flax"
    except Exception:
        pass
    raise ImportError(
        "A deep learning framework is required for transformers pipelines. Install PyTorch with: uv pip install torch"
    )


def ensure_pillow_for_task(task: str, install_fn: Callable[[list[str]], bool]) -> None:
    """For image tasks, ensure Pillow is available; auto-install if missing."""
    if "image" not in str(task).lower():
        return
    try:
        import PIL  # noqa: F401
    except Exception:
        if not install_fn(["Pillow"]):
            raise ImportError("This image task requires Pillow. Install with: uv pip install Pillow")
        import PIL  # noqa: F401

moderators/utils/events.py
ADDED

from __future__ import annotations

import json
import threading
import time
import uuid
from pathlib import Path
from typing import Any

import requests


def _robust_post_request(
    url: str,
    json_data: dict[str, Any],
    retries: int = 3,
    initial_wait: float = 1.0,
    timeout: int = 5,
) -> None:
    """
    Sends a POST request with JSON data to a URL, with retries on transient errors using an exponential backoff
    strategy.

    Args:
        url (str): The URL to send the request to.
        json_data (dict[str, Any]): The JSON data to send.
        retries (int): The maximum number of retries for a failed request.
        initial_wait (float): The initial wait time between retries in seconds.
        timeout (int): The timeout for each individual request in seconds.
    """
    wait_time = initial_wait
    for attempt in range(retries + 1):
        try:
            response = requests.post(url, json=json_data, timeout=timeout)
            # 2xx status codes indicate success, exit the function.
            if 200 <= response.status_code < 300:
                return
            # 4xx client errors are not worth retrying, break the loop.
            if 400 <= response.status_code < 500:
                break
            # 5xx server errors are worth retrying.
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            # In case of a timeout or connection error, proceed to the next attempt.
            pass

        # Wait only if this is not the last attempt.
        if attempt < retries:
            time.sleep(wait_time)
            wait_time *= 2  # Double the wait time for the next attempt.


def _settings_path() -> Path:
    base = Path.home() / ".moderators"
    base.mkdir(parents=True, exist_ok=True)
    return base / "settings.json"


def _read_settings() -> dict[str, Any]:
    path = _settings_path()
    if not path.exists():
        return {"sync": True}
    try:
        return json.loads(path.read_text())
    except Exception:
        return {"sync": True}


class Events:
    """
    Handles the collection and transmission of anonymous usage analytics.

    Implemented as a queue with background sending and rate limiting.
    """

    def __init__(self) -> None:
        """Initialize the Events analytics handler."""
        self.url = (
            "https://www.google-analytics.com/mp/collect?measurement_id=G-XDJCD0WJDW&api_secret=xgz_lUC6SK-u4-EDAUFcFg"
        )
        self.events: list[dict[str, Any]] = []
        self.rate_limit_seconds = 30.0
        self.last_sent_ts = 0.0
        self.metadata = self._get_metadata()
        self.enabled = self._is_enabled()

    def __call__(self, cfg: dict[str, Any]) -> None:
        """
        Enqueue an analytics event.

        Args:
            cfg: Configuration dictionary containing task and model_id
        """
        if not self.enabled:
            return
        if len(self.events) > 25:
            return

        # Clean the event name by replacing hyphens with underscores
        original_task_name = cfg.get("task", "unknown_task")
        sanitized_event_name = original_task_name.replace("-", "_")

        event_data = {
            "name": sanitized_event_name,
            "params": {**self.metadata, "model_id": cfg.get("model_id")},
        }
        self.events.append(event_data)

        if (time.time() - self.last_sent_ts) > self.rate_limit_seconds:
            self.send_events()

    def send_events(self) -> None:
        """Send queued analytics events in a background thread."""
        if not self.events:
            return

        data_payload = {"client_id": self.metadata["user_id"], "events": self.events}
        self.events = []
        self.last_sent_ts = time.time()
        threading.Thread(target=self._make_request, args=(data_payload,), daemon=True).start()

    def _make_request(self, json_data: dict[str, Any]) -> None:
        """
        Makes a robust network request to send analytics data.

        This method now uses a helper function with retry logic.
        """
        # We are now using the more robust helper function.
        _robust_post_request(self.url, json_data)

    def _get_metadata(self) -> dict[str, Any]:
        user_id = self._get_or_create_user_id()
        return {
            "user_id": user_id,
            "library": "moderators",
            "library_version": self._get_version(),
        }

    def _get_or_create_user_id(self) -> str:
        path = Path.home() / ".moderators" / "user.json"
        path.parent.mkdir(parents=True, exist_ok=True)
        if path.exists():
            try:
                data = json.loads(path.read_text())
                if "user_id" in data:
                    return str(data["user_id"])
            except Exception:
                pass
        uid = str(uuid.uuid4())
        try:
            path.write_text(json.dumps({"user_id": uid}))
        except Exception:
            pass
        return uid

    def _is_enabled(self) -> bool:
        settings = _read_settings()
        return bool(settings.get("sync", True))

    def _get_version(self) -> str:
        try:
            from moderators import __version__  # type: ignore

            return str(__version__)
        except Exception:
            return "0"


# Global singleton instance
events = Events()

moderators/utils/image.py
ADDED

from __future__ import annotations

from pathlib import Path
from typing import Any


def preprocess_image_input(inputs: Any, min_side: int = 0) -> Any:
    """
    Preprocesses image inputs from Path, PIL Image, or a list/tuple (batch).

    - Opens the image if the input is a path.
    - Converts the image to RGB.
    - (Optional) If min_side > 0, proportionally scales up small images.
    Returns the input as is if Pillow is not installed or if the input type is unrecognized.
    """
    try:
        from PIL import Image
    except ImportError:
        return inputs

    def _process(obj: Any):
        # Path or string
        if isinstance(obj, (str, Path)):
            try:
                img = Image.open(str(obj))
            except (FileNotFoundError, OSError):
                return obj
        # PIL Image-like object
        elif hasattr(obj, "mode") and hasattr(obj, "convert"):
            img = obj
        else:
            return obj

        # Ensure the image is in RGB mode
        if img.mode != "RGB":
            try:
                img = img.convert("RGB")
            except Exception:
                return obj  # Return the original if conversion fails

        # Optional resizing
        if min_side and min_side > 0:
            try:
                w, h = img.size
                if w < min_side or h < min_side:
                    scale = max(min_side / w, min_side / h)
                    new_w = int(round(w * scale))
                    new_h = int(round(h * scale))
                    resample = getattr(getattr(Image, "Resampling", Image), "BILINEAR")
                    img = img.resize((new_w, new_h), resample)
            except Exception:
                pass

        return img

    if isinstance(inputs, (list, tuple)):
        return [_process(x) for x in inputs]
    return _process(inputs)

moderators-0.0.1.dist-info/METADATA
ADDED

Metadata-Version: 2.4
Name: moderators
Version: 0.0.1
Summary: Moderators: One package to moderate them all
Project-URL: Homepage, https://github.com/viddexa/moderators
Project-URL: Source, https://github.com/viddexa/moderators
Project-URL: Bug Reports, https://github.com/viddexa/moderators/discussions/categories/q-a
Project-URL: Changelog, https://github.com/viddexa/moderators/releases
Author: Moderators Team
License-File: LICENSE
Requires-Python: >=3.9
Requires-Dist: huggingface-hub>=0.22
Requires-Dist: requests
Provides-Extra: dev
Requires-Dist: docformatter; extra == 'dev'
Requires-Dist: pillow>=9.0; extra == 'dev'
Requires-Dist: pytest-xdist; extra == 'dev'
Requires-Dist: pytest>=7.0; extra == 'dev'
Requires-Dist: ruff; extra == 'dev'
Requires-Dist: torch; extra == 'dev'
Provides-Extra: transformers
Requires-Dist: transformers>=4.36; extra == 'transformers'
Description-Content-Type: text/markdown

# Moderators

[](https://github.com/viddexa/moderators/actions/workflows/ci.yml)

# TODO: refactor readme to target users instead of maintainers

This repository provides an extensible core skeleton for content moderation. Phase 1 includes:
- Standard data classes (Box, PredictionResult)
- BaseModerator flow (predict → _preprocess → _predict → _postprocess)
- ModelHubMixin-based `AutoModerator` factory (reads `config.json` from HF Hub or local)
- CLI: `moderators` (load and run inference)

First integration: Transformers.

## Installation

Create a Python environment (Python 3.10+ recommended):

```bash
uv venv --python 3.10
source .venv/bin/activate
```

Install with pip:

```bash
pip install moderators[transformers]
```

Install with uv:

```bash
uv add "moderators[transformers]"
```

Install from source:

```bash
uv sync --extra transformers
```

## Quick Start

```python
from moderators.auto_model import AutoModerator

moderator = AutoModerator.from_pretrained("org/model")  # or a local folder path
results = moderator("some input")
print(results)
```

`config.json` example (Transformers):
```json
{
  "architecture": "TransformersModerator",
  "task": "image-classification"
}
```

- Naming convention: the `XyzModerator` class must be defined in `moderators/integrations/xyz_moderator.py` (a minimal sketch follows below).
- Note: `AutoModerator` is a factory class; it returns the actual integration instance.

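The sketch below illustrates this convention against the `BaseModerator` interface from `moderators/integrations/base.py`; the `KeywordModerator` class, its file name, and the `blocked_words` config key are hypothetical and only meant to show the required methods:

```python
# moderators/integrations/keyword_moderator.py (hypothetical example)
from typing import Any

from .base import BaseModerator, PredictionResult


class KeywordModerator(BaseModerator):
    """Toy moderator that flags inputs containing blocked words."""

    def load_model(self) -> None:
        # The "model" is just a word list read from config.json.
        self._blocked = set(self.config.get("blocked_words", ["spam"]))

    def _preprocess(self, inputs: Any) -> str:
        return str(inputs).lower()

    def _predict(self, processed_inputs: str) -> dict:
        hit = any(word in processed_inputs for word in self._blocked)
        return {"label": "flagged" if hit else "ok", "score": 1.0}

    def _postprocess(self, model_outputs: dict) -> list[PredictionResult]:
        return [
            PredictionResult(
                classifications={model_outputs["label"]: model_outputs["score"]},
                raw_output=model_outputs,
            )
        ]

    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        return save_directory
```

Note that in this release `AutoModerator` only dispatches to `TransformersModerator` (see Limitations below), so a custom class like this would have to be instantiated directly.
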
## Automatic dependency installation
When using the Transformers integration, the library may auto-install missing dependencies at runtime:
- transformers
- A deep learning framework (PyTorch preferred: torch)
- Pillow (for image tasks)

It uses `uv` if available, otherwise falls back to `pip`. Disable auto-install via:
```
export MODERATORS_DISABLE_AUTO_INSTALL=1
```

## Usage Overview
`AutoModerator.from_pretrained("org/model")` dynamically loads the correct integration class based on the `"architecture"` field in `config.json`.

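For example, a fully offline load from a local folder (the path below is illustrative) might look like:

```python
from moderators.auto_model import AutoModerator

# The folder must contain config.json plus the model files
moderator = AutoModerator.from_pretrained("./my-local-model", local_files_only=True)
results = moderator("input to moderate")
```
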
## Command Line (CLI)
Run models directly from the terminal.

Usage:
```
moderators <model_id_or_local_dir> <input> [--local-files-only]
```

Examples:
- Text classification:
```
moderators distilbert/distilbert-base-uncased-finetuned-sst-2-english "I love this!"
```

- Image classification (Falconsai/nsfw_image_detection) with a local image:
```
moderators Falconsai/nsfw_image_detection /path/to/image.jpg
```

Notes:
- The CLI prints JSON to stdout.
- Use `--local-files-only` to force offline usage if all files are already cached.

## Transformers config inference
If `"architecture"` is missing but the config looks like a Transformers model (e.g., has `architectures`, `transformers_version`, `id2label`/`label2id`), the factory assumes:
- `architecture = "TransformersModerator"`
- It tries to infer `"task"` (e.g., classification). If it cannot infer, you must specify `"task"` explicitly (e.g., `"image-classification"`).

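For instance, a stock image-classification config along these lines (field values are illustrative) would be accepted without any moderator-specific keys, because the architecture name contains "Classification":

```json
{
  "architectures": ["ViTForImageClassification"],
  "model_type": "vit",
  "id2label": {"0": "normal", "1": "nsfw"},
  "transformers_version": "4.36.0"
}
```
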
## Callbacks
Moderators run a minimal callback system around prediction:
- `on_predict_start(moderator)` is called before prediction.
- `on_predict_end(moderator)` is called after prediction.

By default, `on_predict_start` enqueues a lightweight analytics event (see below). You can customize per-instance callbacks:
```python
mod = AutoModerator.from_pretrained("org/model")
# Disable all start callbacks (including analytics)
mod.callbacks["on_predict_start"].clear()
# Or add your own callback
def my_callback(m):
    print("Starting inference for", m.model_id)
mod.callbacks["on_predict_start"].append(my_callback)
```

## Anonymous Telemetry

We believe in providing our users with full control over their data. By default, our package is configured to collect analytics to help improve the experience for all users. However, we respect that some users may prefer to opt out of this data collection.

To opt out of sending analytics, you can simply create a `~/.moderators/settings.json` file with `"sync": false`. This ensures that no data is transmitted from your machine to our analytics tools.

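For example, from a shell:

```bash
mkdir -p ~/.moderators
echo '{"sync": false}' > ~/.moderators/settings.json
```
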
## Limitations (Phase 1)
- Only `TransformersModerator` is supported; other architectures raise `NotImplementedError`.
- Image tasks require Pillow and at least one DL framework (preferably PyTorch). The library may attempt auto-install, otherwise it will raise an error.

## Integrations
- Transformers integration

moderators-0.0.1.dist-info/RECORD
ADDED

moderators/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
moderators/auto_model.py,sha256=ZUX2ZjJJcW-2UelCggf7n9oUUvp7Tbr7TG7kaSgp2EE,6388
moderators/cli.py,sha256=4N-NZaX56odysAoICrOnRfw5wH0hxWzlIDbw8_qdbWs,1263
moderators/integrations/__init__.py,sha256=s0PG87w1yriVdi58mFpaVPyK3KS7eytvFhPJZ2S8pes,27
moderators/integrations/base.py,sha256=6sqeQVT2CAQ2pMikEkH7UCa2GbM4jYmHnYZEe7r8ks0,4185
moderators/integrations/transformers_moderator.py,sha256=axFzJAIUUAL-JYerU7ydO2iWdhu9BFyik4emcZIh7RE,7171
moderators/utils/__init__.py,sha256=1qcESgxVGKkI-KPMVBzTyuh2QD2Tc98e1gyTMlG6Gfw,322
moderators/utils/callbacks.py,sha256=qJ9fzJIsx9_Ei56xKyaQjr81h3oScWLArdBy37NPvh8,706
moderators/utils/deps.py,sha256=_-3Mms_20BiZss0TTHnhNDfxIBVcIlOLX5Ht3Dj8pZU,2480
moderators/utils/events.py,sha256=FBdfFESXp-PFj1d8SjGB8qSUElsPrINHHDos3AtK99M,5359
moderators/utils/image.py,sha256=vr9N-JrfaFkxX9pt-DddjUAXmbLknl8EnxsHvsHmGDU,1856
moderators-0.0.1.dist-info/METADATA,sha256=3DqAMPdeuW7OK5mJYadRiudtkN8ucFlN-c8PqTTSPA4,5304
moderators-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
moderators-0.0.1.dist-info/entry_points.txt,sha256=x4tWXqjIbl_SyOAJm0PjW1GHBocMQPFlOGxmdCvK2D0,51
moderators-0.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
moderators-0.0.1.dist-info/RECORD,,

moderators-0.0.1.dist-info/licenses/LICENSE
ADDED

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.