hamtaa-texttools 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hamtaa-texttools has been flagged for review.

Files changed (60)
  1. hamtaa_texttools-0.1.43.dist-info/METADATA +60 -0
  2. hamtaa_texttools-0.1.43.dist-info/RECORD +60 -0
  3. hamtaa_texttools-0.1.43.dist-info/WHEEL +5 -0
  4. hamtaa_texttools-0.1.43.dist-info/top_level.txt +1 -0
  5. texttools/__init__.py +26 -0
  6. texttools/base/__init__.py +3 -0
  7. texttools/base/base_categorizer.py +40 -0
  8. texttools/base/base_keyword_extractor.py +35 -0
  9. texttools/base/base_ner_extractor.py +61 -0
  10. texttools/base/base_question_detector.py +35 -0
  11. texttools/base/base_question_generator.py +99 -0
  12. texttools/base/base_question_merger.py +59 -0
  13. texttools/base/base_question_rewriter.py +61 -0
  14. texttools/base/base_router.py +33 -0
  15. texttools/base/base_summarizer.py +55 -0
  16. texttools/base/base_task_performer.py +53 -0
  17. texttools/base/base_translator.py +38 -0
  18. texttools/batch_manager/__init__.py +2 -0
  19. texttools/batch_manager/batch_manager.py +241 -0
  20. texttools/batch_manager/batch_runner.py +207 -0
  21. texttools/formatter/__init__.py +1 -0
  22. texttools/formatter/base.py +26 -0
  23. texttools/formatter/gemma3_formatter.py +51 -0
  24. texttools/handlers/__init__.py +6 -0
  25. texttools/handlers/categorizer/__init__.py +6 -0
  26. texttools/handlers/categorizer/categorizer.py +61 -0
  27. texttools/handlers/handlers.py +88 -0
  28. texttools/tools/__init__.py +33 -0
  29. texttools/tools/categorizer/__init__.py +2 -0
  30. texttools/tools/categorizer/encoder_model/__init__.py +1 -0
  31. texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +51 -0
  32. texttools/tools/categorizer/llm/__init__.py +2 -0
  33. texttools/tools/categorizer/llm/gemma_categorizer.py +169 -0
  34. texttools/tools/categorizer/llm/openai_categorizer.py +80 -0
  35. texttools/tools/keyword_extractor/__init__.py +1 -0
  36. texttools/tools/keyword_extractor/gemma_extractor.py +138 -0
  37. texttools/tools/merger/__init__.py +2 -0
  38. texttools/tools/merger/gemma_question_merger.py +214 -0
  39. texttools/tools/ner/__init__.py +1 -0
  40. texttools/tools/ner/gemma_ner_extractor.py +157 -0
  41. texttools/tools/question_detector/__init__.py +2 -0
  42. texttools/tools/question_detector/gemma_detector.py +130 -0
  43. texttools/tools/question_detector/llm_detector.py +112 -0
  44. texttools/tools/question_generator/__init__.py +1 -0
  45. texttools/tools/question_generator/gemma_question_generator.py +198 -0
  46. texttools/tools/reranker/__init__.py +3 -0
  47. texttools/tools/reranker/reranker.py +137 -0
  48. texttools/tools/reranker/scorer.py +216 -0
  49. texttools/tools/reranker/sorter.py +278 -0
  50. texttools/tools/rewriter/__init__.py +2 -0
  51. texttools/tools/rewriter/gemma_question_rewriter.py +213 -0
  52. texttools/tools/router/__init__.py +0 -0
  53. texttools/tools/router/gemma_router.py +169 -0
  54. texttools/tools/subject_to_question/__init__.py +1 -0
  55. texttools/tools/subject_to_question/gemma_question_generator.py +224 -0
  56. texttools/tools/summarizer/__init__.py +2 -0
  57. texttools/tools/summarizer/gemma_summarizer.py +140 -0
  58. texttools/tools/summarizer/llm_summerizer.py +108 -0
  59. texttools/tools/translator/__init__.py +1 -0
  60. texttools/tools/translator/gemma_translator.py +202 -0
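
Before the diffs themselves, a minimal quick-start sketch for orientation. The `texttools` import path comes from top_level.txt above, and the `LLMQuestionDetector` import mirrors the usage example in llm_detector.py's own docstring further down; the model name is a placeholder, not something the package pins.

```python
# Hypothetical quick-start; assumes OPENAI_API_KEY is configured.
#   pip install hamtaa-texttools==0.1.43
from openai import OpenAI
from texttools import LLMQuestionDetector

client = OpenAI()
detector = LLMQuestionDetector(client=client, model="gpt-4o-2024-08-06")
print(detector.detect("How are you today?"))  # expected: True
```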
@@ -0,0 +1,214 @@
+ from typing import Any, Optional
+
+ from openai import OpenAI
+ from texttools.base.base_question_merger import BaseQuestionsMerger, MergingMode
+ from texttools.formatter import Gemma3Formatter
+
+ # class QuestionGeneration(BaseModel):
+ #     generated_question: str
+
+
+ class GemmaQuestionMerger(BaseQuestionsMerger):
+     """
+     Question merger for Gemma-style models with one mode for now:
+     1. Merge the provided questions, preserving all the main points.
+     Outputs JSON with a single string field: {"merged_question": "..."}.
+
+     Allows optional extra instructions via `prompt_template`.
+     """
+
+     def __init__(
+         self,
+         client: OpenAI,
+         *,
+         model: str,
+         chat_formatter: Optional[Any] = None,
+         use_reason: bool = False,
+         temperature: float = 0.5,
+         prompt_template: Optional[str] = None,
+         handlers: Optional[list[Any]] = None,
+         **client_kwargs: Any,
+     ):
+         super().__init__(handlers)
+         self.client = client
+         self.model = model
+         self.temperature = temperature
+         self.client_kwargs = client_kwargs
+
+         self.chat_formatter = chat_formatter or Gemma3Formatter()
+
+         self.use_reason = use_reason
+         self.reason_summary = None
+         self.prompt_template = prompt_template
+
+         # Schema kept for reference; structured output is currently disabled below.
+         self.json_schema = {"merged_question": "string"}
+
+     def _build_messages(
+         self,
+         questions: list[str],
+         mode: MergingMode,
+     ) -> list[dict[str, str]]:
+         """
+         Builds the message list for the LLM API call for question merging,
+         adapting the prompt based on the chosen mode.
+         """
+         clean_questions = self.preprocess(questions)
+         messages: list[dict[str, str]] = []
+
+         if self.prompt_template:
+             messages.append({"role": "user", "content": self.prompt_template})
+
+         if self.reason_summary:
+             messages.append(
+                 {
+                     "role": "user",
+                     "content": f"Based on this analysis: {self.reason_summary}",
+                 }
+             )
+
+         if mode == MergingMode.DEFAULT_MODE:
+             instruction = (
+                 "You are a language expert. "
+                 "I will give you a list of questions that are semantically similar. "
+                 "Your task is to merge them into one unified question that: "
+                 "- Preserves all the information and intent from the original questions. "
+                 "- Sounds natural, fluent, and concise. "
+                 "- Avoids redundancy or unnecessary repetition. "
+                 "- Does not omit any unique idea from the originals. "
+                 "**Output only the merged question.**"
+             )
+         elif mode == MergingMode.REASON_MODE:
+             instruction = (
+                 "You are an AI assistant helping to unify semantically similar questions. "
+                 "First, briefly extract the unique intent or content from each input question. "
+                 "Then, write one merged question that combines all their content clearly and naturally, without redundancy. "
+                 "Step 1: Extract key ideas. "
+                 "Step 2: Write the final merged question."
+             )
+         else:
+             raise ValueError(f"Unsupported merging mode: {mode}")
+
+         messages.append({"role": "user", "content": instruction})
+         messages.append(
+             {"role": "user", "content": f"Here are the questions: {clean_questions}"}
+         )
+
+         # schema_instr = f"Respond only in JSON format: {json.dumps(self.json_schema)}"
+         messages.append(
+             {
+                 "role": "user",
+                 "content": """
+                 Respond only with the new merged question, without any additional information.
+                 **The merged question must be in the language of the user's input.**
+                 """,
+             }
+         )
+
+         # messages.append({"role": "assistant", "content": "{"})
+         # deprecated method for structured output
+
+         # This call restructures the messages based on the provided formatter;
+         # some models require custom settings.
+         restructured = self.chat_formatter.format(messages=messages)
+
+         return restructured
+
+     def _reason(self, questions: list[str], mode: MergingMode) -> None:
+         """
+         Internal reasoning step to help the model understand the core meaning
+         or structure of the questions, depending on the mode.
+         Stores the summary in `self.reason_summary`.
+         """
+         if mode == MergingMode.DEFAULT_MODE:
+             reason_prompt = """
+             Analyze the following questions to identify their core intent, key concepts,
+             and the specific information they are seeking.
+             Provide a brief, summarized understanding of the questions' meaning that
+             will help in merging and rephrasing them accurately without changing their intent.
+
+             **Respond in the language of the questions.**
+             """
+         elif mode == MergingMode.REASON_MODE:
+             reason_prompt = """
+             Analyze the following questions to identify their exact wording, phrasing,
+             and the literal meaning they convey.
+             Provide a brief, summarized analysis of their linguistic structure and current meaning,
+             which will then be used to create a new question containing all of their contents.
+             **Respond in the language of the questions.**
+             """
+         else:
+             raise ValueError(f"Unsupported merging mode for reason: {mode}")
+
+         messages = [
+             {"role": "user", "content": reason_prompt},
+             {"role": "user", "content": f"Here are the questions: {questions}"},
+         ]
+
+         restructured = self.chat_formatter.format(messages=messages)
+
+         resp = self.client.chat.completions.create(
+             model=self.model,
+             messages=restructured,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+
+         self.reason_summary = resp.choices[0].message.content.strip()
+
+     def rewrite_questions(
+         self,
+         questions: list[str],
+         mode: MergingMode = MergingMode.DEFAULT_MODE,
+         reason_summary: Optional[str] = None,
+     ) -> str:
+         """
+         Merges the input `questions` based on the specified `mode`.
+         Optionally uses an internal reasoning step for better accuracy.
+         """
+         if self.use_reason and not reason_summary:
+             self._reason(questions, mode)
+         elif reason_summary:
+             self.reason_summary = reason_summary
+
+         messages = self._build_messages(questions, mode)
+
+         # Structured-output variant, kept for reference while testing whether
+         # the results hold up without structured output:
+         # completion = self.client.beta.chat.completions.parse(
+         #     model=self.model,
+         #     messages=messages,
+         #     response_format=QuestionGeneration,
+         #     temperature=self.temperature,
+         #     extra_body=dict(guided_decoding_backend="outlines"),
+         #     **self.client_kwargs,
+         # )
+         # message = completion.choices[0].message
+         # if message.parsed:
+         #     result = message.parsed.generated_question
+         # else:
+         #     raise ValueError(f"Failed to parse the response. Raw content: {message.content}")
+
+         resp = self.client.chat.completions.create(
+             model=self.model,
+             messages=messages,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+
+         result = resp.choices[0].message.content.strip()
+
+         # Dispatch to handlers and return.
+         self._dispatch(
+             {
+                 "original_questions": questions,
+                 "merged_question": result,
+                 "mode": mode.value,
+             }
+         )
+         return result
+
+     def get_reason(self):
+         return self.reason_summary
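
A minimal usage sketch for the merger above. The base URL and model name are placeholders for any OpenAI-compatible server hosting a Gemma-style model, and the `texttools.tools.merger` import path assumes the re-export implied by that package's __init__.py in the file listing.

```python
from openai import OpenAI

from texttools.base.base_question_merger import MergingMode
from texttools.tools.merger import GemmaQuestionMerger

# Placeholder endpoint and model; not pinned by the package itself.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
merger = GemmaQuestionMerger(client, model="google/gemma-3-27b-it", use_reason=True)

merged = merger.rewrite_questions(
    ["What is the capital of France?", "How many people live in Paris?"],
    mode=MergingMode.DEFAULT_MODE,
)
print(merged)  # one unified question covering both inputs
```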
@@ -0,0 +1 @@
+ from texttools.tools.ner.gemma_ner_extractor import GemmaNERExtractor
@@ -0,0 +1,157 @@
+ import json
+ from typing import Any, Optional
+
+ from openai import OpenAI
+
+ from texttools.base.base_ner_extractor import BaseNERExtractor
+
+
+ class GemmaNERExtractor(BaseNERExtractor):
+     """
+     Named Entity Recognition (NER) system for Gemma-style models with an optional reasoning step.
+     Outputs JSON with a single array field: {"entities": [{"text": "...", "type": "..."}, ...]}.
+
+     Allows optional extra instructions via `prompt_template`.
+     """
+
+     def __init__(
+         self,
+         client: OpenAI,
+         *,
+         model: str,
+         use_reason: bool = False,
+         temperature: float = 0.0,
+         prompt_template: Optional[str] = None,
+         # Handlers can be any type that implements a .handle method
+         handlers: Optional[list[Any]] = None,
+         **client_kwargs: Any,
+     ):
+         super().__init__(handlers)
+         self.client = client
+         self.model = model
+         self.temperature = temperature
+         self.client_kwargs = client_kwargs
+
+         self.use_reason = use_reason
+         self.prompt_template = prompt_template
+
+         # Define the JSON schema for NER output:
+         # an array of objects, each with 'text' (string) and 'type' (string).
+         self.json_schema = {
+             "entities": [
+                 {
+                     "text": "string",
+                     "type": "string",
+                 }
+             ]
+         }
+
+     def _build_messages(
+         self, text: str, reason: Optional[str] = None
+     ) -> list[dict[str, str]]:
+         """
+         Builds the message list for the LLM API call for entity extraction.
+         """
+         clean_text = self.preprocess(text)
+
+         messages: list[dict[str, str]] = []
+
+         if self.prompt_template:
+             messages.append({"role": "user", "content": self.prompt_template})
+
+         if reason:
+             messages.append(
+                 {"role": "user", "content": f"Based on this analysis: {reason}"}
+             )
+
+         messages.append(
+             {
+                 "role": "user",
+                 "content": "Identify and extract all named entities (e.g., PER, ORG, LOC, DAT, etc.) from the following text. For each entity, provide its text and a clear type. Respond as a JSON array of objects.",
+             }
+         )
+         messages.append({"role": "user", "content": clean_text})
+
+         # Ensure the schema is dumped as a valid JSON string for the LLM
+         schema_instr = f"Respond only in JSON format: {json.dumps(self.json_schema)}"
+         messages.append({"role": "user", "content": schema_instr})
+
+         messages.append(
+             {"role": "assistant", "content": "{"}
+         )  # Hint to start JSON output
+         return messages
+
+     def _reason(self, text: str) -> str:
+         """
+         Internal reasoning step to help the model identify potential entities and their context.
+         """
+         messages = [
+             {
+                 "role": "user",
+                 "content": """
+                 Read the following text and identify any proper nouns, key concepts, or specific mentions that might represent named entities.
+                 Provide a brief, summarized analysis that could help in categorizing these entities.
+                 """,
+             },
+             {
+                 "role": "user",
+                 "content": f"""
+                 {text}
+                 """,
+             },
+         ]
+
+         resp = self.client.chat.completions.create(
+             model=self.model,
+             messages=messages,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+
+         reason_summary = resp.choices[0].message.content.strip()
+         return reason_summary
+
+     def extract_entities(self, text: str) -> list[dict[str, str]]:
+         """
+         Extracts named entities from `text`.
+         Optionally uses an internal reasoning step for better accuracy.
+         """
+         reason_summary = None
+         if self.use_reason:
+             reason_summary = self._reason(text)
+
+         messages = self._build_messages(text, reason_summary)
+         resp = self.client.chat.completions.create(
+             model=self.model,
+             messages=messages,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+         raw = resp.choices[0].message.content.strip()
+
+         # Re-attach the opening brace (sent as an assistant hint above) if the model omitted it.
+         if not raw.startswith("{"):
+             raw = "{" + raw
+         try:
+             parsed = json.loads(raw)
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Failed to parse JSON for NER: {e}\nRaw output: {raw}")
+
+         entities = parsed.get("entities")
+
+         # Validate that 'entities' is a list of dicts with string 'text' and 'type' fields.
+         if not isinstance(entities, list) or not all(
+             isinstance(item, dict)
+             and "text" in item
+             and "type" in item
+             and isinstance(item["text"], str)
+             and isinstance(item["type"], str)
+             for item in entities
+         ):
+             raise ValueError(
+                 f"Invalid response schema for NER. Expected 'entities' as a list of dicts with 'text' and 'type', got: {parsed}"
+             )
+
+         # Dispatch to handlers and return.
+         self._dispatch(entities=entities, original_text=text)
+         return entities
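
A minimal usage sketch for the extractor above; the import path is grounded in the `texttools/tools/ner/__init__.py` re-export shown earlier, while the endpoint, model name, and example output are placeholders.

```python
from openai import OpenAI

from texttools.tools.ner import GemmaNERExtractor

# Placeholder endpoint and model; any OpenAI-compatible server should do.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
ner = GemmaNERExtractor(client, model="google/gemma-3-27b-it")

entities = ner.extract_entities("Ada Lovelace worked with Charles Babbage in London.")
# e.g. [{"text": "Ada Lovelace", "type": "PER"}, {"text": "London", "type": "LOC"}, ...]
```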
@@ -0,0 +1,2 @@
+ from texttools.tools.question_detector.llm_detector import LLMQuestionDetector
+ from texttools.tools.question_detector.gemma_detector import GemmaQuestionDetector
@@ -0,0 +1,130 @@
+ from typing import Any, Optional
+
+ from openai import OpenAI
+ from pydantic import BaseModel
+
+ from texttools.base.base_question_detector import BaseQuestionDetector
+ from texttools.formatter import Gemma3Formatter
+
+
+ class QuestionDetection(BaseModel):
+     is_question: bool
+
+
+ class GemmaQuestionDetector(BaseQuestionDetector):
+     """
+     Simplified binary question detector for Gemma-style models without system prompts.
+     Outputs JSON with a single boolean field: {"is_question": true|false}.
+
+     Allows optional extra instructions via `prompt_template`.
+     """
+
+     def __init__(
+         self,
+         client: OpenAI,
+         *,
+         model: str,
+         chat_formatter: Optional[Any] = None,
+         use_reason: bool = False,
+         temperature: float = 0.0,
+         prompt_template: Optional[str] = None,
+         handlers: Optional[list[Any]] = None,
+         **client_kwargs: Any,
+     ):
+         super().__init__(handlers)
+         self.client = client
+         self.model = model
+         self.temperature = temperature
+         self.client_kwargs = client_kwargs
+
+         self.chat_formatter = chat_formatter or Gemma3Formatter()
+
+         self.use_reason = use_reason
+         self.prompt_template = prompt_template
+
+         # Human-readable schema hint, embedded in the prompt below.
+         self.json_schema = {"is_question": "true | false"}
+
+     def _build_messages(
+         self, text: str, reason: Optional[str] = None
+     ) -> list[dict[str, str]]:
+         clean = self.preprocess(text)
+         schema_instr = f"Respond only in JSON format: {self.json_schema}"
+         messages: list[dict[str, str]] = []
+
+         if reason:
+             messages.append({"role": "user", "content": reason})
+
+         messages.append({"role": "user", "content": schema_instr})
+         if self.prompt_template:
+             messages.append({"role": "user", "content": self.prompt_template})
+         messages.append({"role": "user", "content": clean})
+
+         # This call restructures the messages based on the provided formatter;
+         # some models require custom settings.
+         restructured = self.chat_formatter.format(messages=messages)
+
+         return restructured
+
+     def _reason(self, text: str) -> str:
+         messages = [
+             {
+                 "role": "user",
+                 "content": """
+                 We want to analyze this text snippet to see whether it contains a question
+                 or a request of some kind.
+                 Read the text and reason about whether it is a request or not.
+                 Give a summarized, short answer.
+                 """,
+             },
+             {
+                 "role": "user",
+                 "content": f"""
+                 {text}
+                 """,
+             },
+         ]
+
+         restructured = self.chat_formatter.format(messages=messages)
+
+         resp = self.client.chat.completions.create(
+             model=self.model,
+             messages=restructured,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+
+         reason = resp.choices[0].message.content.strip()
+         return reason
+
+     def detect(self, text: str) -> bool:
+         """
+         Returns True if `text` is a question, False otherwise.
+         Optionally uses an internal reasoning step for better accuracy.
+         """
+         reason_summary = None
+         if self.use_reason:
+             reason_summary = self._reason(text)
+
+         messages = self._build_messages(text, reason_summary)
+
+         completion = self.client.beta.chat.completions.parse(
+             model=self.model,
+             messages=messages,
+             response_format=QuestionDetection,
+             temperature=self.temperature,
+             extra_body=dict(guided_decoding_backend="auto"),
+             **self.client_kwargs,
+         )
+         message = completion.choices[0].message
+         if message.parsed:
+             result = message.parsed.is_question
+         else:
+             raise ValueError(
+                 f"Failed to parse the response. Raw content: {message.content}"
+             )
+
+         # Dispatch to handlers and return.
+         self._dispatch({"question": text, "result": result})
+         return result
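
A minimal usage sketch for the Gemma detector above; the import path follows the `texttools/tools/question_detector/__init__.py` re-export shown earlier, and the endpoint and model name are placeholders for an OpenAI-compatible server that supports guided decoding.

```python
from openai import OpenAI

from texttools.tools.question_detector import GemmaQuestionDetector

# Placeholder endpoint and model; not pinned by the package itself.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
detector = GemmaQuestionDetector(client, model="google/gemma-3-27b-it", use_reason=True)

print(detector.detect("Could you summarize this document?"))  # expected: True
print(detector.detect("The sky is blue."))                    # expected: False
```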
@@ -0,0 +1,112 @@
+ from typing import Any, Optional
+
+ from openai import OpenAI
+ from pydantic import BaseModel, create_model
+
+ from texttools.base.base_question_detector import BaseQuestionDetector
+
+
+ class LLMQuestionDetector(BaseQuestionDetector):
+     """
+     LLM-based binary question detector that wraps OpenAI's structured output parsing.
+
+     Usage:
+     ```python
+     from openai import OpenAI
+     from texttools import LLMQuestionDetector
+
+     # Instantiate an OpenAI client (ensure you've set OPENAI_API_KEY)
+     client = OpenAI()
+
+     # Create detector
+     detector = LLMQuestionDetector(
+         client=client,
+         model="gpt-4o-2024-08-06",
+         temperature=0.0,  # deterministic outputs
+         prompt_template=(
+             "You are a binary classifier. "
+             "Answer only with `true` or `false` depending on the input."
+         ),
+         handlers=[my_handler],  # optional callbacks on each detection
+         max_tokens=10,  # any other OpenAI client kwargs
+     )
+
+     # Detect whether a string is a question
+     is_question = detector.detect("How are you today?")
+     # is_question == True
+     ```
+
+     Parameters:
+         client (OpenAI):
+             Instantiated OpenAI client. Make sure your API key is configured.
+         model (str):
+             Model name to use (e.g. "gpt-4", "gpt-4o-2024-08-06").
+         temperature (float, default=0.0):
+             Sampling temperature; 0.0 yields deterministic outputs.
+         prompt_template (str, optional):
+             System-level instructions guiding the classification.
+         handlers (list[callable], optional):
+             List of callables that receive {"question": str, "result": bool} after each detect().
+         client_kwargs (Any):
+             Additional parameters passed directly to OpenAI (e.g., max_tokens, top_p).
+
+     Internals:
+         - Wraps your input in system/user messages.
+         - Uses Pydantic to enforce that the API returns a boolean.
+         - Dispatches the result to any registered handlers.
+     """
+
+     def __init__(
+         self,
+         client: OpenAI,
+         *,
+         model: str,
+         temperature: float = 0.0,
+         prompt_template: Optional[str] = None,
+         handlers: Optional[list[Any]] = None,
+         **client_kwargs: Any,
+     ):
+         """
+         :param client: an instantiated OpenAI client
+         :param model: the model name (e.g. "gpt-4o-2024-08-06")
+         :param temperature: sampling temperature
+         :param prompt_template: override default prompt instructions
+         :param handlers: optional list of result handlers
+         :param client_kwargs: any other OpenAI kwargs (e.g. `max_tokens`, `top_p`, etc.)
+         """
+         super().__init__(handlers)
+         self.client = client
+         self.model = model
+         self.temperature = temperature
+         self.client_kwargs = client_kwargs
+
+         self.prompt_template = prompt_template or (
+             "You are a binary classifier. "
+             "Answer only with `true` or `false` depending on the input."
+         )
+
+         # Pydantic model enforcing a single boolean `result` field.
+         self._OutputModel = create_model(
+             "DetectionOutput",
+             result=(bool, ...),
+         )
+
+     def _build_messages(self, text: str) -> list[dict[str, str]]:
+         clean = self.preprocess(text)
+         return [
+             {"role": "system", "content": self.prompt_template},
+             {"role": "user", "content": clean},
+         ]
+
+     def detect(self, text: str) -> bool:
+         msgs = self._build_messages(text)
+         resp = self.client.responses.parse(
+             model=self.model,
+             input=msgs,
+             text_format=self._OutputModel,
+             temperature=self.temperature,
+             **self.client_kwargs,
+         )
+         output: BaseModel = resp.output_parsed
+         self._dispatch({"question": text, "result": output.result})
+         return output.result
@@ -0,0 +1 @@
+ from .gemma_question_generator import GemmaQuestionGenerator