hamtaa-texttools 0.1.48__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hamtaa-texttools might be problematic. Click here for more details.

Files changed (86) hide show
  1. hamtaa_texttools-1.1.7.dist-info/METADATA +228 -0
  2. hamtaa_texttools-1.1.7.dist-info/RECORD +30 -0
  3. hamtaa_texttools-1.1.7.dist-info/licenses/LICENSE +21 -0
  4. texttools/__init__.py +4 -26
  5. texttools/batch/__init__.py +3 -0
  6. texttools/{utils/batch_manager → batch}/batch_manager.py +226 -241
  7. texttools/batch/batch_runner.py +254 -0
  8. texttools/prompts/README.md +35 -0
  9. texttools/prompts/categorizer.yaml +28 -0
  10. texttools/prompts/extract_entities.yaml +20 -0
  11. texttools/prompts/extract_keywords.yaml +18 -0
  12. texttools/prompts/is_question.yaml +14 -0
  13. texttools/prompts/merge_questions.yaml +46 -0
  14. texttools/prompts/rewrite.yaml +111 -0
  15. texttools/prompts/run_custom.yaml +7 -0
  16. texttools/prompts/subject_to_question.yaml +22 -0
  17. texttools/prompts/summarize.yaml +14 -0
  18. texttools/prompts/text_to_question.yaml +20 -0
  19. texttools/prompts/translate.yaml +15 -0
  20. texttools/tools/__init__.py +4 -33
  21. texttools/tools/async_the_tool.py +435 -0
  22. texttools/tools/internals/async_operator.py +242 -0
  23. texttools/tools/internals/base_operator.py +100 -0
  24. texttools/tools/internals/formatters.py +24 -0
  25. texttools/tools/internals/operator.py +242 -0
  26. texttools/tools/internals/output_models.py +62 -0
  27. texttools/tools/internals/prompt_loader.py +60 -0
  28. texttools/tools/the_tool.py +433 -0
  29. hamtaa_texttools-0.1.48.dist-info/METADATA +0 -60
  30. hamtaa_texttools-0.1.48.dist-info/RECORD +0 -61
  31. texttools/base/__init__.py +0 -3
  32. texttools/base/base_categorizer.py +0 -40
  33. texttools/base/base_keyword_extractor.py +0 -35
  34. texttools/base/base_ner_extractor.py +0 -61
  35. texttools/base/base_question_detector.py +0 -35
  36. texttools/base/base_question_generator.py +0 -99
  37. texttools/base/base_question_merger.py +0 -59
  38. texttools/base/base_question_rewriter.py +0 -61
  39. texttools/base/base_router.py +0 -33
  40. texttools/base/base_summarizer.py +0 -55
  41. texttools/base/base_task_performer.py +0 -53
  42. texttools/base/base_translator.py +0 -38
  43. texttools/formatter/__init__.py +0 -1
  44. texttools/formatter/base.py +0 -26
  45. texttools/formatter/gemma3_formatter.py +0 -54
  46. texttools/handlers/__init__.py +0 -6
  47. texttools/handlers/categorizer/__init__.py +0 -6
  48. texttools/handlers/categorizer/categorizer.py +0 -61
  49. texttools/handlers/handlers.py +0 -88
  50. texttools/tools/categorizer/__init__.py +0 -2
  51. texttools/tools/categorizer/encoder_model/__init__.py +0 -1
  52. texttools/tools/categorizer/encoder_model/encoder_vectorizer.py +0 -51
  53. texttools/tools/categorizer/llm/__init__.py +0 -2
  54. texttools/tools/categorizer/llm/gemma_categorizer.py +0 -169
  55. texttools/tools/categorizer/llm/openai_categorizer.py +0 -80
  56. texttools/tools/keyword_extractor/__init__.py +0 -1
  57. texttools/tools/keyword_extractor/gemma_extractor.py +0 -138
  58. texttools/tools/merger/__init__.py +0 -2
  59. texttools/tools/merger/gemma_question_merger.py +0 -214
  60. texttools/tools/ner/__init__.py +0 -1
  61. texttools/tools/ner/gemma_ner_extractor.py +0 -157
  62. texttools/tools/question_detector/__init__.py +0 -2
  63. texttools/tools/question_detector/gemma_detector.py +0 -114
  64. texttools/tools/question_detector/llm_detector.py +0 -112
  65. texttools/tools/question_generator/__init__.py +0 -1
  66. texttools/tools/question_generator/gemma_question_generator.py +0 -198
  67. texttools/tools/reranker/__init__.py +0 -3
  68. texttools/tools/reranker/reranker.py +0 -137
  69. texttools/tools/reranker/scorer.py +0 -216
  70. texttools/tools/reranker/sorter.py +0 -278
  71. texttools/tools/rewriter/__init__.py +0 -2
  72. texttools/tools/rewriter/gemma_question_rewriter.py +0 -213
  73. texttools/tools/router/__init__.py +0 -0
  74. texttools/tools/router/gemma_router.py +0 -169
  75. texttools/tools/subject_to_question/__init__.py +0 -1
  76. texttools/tools/subject_to_question/gemma_question_generator.py +0 -224
  77. texttools/tools/summarizer/__init__.py +0 -2
  78. texttools/tools/summarizer/gemma_summarizer.py +0 -140
  79. texttools/tools/summarizer/llm_summerizer.py +0 -108
  80. texttools/tools/translator/__init__.py +0 -1
  81. texttools/tools/translator/gemma_translator.py +0 -189
  82. texttools/utils/batch_manager/__init__.py +0 -2
  83. texttools/utils/batch_manager/batch_runner.py +0 -207
  84. texttools/utils/flex_processor.py +0 -78
  85. {hamtaa_texttools-0.1.48.dist-info → hamtaa_texttools-1.1.7.dist-info}/WHEEL +0 -0
  86. {hamtaa_texttools-0.1.48.dist-info → hamtaa_texttools-1.1.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,433 @@
1
+ from typing import Literal, Any, Callable
2
+
3
+ from openai import OpenAI
4
+
5
+ from texttools.tools.internals.operator import Operator
6
+ import texttools.tools.internals.output_models as OutputModels
7
+
8
+
9
class TheTool:
    """
    High-level text-processing facade over a synchronous LLM `Operator`.

    Each method configures the operator with a specific YAML prompt,
    output schema, and flags, then delegates execution to `operator.run()`.

    Usage:
        client = OpenAI(...)
        tool = TheTool(client, model="model-name")
        result = tool.categorize("text ...", with_analysis=True)
    """

    def __init__(
        self,
        client: OpenAI,
        model: str,
    ):
        # One shared Operator instance executes every tool method.
        self.operator = Operator(client=client, model=model)

    def categorize(
        self,
        text: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Categorize a text into a single Islamic studies domain category.

        Returns:
            ToolOutput: Object containing:
                - result (str): The assigned Islamic studies category
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="categorizer.yaml",
            output_model=OutputModels.CategorizerOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    def extract_keywords(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Extract salient keywords from text.

        Returns:
            ToolOutput: Object containing:
                - result (list[str]): List of extracted keywords
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="extract_keywords.yaml",
            output_model=OutputModels.ListStrOutput,
            resp_format="parse",
            mode=None,
        )

    def extract_entities(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Perform Named Entity Recognition (NER) over the input text.

        Returns:
            ToolOutput: Object containing:
                - result (list[dict]): List of entities with 'text' and 'type' keys
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="extract_entities.yaml",
            output_model=OutputModels.ListDictStrStrOutput,
            resp_format="parse",
            mode=None,
        )

    def is_question(
        self,
        text: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Detect if the input is phrased as a question.

        Returns:
            ToolOutput: Object containing:
                - result (bool): True if text is a question, False otherwise
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="is_question.yaml",
            output_model=OutputModels.BoolOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    def text_to_question(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Generate a single question from the given text.

        Returns:
            ToolOutput: Object containing:
                - result (str): The generated question
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="text_to_question.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
        )

    def merge_questions(
        self,
        text: list[str],
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        mode: Literal["default", "reason"] = "default",
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Merge multiple questions into a single unified question.

        Returns:
            ToolOutput: Object containing:
                - result (str): The merged question
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        # Join the questions into one prompt string; use a new local
        # instead of rebinding the list[str] parameter to a str.
        joined_text = ", ".join(text)
        return self.operator.run(
            # User parameters
            text=joined_text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="merge_questions.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=mode,
        )

    def rewrite(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        mode: Literal["positive", "negative", "hard_negative"] = "positive",
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Rewrite a text with different modes.

        Returns:
            ToolOutput: Object containing:
                - result (str): The rewritten text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="rewrite.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=mode,
        )

    def subject_to_question(
        self,
        text: str,
        number_of_questions: int,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Generate a list of questions about a subject.

        Returns:
            ToolOutput: Object containing:
                - result (list[str]): List of generated questions
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            number_of_questions=number_of_questions,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="subject_to_question.yaml",
            output_model=OutputModels.ReasonListStrOutput,
            resp_format="parse",
            mode=None,
        )

    def summarize(
        self,
        text: str,
        with_analysis: bool = False,
        output_lang: str | None = None,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Summarize the given subject text.

        Returns:
            ToolOutput: Object containing:
                - result (str): The summary text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            with_analysis=with_analysis,
            output_lang=output_lang,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="summarize.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
        )

    def translate(
        self,
        text: str,
        target_language: str,
        with_analysis: bool = False,
        user_prompt: str | None = None,
        temperature: float | None = 0.0,
        logprobs: bool = False,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Translate text between languages.

        Returns:
            ToolOutput: Object containing:
                - result (str): The translated text
                - logprobs (list | None): Probability data if logprobs enabled
                - analysis (str | None): Detailed reasoning if with_analysis enabled
        """
        return self.operator.run(
            # User parameters
            text=text,
            target_language=target_language,
            with_analysis=with_analysis,
            user_prompt=user_prompt,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="translate.yaml",
            output_model=OutputModels.StrOutput,
            resp_format="parse",
            mode=None,
            output_lang=None,
        )

    def run_custom(
        self,
        prompt: str,
        output_model: Any,
        output_lang: str | None = None,
        temperature: float | None = None,
        logprobs: bool | None = None,
        top_logprobs: int | None = None,
        validator: Callable[[Any], bool] | None = None,
    ) -> OutputModels.ToolOutput:
        """
        Custom tool that can do almost anything!

        Runs a free-form prompt and parses the response into the
        caller-supplied Pydantic `output_model`.

        Returns:
            ToolOutput: Object with fields:
                - result: Instance of `output_model` parsed from the response
        """
        return self.operator.run(
            # User parameters
            text=prompt,
            output_model=output_model,
            # The JSON schema is embedded in the prompt so the model
            # knows the expected output structure.
            output_model_str=output_model.model_json_schema(),
            output_lang=output_lang,
            temperature=temperature,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
            validator=validator,
            # Internal parameters
            prompt_file="run_custom.yaml",
            resp_format="parse",
            user_prompt=None,
            with_analysis=False,
            mode=None,
        )
@@ -1,60 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: hamtaa-texttools
3
- Version: 0.1.48
4
- Summary: A set of high-level NLP tools
5
- Author: Tohidi, Montazer, Givechi, Mousavinezhad
6
- Requires-Python: >=3.8
7
- Description-Content-Type: text/markdown
8
- Requires-Dist: openai==1.97.1
9
- Requires-Dist: numpy==1.26.4
10
-
11
- # Text Tools
12
-
13
- <p align="center">
14
- <img src="https://img.shields.io/badge/TextTools-Python%20Text%20Processing-black?style=for-the-badge&logo=python&logoColor=white">
15
- </p>
16
-
17
-
18
- <p align="center">
19
- <img src="docs/logo.png" alt="Preview" width="300" height="300">
20
- </p>
21
-
22
-
23
- ## How to Install
24
-
25
- Install the package using:
26
-
27
- ```bash
28
- pip install -U hamtaa-texttools
29
- ```
30
-
31
-
32
- ---
33
-
34
- ## What This Library Is *Not*
35
-
36
- This is **not** a collection of low-level utilities.
37
-
38
- To clarify: this library **does not** include things like:
39
- A standard `regex`
40
- - Word normalization utilities
41
-
42
- ---
43
-
44
- ## What This Library *Provides*
45
-
46
- This is a set of **high-level natural language processing (NLP)** tools.
47
-
48
- Some of the features include:
49
- - `question_detector`: Detecting if an incoming text is a question or not
50
- `categorizer`: A categorizer that requires no fine-tuning
51
- - ... (Tell me what you want!)
52
-
53
- ---
54
-
55
- ## When to Use This Library
56
-
57
- Use `texttools` when:
58
- - You need to **process large volumes of data using OpenAI’s GPT models** via the BATCH API.
59
- - You want to treat an **LLM as a function** in Python that outputs structured JSON or Pydantic models.
60
- - You need to **categorize large datasets** using vector embeddings, efficiently and at scale.
@@ -1,61 +0,0 @@
1
- texttools/__init__.py,sha256=UEPcm1wKU7Hiqw1K_E2ojZSnyQrGyNLzN0spQ_ZXjJ4,784
2
- texttools/base/__init__.py,sha256=KUGm-Oe0BxlrRhPS-Jm2q1NCmwX8MdtZtloia7bcLaM,189
3
- texttools/base/base_categorizer.py,sha256=ojup94iXLxh92TjiJmrFXeRbsWKlon7PPAqez96B1bs,1130
4
- texttools/base/base_keyword_extractor.py,sha256=uKpxb3xI-sim-vXWe1R4_36QRhSNsWDR4IuVdpkZMME,868
5
- texttools/base/base_ner_extractor.py,sha256=D0LRNSyq1uIU9Qtepi7zpCWWzYz-AOxpVNjq97S1oUA,1933
6
- texttools/base/base_question_detector.py,sha256=FR9yDP0Z8aAfGafZy3kcpSDUUYWLJM7saRKdeVN5TiM,829
7
- texttools/base/base_question_generator.py,sha256=L_2ZwqyV9GxsKiQynWKRJG15OBFgQqiCic5H0i8R5yk,3238
8
- texttools/base/base_question_merger.py,sha256=TYhsihKaIdyGCVu4AcjxPZ1_HocHt__voV8WWGMRpMs,1945
9
- texttools/base/base_question_rewriter.py,sha256=K6ZnAjxi2qw4yLxm92zTI1IStCfX6c_6lCfIuBDSx8w,1973
10
- texttools/base/base_router.py,sha256=pFDjIXFqAhPiS9Onu5py_GxOq8geDGJDQh6k6IhCkvw,933
11
- texttools/base/base_summarizer.py,sha256=7NAilhUPs6ZUwkBpTtXAj6n2XxQH1w6SOolf3gQX2gc,1627
12
- texttools/base/base_task_performer.py,sha256=3-6qshkie50S7pRG4WHRNC_RdUbSmHOPKW56CD92-rM,1852
13
- texttools/base/base_translator.py,sha256=BoOxqaoPoUs8t1O3m2yL9pQa5iwisl097immTVcGZoE,1020
14
- texttools/formatter/__init__.py,sha256=KHz2tFZctbit_HVbQNCTMi46JzmKlg-uB6Ost63IpVU,46
15
- texttools/formatter/base.py,sha256=0fiM6E7NdJevAVpL6yyPaUZVJGKWxE3fr-Ay1oqgJqQ,879
16
- texttools/formatter/gemma3_formatter.py,sha256=AmdKBYLj6HMsI2DDX4KHNEEVYJmz_VVNUBOv8ScGjsY,1865
17
- texttools/handlers/__init__.py,sha256=sv0JloipQ57AI0xo-3w9k6cK5rYjZP3ltR2EbBhkHTA,121
18
- texttools/handlers/handlers.py,sha256=LtC4FBuzRUDy3Jw-Fp21WR-QS1jOcDhsGaMPFQGjfTw,2381
19
- texttools/handlers/categorizer/__init__.py,sha256=mE05vt_ma6vcP8pQ37BZ85WVQ8jhcjDS0iZV81_LFCY,127
20
- texttools/handlers/categorizer/categorizer.py,sha256=HBpdhtCGUPl1TJUOxbgSLmVWD7o9xeIjmSWXvYzGrCA,1592
21
- texttools/tools/__init__.py,sha256=V3ZjSj_ZI9r02sOmxpxxxKBbBbtuYS1MQqtrdGZHC_A,1121
22
- texttools/tools/categorizer/__init__.py,sha256=VY0SVdik0et0fwLDj7qn-d5LtVqVBIalvlRVci699i4,48
23
- texttools/tools/categorizer/encoder_model/__init__.py,sha256=7UwoPlQ09VGN0cqfi5fPQRfsZZ8hoZj6fL6cax1BLSU,53
24
- texttools/tools/categorizer/encoder_model/encoder_vectorizer.py,sha256=MHPVJQJlvNhZ5xLVXk4FtvrORW2yxPSAnjEhjPbkQts,1476
25
- texttools/tools/categorizer/llm/__init__.py,sha256=0VbxvInITfNUlOF6bJqcUKKaYWlIe9K3vRmIRuvAGcY,95
26
- texttools/tools/categorizer/llm/gemma_categorizer.py,sha256=tjwKonTjT5cAhxWQaVyvyooRyOlGACHpnn72PNoLk-8,5636
27
- texttools/tools/categorizer/llm/openai_categorizer.py,sha256=omRk77Z5ZCIAz17h4wPDP_EcBSsscA-PQJpQjtI6--o,2547
28
- texttools/tools/keyword_extractor/__init__.py,sha256=eTpujS85MmRRbnNwc2ifKUh60W8OG4RQFmWki3Z7C_0,84
29
- texttools/tools/keyword_extractor/gemma_extractor.py,sha256=TJ4wMPWRuuzRi_Q0hr7UauKhEg8U_5U5j1D_lTFrn4s,4349
30
- texttools/tools/merger/__init__.py,sha256=bh2RBpqJvDaqEmDrM9y_GcjRqibagifAxiZVu8nEHc0,115
31
- texttools/tools/merger/gemma_question_merger.py,sha256=JAC-52kBbabIzEWp0MFi9viiu8nZOAMPaJZALHvNMqo,8035
32
- texttools/tools/ner/__init__.py,sha256=BW84BcItel6Mc2JlaDL6qvAktVMkti67VXceeCnOB1g,70
33
- texttools/tools/ner/gemma_ner_extractor.py,sha256=YhyIwX_8bdwkFb4gY8g9mZdYHW_r1jCvbmjjNCK9Wfo,5384
34
- texttools/tools/question_detector/__init__.py,sha256=ulArGttooSoxEe0vUDQSxUQrnsxr7gH9l-LjSER2dVI,162
35
- texttools/tools/question_detector/gemma_detector.py,sha256=DhlCAA6Hws_OTuYil6UY4sYlbjdQQU6EqHdoTl3a--w,3772
36
- texttools/tools/question_detector/llm_detector.py,sha256=zo89eh359hqQGGf83-6M22AaiH7q-m0m91SjTyxZaYs,3862
37
- texttools/tools/question_generator/__init__.py,sha256=EAElpB_YeyMoBqvFNjbW2a_j18SLtiKQ7sRmdS58Fww,61
38
- texttools/tools/question_generator/gemma_question_generator.py,sha256=V5QcXmHZ5shTvrThOxUrKJ4FqP0P58NIJbsPdyyy5IM,6744
39
- texttools/tools/reranker/__init__.py,sha256=70jqJ9cjpPzzvnMYgHYGVZ9PrWrN9N97visqD_PVxwU,100
40
- texttools/tools/reranker/reranker.py,sha256=2SiTMIxempMuHui2n4GJV_2dLGBeoC7WAn_rVVXlMBA,5518
41
- texttools/tools/reranker/scorer.py,sha256=fQ3Ya8QmNhrcmb-Rf-72hvhweGvVj6gQ4KOlham2eE8,8176
42
- texttools/tools/reranker/sorter.py,sha256=_ed5zGz7K60skPFFuEQZ1ObBFA71LAfVT6FyWicA-Pw,11419
43
- texttools/tools/rewriter/__init__.py,sha256=U_qwGeEOqHAcV4p2CHVb0AIvHKFfdvykRzGyWD54aWA,121
44
- texttools/tools/rewriter/gemma_question_rewriter.py,sha256=jXtRswfBvHn9QmE90JvxEmLvCTbwZqZhD_A5ONWeCzo,7925
45
- texttools/tools/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- texttools/tools/router/gemma_router.py,sha256=VX-kHphZVZNd0_ajugN08hGkWNUeUriwfonpYy2TIS4,5619
47
- texttools/tools/subject_to_question/__init__.py,sha256=VJpns16Qe5OL_-4WuGDUNShcJsodB2khGWT3Q1Hc-WU,72
48
- texttools/tools/subject_to_question/gemma_question_generator.py,sha256=VKXHhYHEvhFLUR87iEh0eFpD_4ueX4np8IjF-NkgWrY,7417
49
- texttools/tools/summarizer/__init__.py,sha256=phrR7qO20CNhO3hjXQBzhTRVumdVdGSufmH4GEYkhj4,140
50
- texttools/tools/summarizer/gemma_summarizer.py,sha256=ikhsBv7AiZD1dT_d12AyjXxojzSW92e2y5WjchI_3bE,4474
51
- texttools/tools/summarizer/llm_summerizer.py,sha256=-0rUKbSnl1aDeBfJ5DCSbIlwd2k-9qIaCKgoQJa0hWc,3412
52
- texttools/tools/translator/__init__.py,sha256=KO1m08J2BZwRqBGO9ICB4l4cnH1jfHLHL5HbgYFUWM8,72
53
- texttools/tools/translator/gemma_translator.py,sha256=rbP0kgkhOiEPdHWgHQc7Lev7lrAIYqNb6t_OfZLp44E,7180
54
- texttools/utils/flex_processor.py,sha256=Y44uTracvXUJiUm5hh57Uk0933RU9GTc3dN_1Bo_XQA,3214
55
- texttools/utils/batch_manager/__init__.py,sha256=3ZkxA395lRD4gNxJ1vp0fNuz_XuBr50GoP51rrwQ0Ks,87
56
- texttools/utils/batch_manager/batch_manager.py,sha256=jAmKskL3OTYwwsO1mWsWAB3VxMlOF07c2GW1Ev83ZhY,9283
57
- texttools/utils/batch_manager/batch_runner.py,sha256=kW0IPauI11xpssApMA7b4XI19FePImywym3V7tBaa-o,7404
58
- hamtaa_texttools-0.1.48.dist-info/METADATA,sha256=cUMn4AfOaYObchCIOQ-xrj0vmF3VTsmHYnob3Xi7R2M,1481
59
- hamtaa_texttools-0.1.48.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- hamtaa_texttools-0.1.48.dist-info/top_level.txt,sha256=5Mh0jIxxZ5rOXHGJ6Mp-JPKviywwN0MYuH0xk5bEWqE,10
61
- hamtaa_texttools-0.1.48.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- from texttools.base.base_categorizer import BaseCategorizer
2
- from texttools.base.base_question_detector import BaseQuestionDetector
3
- from texttools.base.base_summarizer import BaseSummarizer
@@ -1,40 +0,0 @@
1
- import logging
2
- from abc import ABC, abstractmethod
3
- from enum import Enum
4
- from typing import Optional
5
-
6
- from texttools.handlers import NoOpResultHandler, ResultHandler
7
-
8
-
9
class BaseCategorizer(ABC):
    """Abstract base for categorizers that map a text to one Enum category."""

    def __init__(
        self,
        handlers: Optional[list[ResultHandler]] = None,
    ):
        """
        handlers: List of ResultHandler objects that will process results after categorization.
        """
        # Fall back to a no-op handler when none (or an empty list) is given.
        if handlers:
            self.handlers = handlers
        else:
            self.handlers = [NoOpResultHandler()]

    @abstractmethod
    def categorize(self, text: str) -> Enum:
        """
        Categorize the input text.
        Must return one of the Enum members defined in self.categories.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional: Preprocess text before categorization.
        """
        return text

    def _dispatch(self, results: dict) -> None:
        # Hand the results to every handler; a failing handler is logged
        # and must not prevent the remaining handlers from running.
        for current in self.handlers:
            try:
                current.handle(results)
            except Exception:
                logging.error(
                    f"Handler {current.__class__.__name__} failed", exc_info=True
                )
@@ -1,35 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, Optional
3
-
4
-
5
class BaseKeywordExtractor(ABC):
    """
    Base class for all detectors that output a list of keywords.
    """

    def __init__(
        self,
        handlers: Optional[list[Any]] = None,
    ):
        # An absent/empty handler list degrades to no post-processing.
        self.handlers = handlers if handlers else []

    @abstractmethod
    def extract_keywords(self, text: str) -> list[str]:
        """
        Extract keywords from the input text.
        Should return a list of strings, where each string is a keyword.
        """
        pass

    def preprocess(self, text: str) -> str:
        """
        Optional text preprocessing step.
        """
        return text.strip()

    def _dispatch(self, result: dict) -> None:
        """
        Dispatch the result to handlers.
        """
        for h in self.handlers:
            h.handle(result)