diversify-text 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {diversify_text-0.2.0 → diversify_text-0.2.1}/PKG-INFO +26 -1
  2. {diversify_text-0.2.0 → diversify_text-0.2.1}/README.md +25 -0
  3. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/core.py +30 -6
  4. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/prompting/method.py +51 -13
  5. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/prompting/prompts.py +3 -0
  6. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/tinystyler/method.py +1 -1
  7. {diversify_text-0.2.0 → diversify_text-0.2.1}/pyproject.toml +1 -1
  8. {diversify_text-0.2.0 → diversify_text-0.2.1}/.gitignore +0 -0
  9. {diversify_text-0.2.0 → diversify_text-0.2.1}/LICENSE +0 -0
  10. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/__init__.py +0 -0
  11. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_cache.py +0 -0
  12. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_input.py +0 -0
  13. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_output.py +0 -0
  14. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_postprocess.py +0 -0
  15. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_preprocess.py +0 -0
  16. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/_utils.py +0 -0
  17. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/filter/__init__.py +0 -0
  18. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/filter/mis.py +0 -0
  19. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/__init__.py +0 -0
  20. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/base.py +0 -0
  21. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/echo.py +0 -0
  22. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/prompting/__init__.py +0 -0
  23. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/prompting/model.py +0 -0
  24. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/registry.py +0 -0
  25. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/tinystyler/__init__.py +0 -0
  26. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/method/tinystyler/model.py +0 -0
  27. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/py.typed +0 -0
  28. {diversify_text-0.2.0 → diversify_text-0.2.1}/diversify_text/styles.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diversify-text
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Generate stylistic paraphrases of texts using local transformer models.
5
5
  Project-URL: Homepage, https://github.com/AnnaWegmann/diversify_text
6
6
  Project-URL: Documentation, https://annawegmann.github.io/diversify_text/
@@ -46,6 +46,7 @@ pip install diversify-text
46
46
  - [Usage](#usage)
47
47
  - [Single text](#single-text)
48
48
  - [Control number of paraphrases](#control-number-of-paraphrases)
49
+ - [Prompting method](#prompting-method)
49
50
  - [Caching](#caching)
50
51
  - [Using the class directly](#using-the-class-directly)
51
52
  - [List of texts](#list-of-texts)
@@ -92,6 +93,30 @@ results = diversify("Some text.", n=3)
92
93
  [{"original": "Some text.", "paraphrases": ["...", "...", "..."]}]
93
94
  ```
94
95
 
96
+ ### Prompting method
97
+
98
+ Use the prompting method to generate paraphrases via a causal language model (default: [SmolLM3-3B](https://huggingface.co/HuggingFaceTB/SmolLM3-3B)):
99
+
100
+ ```python
101
+ results = diversify("The experiment was conducted in a controlled lab setting.", methods=["prompting"])
102
+ ```
103
+
104
+ Select specific prompt styles:
105
+
106
+ ```python
107
+ results = diversify(
108
+ "The experiment was conducted in a controlled lab setting.",
109
+ methods=["prompting"],
110
+ method_kwargs={
111
+ "prompting": {
112
+ "prompt_keys": ["simple_kew", "complex_kew", "caps_reif"]
113
+ }
114
+ },
115
+ )
116
+ ```
117
+
118
+ Available prompt keys: `wikipedia_paraphrase`, `simple_kew`, `complex_kew`, `formal_reif`, `simple_reif`, `passive_reif`, `caps_reif`, `lowcaps_reif`, `text_emojis_reif`, `less_common_verbs_reif`, `humanize_llm-as-coauthor_original`, and all `finephrase_*` templates. See the [full prompt reference](https://annawegmann.github.io/diversify_text/prompts.html) for details.
119
+
95
120
  ### Caching
96
121
 
97
122
  The `diversify()` function automatically caches loaded models between calls.
@@ -13,6 +13,7 @@ pip install diversify-text
13
13
  - [Usage](#usage)
14
14
  - [Single text](#single-text)
15
15
  - [Control number of paraphrases](#control-number-of-paraphrases)
16
+ - [Prompting method](#prompting-method)
16
17
  - [Caching](#caching)
17
18
  - [Using the class directly](#using-the-class-directly)
18
19
  - [List of texts](#list-of-texts)
@@ -59,6 +60,30 @@ results = diversify("Some text.", n=3)
59
60
  [{"original": "Some text.", "paraphrases": ["...", "...", "..."]}]
60
61
  ```
61
62
 
63
+ ### Prompting method
64
+
65
+ Use the prompting method to generate paraphrases via a causal language model (default: [SmolLM3-3B](https://huggingface.co/HuggingFaceTB/SmolLM3-3B)):
66
+
67
+ ```python
68
+ results = diversify("The experiment was conducted in a controlled lab setting.", methods=["prompting"])
69
+ ```
70
+
71
+ Select specific prompt styles:
72
+
73
+ ```python
74
+ results = diversify(
75
+ "The experiment was conducted in a controlled lab setting.",
76
+ methods=["prompting"],
77
+ method_kwargs={
78
+ "prompting": {
79
+ "prompt_keys": ["simple_kew", "complex_kew", "caps_reif"]
80
+ }
81
+ },
82
+ )
83
+ ```
84
+
85
+ Available prompt keys: `wikipedia_paraphrase`, `simple_kew`, `complex_kew`, `formal_reif`, `simple_reif`, `passive_reif`, `caps_reif`, `lowcaps_reif`, `text_emojis_reif`, `less_common_verbs_reif`, `humanize_llm-as-coauthor_original`, and all `finephrase_*` templates. See the [full prompt reference](https://annawegmann.github.io/diversify_text/prompts.html) for details.
86
+
62
87
  ### Caching
63
88
 
64
89
  The `diversify()` function automatically caches loaded models between calls.
@@ -248,16 +248,40 @@ class Diversifier:
248
248
  ) -> int:
249
249
  """Infer *n* from per-method kwargs when only one method is used.
250
250
 
251
- When a single method is active and the caller provided method-
252
- specific keys (``prompt_keys`` for prompting, ``styles`` for
253
- tinystyler), returns the length of those keys so each is used
254
- exactly once. Otherwise returns :attr:`_DEFAULT_N`.
251
+ When a single method is active (currently either tinystyler or prompting)
252
+ and the caller provided method-specific keys, infers the number of
253
+ paraphrases from the number of keys. Otherwise returns :attr:`_DEFAULT_N`.
254
+
255
+ For the prompting method, the inference depends on what is provided:
256
+ * ``prompt_keys`` only → ``len(prompt_keys)`` (one per template).
257
+ * ``styles`` only → ``len(styles)`` (style transfer, one per style).
258
+ * Both → each style-dependent template (in
259
+ :data:`EXAMPLE_BASED_PROMPT_BANK` or :data:`NAME_BASED_PROMPT_BANK`)
260
+ contributes ``len(styles)``, each zero-shot template contributes 1.
255
261
  """
256
262
  if len(self._methods) == 1 and method_kwargs:
257
263
  method = self._methods[0]
258
264
  kw = method_kwargs.get(method.name, {})
259
- if method.name == "prompting" and "prompt_keys" in kw:
260
- return len(kw["prompt_keys"])
265
+ if method.name == "prompting":
266
+ from diversify_text.method.prompting.prompts import STYLE_DEP_PROMPTS
267
+ from diversify_text.styles import DEFAULT_STYLES
268
+
269
+ prompt_keys = kw.get("prompt_keys")
270
+ styles = kw.get("styles")
271
+ # When styles are not provided but style-dependent prompts
272
+ # are selected, default to DEFAULT_STYLES.
273
+ if not styles and prompt_keys and any(k in STYLE_DEP_PROMPTS for k in prompt_keys):
274
+ styles = DEFAULT_STYLES
275
+ if prompt_keys:
276
+ n = 0
277
+ for key in prompt_keys:
278
+ if key in STYLE_DEP_PROMPTS and styles:
279
+ n += len(styles)
280
+ else:
281
+ n += 1
282
+ return n
283
+ if styles:
284
+ return len(styles)
261
285
  if method.name == "tinystyler" and "styles" in kw:
262
286
  return len(kw["styles"])
263
287
  return self._DEFAULT_N
@@ -14,6 +14,7 @@ from diversify_text.method.prompting.prompts import (
14
14
  PLACEHOLDER_STYLE_NAME,
15
15
  PLACEHOLDER_TEXT,
16
16
  PROMPT_BANK,
17
+ STYLE_DEP_PROMPTS,
17
18
  )
18
19
  from diversify_text.styles import resolve_style_sets
19
20
 
@@ -75,7 +76,7 @@ class PromptingMethod(DiversificationMethod):
75
76
  def _resolve_prompts(
76
77
  prompt_bank: dict[str, str] | None = None,
77
78
  prompt_keys: list[str] | None = None,
78
- style_example_keys: list[str] | None = None,
79
+ styles: list[str] | None = None,
79
80
  custom_style_bank: dict[str, list[str]] | None = None,
80
81
  ) -> list[tuple[str, str]]:
81
82
  """Resolve prompt configuration into an ordered list of (key, template) pairs.
@@ -93,7 +94,7 @@ class PromptingMethod(DiversificationMethod):
93
94
  zero-shot + few-shot bank).
94
95
  prompt_keys : list[str] or None
95
96
  Select only these keys from the bank. Order is preserved.
96
- style_example_keys : list[str] or None
97
+ styles : list[str] or None
97
98
  Names of style sets for few-shot examples. If provided
98
99
  without *prompt_keys*, the method automatically selects
99
100
  the ``"style_transfer"`` prompt template. When combined
@@ -101,7 +102,7 @@ class PromptingMethod(DiversificationMethod):
101
102
  the ``[STYLE EXAMPLES]`` placeholder.
102
103
  custom_style_bank : dict or None
103
104
  Custom style bank — same trigger behavior as
104
- *style_example_keys*.
105
+ *styles*.
105
106
 
106
107
  Returns
107
108
  -------
@@ -113,13 +114,13 @@ class PromptingMethod(DiversificationMethod):
113
114
  Raises
114
115
  ------
115
116
  ValueError
116
- If *prompt_keys* contains unknown keys, or if *style_example_keys*
117
+ If *prompt_keys* contains unknown keys, or if *styles*
117
118
  / *custom_style_bank* are provided but the selected
118
119
  templates do not contain a ``[STYLE EXAMPLES]``
119
120
  placeholder.
120
121
  """
121
122
  bank = prompt_bank if prompt_bank is not None else PROMPT_BANK
122
- has_styles = style_example_keys is not None or custom_style_bank is not None
123
+ has_styles = styles is not None or custom_style_bank is not None
123
124
 
124
125
  # --- Select templates (four mutually exclusive cases) ---
125
126
 
@@ -153,12 +154,12 @@ class PromptingMethod(DiversificationMethod):
153
154
  for _k, t in templates
154
155
  ):
155
156
  raise ValueError(
156
- "style_example_keys or custom_style_bank were provided, but the "
157
+ "styles or custom_style_bank were provided, but the "
157
158
  "selected prompt template(s) do not contain the "
158
159
  f"{PLACEHOLDER_STYLE_EXAMPLES} or {PLACEHOLDER_STYLE_NAME} "
159
160
  f"placeholder. Use a style-aware template "
160
161
  f"(e.g. prompt_keys=['style_transfer'] or prompt_keys=['reif']) "
161
- f"or remove style_example_keys. See "
162
+ f"or remove styles. See "
162
163
  f"https://annawegmann.github.io/diversify_text/prompts.html"
163
164
  )
164
165
 
@@ -244,12 +245,41 @@ class PromptingMethod(DiversificationMethod):
244
245
  Returns a dict mapping style names to example sentences, or
245
246
  an empty dict when no style kwargs are provided.
246
247
  """
247
- style_keys = kwargs.get("style_example_keys")
248
+ style_keys = kwargs.get("styles")
248
249
  custom_bank = kwargs.get("custom_style_bank")
249
250
  if style_keys is not None or custom_bank is not None:
250
251
  return resolve_style_sets(custom_bank, style_keys)
251
252
  return {}
252
253
 
254
+ @staticmethod
255
+ def _build_schedule(
256
+ prompt_templates: list[tuple[str, str]],
257
+ fs_style_examples: dict[str, list[str]],
258
+ ) -> list[tuple[str, str, int | None]]:
259
+ """Build a generation schedule from templates and style examples.
260
+
261
+ Style-dependent templates (in :data:`EXAMPLE_BASED_PROMPT_BANK` or
262
+ :data:`NAME_BASED_PROMPT_BANK`) expand to one entry per style;
263
+ zero-shot templates get a single entry.
264
+
265
+ The caller iterates the schedule with modulo to fill ``n`` slots,
266
+ so the schedule represents one full "natural" cycle.
267
+
268
+ Each entry is ``(key, template, style_idx)`` where *style_idx* is
269
+ ``None`` for zero-shot templates.
270
+ """
271
+ n_styles = len(fs_style_examples) if fs_style_examples else 0
272
+
273
+
274
+ schedule: list[tuple[str, str, int | None]] = []
275
+ for key, tmpl in prompt_templates:
276
+ if n_styles and key in STYLE_DEP_PROMPTS:
277
+ for style_idx in range(n_styles):
278
+ schedule.append((key, tmpl, style_idx))
279
+ else:
280
+ schedule.append((key, tmpl, None))
281
+ return schedule
282
+
253
283
  def _fill_template(
254
284
  self,
255
285
  template: str,
@@ -346,7 +376,7 @@ class PromptingMethod(DiversificationMethod):
346
376
  Sampling parameters. ``None`` uses defaults.
347
377
  **kwargs
348
378
  Extra options forwarded from ``Diversifier``, including
349
- ``prompt_keys``, ``prompt_bank``, ``style_example_keys``,
379
+ ``prompt_keys``, ``prompt_bank``, ``styles``,
350
380
  ``custom_style_bank``, and ``n_style_examples``.
351
381
  """
352
382
  model = self._ensure_model()
@@ -356,7 +386,7 @@ class PromptingMethod(DiversificationMethod):
356
386
  prompt_templates = self._resolve_prompts(
357
387
  prompt_bank=kwargs.get("prompt_bank"),
358
388
  prompt_keys=kwargs.get("prompt_keys"),
359
- style_example_keys=kwargs.get("style_example_keys"),
389
+ styles=kwargs.get("styles"),
360
390
  custom_style_bank=kwargs.get("custom_style_bank"),
361
391
  )
362
392
  all_max_new_tokens = self._compute_max_new_tokens(
@@ -369,24 +399,32 @@ class PromptingMethod(DiversificationMethod):
369
399
  )
370
400
 
371
401
  fs_style_examples = self._resolve_few_shot_examples(**kwargs)
402
+ # Default to DEFAULT_STYLES when style-dependent prompts are
403
+ # selected but no explicit styles were provided.
404
+ if not fs_style_examples:
405
+
406
+ if any(k in STYLE_DEP_PROMPTS for k, _ in prompt_templates):
407
+ from diversify_text.styles import DEFAULT_STYLES
408
+ fs_style_examples = resolve_style_sets(None, DEFAULT_STYLES)
372
409
  if fs_style_examples:
373
410
  logger.info("Style sets: %s", ", ".join(fs_style_examples.keys()))
374
411
 
375
412
  n_ex = kwargs.get("n_style_examples", _DEFAULT_N_STYLE_EXAMPLES)
376
413
 
377
- # Build prompts in the same order as all_max_new_tokens.
414
+ schedule = self._build_schedule(prompt_templates, fs_style_examples)
415
+
378
416
  # TODO: accept texts as an Iterable (not just list) to support
379
417
  # streaming from large files without materialising everything
380
418
  # in memory.
381
419
  all_prompts: list[str] = []
382
420
  for i in range(n):
383
- _key, template = prompt_templates[i % len(prompt_templates)]
421
+ _key, template, style_idx = schedule[i % len(schedule)]
384
422
  for t in texts:
385
423
  all_prompts.append(
386
424
  self._fill_template(
387
425
  template=template,
388
426
  text=t,
389
- style_idx=i,
427
+ style_idx=style_idx,
390
428
  fs_style_examples=fs_style_examples,
391
429
  n_style_examples=n_ex,
392
430
  )
@@ -249,6 +249,9 @@ PLACEHOLDER_STYLE_NAME = "[STYLE NAME]"
249
249
 
250
250
  PROMPT_BANK: dict[str, str] = {**ZS_PROMPT_BANK, **EXAMPLE_BASED_PROMPT_BANK, **NAME_BASED_PROMPT_BANK}
251
251
 
252
+ #: Prompt keys whose templates depend on style examples or style names.
253
+ STYLE_DEP_PROMPTS: set[str] = {*EXAMPLE_BASED_PROMPT_BANK, *NAME_BASED_PROMPT_BANK}
254
+
252
255
  DEFAULT_PROMPTS: list[str] = [
253
256
  HUMANIZE_LLM_AS_COAUTHOR_ORIGINAL,
254
257
  COMPLEX_KEW,
@@ -118,7 +118,7 @@ class TinyStylerMethod(DiversificationMethod):
118
118
  styles_arg,
119
119
  )
120
120
  # When explicit style keys are given, they determine the count.
121
- effective_n = len(styles_arg) if styles_arg is not None else n
121
+ effective_n = n
122
122
  if effective_n > len(style_bank):
123
123
  logger.warning(
124
124
  "n=%d exceeds the number of style bank entries (%d). "
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "diversify-text"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Generate stylistic paraphrases of texts using local transformer models."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes