dataforge-py 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge/__init__.py +20 -0
- dataforge/backend.py +147 -0
- dataforge/cli.py +166 -0
- dataforge/core.py +1169 -0
- dataforge/locales/__init__.py +1 -0
- dataforge/locales/ar_SA/__init__.py +1 -0
- dataforge/locales/ar_SA/address.py +128 -0
- dataforge/locales/ar_SA/company.py +183 -0
- dataforge/locales/ar_SA/internet.py +25 -0
- dataforge/locales/ar_SA/person.py +217 -0
- dataforge/locales/ar_SA/phone.py +15 -0
- dataforge/locales/de_DE/__init__.py +1 -0
- dataforge/locales/de_DE/address.py +148 -0
- dataforge/locales/de_DE/company.py +125 -0
- dataforge/locales/de_DE/internet.py +32 -0
- dataforge/locales/de_DE/person.py +212 -0
- dataforge/locales/de_DE/phone.py +17 -0
- dataforge/locales/en_AU/__init__.py +1 -0
- dataforge/locales/en_AU/address.py +231 -0
- dataforge/locales/en_AU/company.py +193 -0
- dataforge/locales/en_AU/internet.py +34 -0
- dataforge/locales/en_AU/person.py +370 -0
- dataforge/locales/en_AU/phone.py +16 -0
- dataforge/locales/en_CA/__init__.py +1 -0
- dataforge/locales/en_CA/address.py +276 -0
- dataforge/locales/en_CA/company.py +193 -0
- dataforge/locales/en_CA/internet.py +34 -0
- dataforge/locales/en_CA/person.py +377 -0
- dataforge/locales/en_CA/phone.py +15 -0
- dataforge/locales/en_GB/__init__.py +1 -0
- dataforge/locales/en_GB/address.py +312 -0
- dataforge/locales/en_GB/company.py +196 -0
- dataforge/locales/en_GB/internet.py +34 -0
- dataforge/locales/en_GB/person.py +372 -0
- dataforge/locales/en_GB/phone.py +15 -0
- dataforge/locales/en_US/__init__.py +1 -0
- dataforge/locales/en_US/address.py +268 -0
- dataforge/locales/en_US/company.py +191 -0
- dataforge/locales/en_US/internet.py +34 -0
- dataforge/locales/en_US/person.py +370 -0
- dataforge/locales/en_US/phone.py +15 -0
- dataforge/locales/es_ES/__init__.py +1 -0
- dataforge/locales/es_ES/address.py +151 -0
- dataforge/locales/es_ES/company.py +125 -0
- dataforge/locales/es_ES/internet.py +30 -0
- dataforge/locales/es_ES/person.py +207 -0
- dataforge/locales/es_ES/phone.py +15 -0
- dataforge/locales/fr_FR/__init__.py +1 -0
- dataforge/locales/fr_FR/address.py +145 -0
- dataforge/locales/fr_FR/company.py +125 -0
- dataforge/locales/fr_FR/internet.py +30 -0
- dataforge/locales/fr_FR/person.py +212 -0
- dataforge/locales/fr_FR/phone.py +15 -0
- dataforge/locales/hi_IN/__init__.py +1 -0
- dataforge/locales/hi_IN/address.py +177 -0
- dataforge/locales/hi_IN/company.py +191 -0
- dataforge/locales/hi_IN/internet.py +26 -0
- dataforge/locales/hi_IN/person.py +218 -0
- dataforge/locales/hi_IN/phone.py +21 -0
- dataforge/locales/it_IT/__init__.py +1 -0
- dataforge/locales/it_IT/address.py +218 -0
- dataforge/locales/it_IT/company.py +151 -0
- dataforge/locales/it_IT/internet.py +31 -0
- dataforge/locales/it_IT/person.py +187 -0
- dataforge/locales/it_IT/phone.py +15 -0
- dataforge/locales/ja_JP/__init__.py +1 -0
- dataforge/locales/ja_JP/address.py +174 -0
- dataforge/locales/ja_JP/company.py +121 -0
- dataforge/locales/ja_JP/internet.py +30 -0
- dataforge/locales/ja_JP/person.py +207 -0
- dataforge/locales/ja_JP/phone.py +18 -0
- dataforge/locales/ko_KR/__init__.py +1 -0
- dataforge/locales/ko_KR/address.py +121 -0
- dataforge/locales/ko_KR/company.py +151 -0
- dataforge/locales/ko_KR/internet.py +30 -0
- dataforge/locales/ko_KR/person.py +157 -0
- dataforge/locales/ko_KR/phone.py +26 -0
- dataforge/locales/nl_NL/__init__.py +1 -0
- dataforge/locales/nl_NL/address.py +152 -0
- dataforge/locales/nl_NL/company.py +182 -0
- dataforge/locales/nl_NL/internet.py +41 -0
- dataforge/locales/nl_NL/person.py +218 -0
- dataforge/locales/nl_NL/phone.py +19 -0
- dataforge/locales/pl_PL/__init__.py +1 -0
- dataforge/locales/pl_PL/address.py +140 -0
- dataforge/locales/pl_PL/company.py +183 -0
- dataforge/locales/pl_PL/internet.py +36 -0
- dataforge/locales/pl_PL/person.py +217 -0
- dataforge/locales/pl_PL/phone.py +15 -0
- dataforge/locales/pt_BR/__init__.py +1 -0
- dataforge/locales/pt_BR/address.py +127 -0
- dataforge/locales/pt_BR/company.py +151 -0
- dataforge/locales/pt_BR/internet.py +31 -0
- dataforge/locales/pt_BR/person.py +187 -0
- dataforge/locales/pt_BR/phone.py +15 -0
- dataforge/locales/ru_RU/__init__.py +1 -0
- dataforge/locales/ru_RU/address.py +156 -0
- dataforge/locales/ru_RU/company.py +168 -0
- dataforge/locales/ru_RU/internet.py +26 -0
- dataforge/locales/ru_RU/person.py +218 -0
- dataforge/locales/ru_RU/phone.py +16 -0
- dataforge/locales/zh_CN/__init__.py +1 -0
- dataforge/locales/zh_CN/address.py +141 -0
- dataforge/locales/zh_CN/company.py +151 -0
- dataforge/locales/zh_CN/internet.py +30 -0
- dataforge/locales/zh_CN/person.py +157 -0
- dataforge/locales/zh_CN/phone.py +25 -0
- dataforge/providers/__init__.py +1 -0
- dataforge/providers/address.py +460 -0
- dataforge/providers/ai_chat.py +170 -0
- dataforge/providers/ai_prompt.py +447 -0
- dataforge/providers/automotive.py +416 -0
- dataforge/providers/barcode.py +149 -0
- dataforge/providers/base.py +34 -0
- dataforge/providers/color.py +247 -0
- dataforge/providers/company.py +144 -0
- dataforge/providers/crypto.py +105 -0
- dataforge/providers/datetime.py +397 -0
- dataforge/providers/ecommerce.py +316 -0
- dataforge/providers/education.py +234 -0
- dataforge/providers/file.py +271 -0
- dataforge/providers/finance.py +545 -0
- dataforge/providers/geo.py +332 -0
- dataforge/providers/government.py +114 -0
- dataforge/providers/internet.py +351 -0
- dataforge/providers/llm.py +726 -0
- dataforge/providers/lorem.py +241 -0
- dataforge/providers/medical.py +364 -0
- dataforge/providers/misc.py +196 -0
- dataforge/providers/network.py +283 -0
- dataforge/providers/payment.py +300 -0
- dataforge/providers/person.py +195 -0
- dataforge/providers/phone.py +87 -0
- dataforge/providers/profile.py +265 -0
- dataforge/providers/science.py +365 -0
- dataforge/providers/text.py +365 -0
- dataforge/py.typed +0 -0
- dataforge/pytest_plugin.py +80 -0
- dataforge/registry.py +164 -0
- dataforge/schema.py +772 -0
- dataforge/unique.py +171 -0
- dataforge_py-0.2.0.dist-info/METADATA +964 -0
- dataforge_py-0.2.0.dist-info/RECORD +145 -0
- dataforge_py-0.2.0.dist-info/WHEEL +4 -0
- dataforge_py-0.2.0.dist-info/entry_points.txt +35 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""AI Chat provider — assembles realistic conversation turns.
|
|
2
|
+
|
|
3
|
+
This is a **compound** provider (``_needs_forge = True``) that
|
|
4
|
+
delegates to ``ai_prompt`` and ``llm`` providers to assemble
|
|
5
|
+
realistic chat messages with role, model, content, and token usage.
|
|
6
|
+
|
|
7
|
+
Individual string fields are exposed in ``_field_map`` for Schema
|
|
8
|
+
compatibility. The compound ``chat_message()`` method returns a
|
|
9
|
+
``dict`` and is available only via direct API use.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import TYPE_CHECKING, Literal, overload
|
|
13
|
+
|
|
14
|
+
from dataforge.backend import RandomEngine
|
|
15
|
+
from dataforge.providers.base import BaseProvider
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from dataforge.core import DataForge
|
|
19
|
+
|
|
20
|
+
# Module-level constants — zero per-call allocation
# NOTE(review): _ROLES is not referenced by AiChatProvider below; it may be
# imported by other modules, so it is kept — confirm before removing.
_ROLES: tuple[str, ...] = (
    "system",
    "user",
    "assistant",
    "tool",
)

# Sampling weights for message roles: user/assistant turns dominate (40 each),
# with occasional system (15) and tool (5) messages.
_CHAT_ROLES_WEIGHTED: tuple[tuple[str, int], ...] = (
    ("user", 40),
    ("assistant", 40),
    ("system", 15),
    ("tool", 5),
)
# Pre-split parallel tuples so weighted sampling needs no per-call unpacking.
_CHAT_ROLE_VALUES: tuple[str, ...] = tuple(r for r, _ in _CHAT_ROLES_WEIGHTED)
_CHAT_ROLE_WEIGHTS: tuple[int, ...] = tuple(w for _, w in _CHAT_ROLES_WEIGHTED)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AiChatProvider(BaseProvider):
    """Fake AI chat data — message roles, model names, content, token usage.

    A compound provider (``_needs_forge = True``): every value is assembled
    by delegating to the ``ai_prompt`` and ``llm`` providers on the parent
    forge. The scalar fields are exposed through ``_field_map`` for Schema
    use; the dict-returning ``chat_message()`` is direct-API only.

    Parameters
    ----------
    engine : RandomEngine
        Shared random engine instance.
    forge : DataForge
        Parent forge used for cross-provider delegation.
    """

    __slots__ = ("_forge",)

    _provider_name = "ai_chat"
    _locale_modules: tuple[str, ...] = ()
    _needs_forge: bool = True
    _field_map: dict[str, str] = {
        "chat_role": "chat_role",
        "chat_model": "chat_model",
        "chat_content": "chat_content",
        "chat_tokens": "chat_tokens",
        "chat_finish_reason": "chat_finish_reason",
    }

    def __init__(self, engine: RandomEngine, forge: "DataForge") -> None:
        super().__init__(engine)
        self._forge = forge

    # ------------------------------------------------------------------
    # Scalar field methods (Schema-compatible via _field_map)
    # ------------------------------------------------------------------

    @overload
    def chat_role(self) -> str: ...
    @overload
    def chat_role(self, count: Literal[1]) -> str: ...
    @overload
    def chat_role(self, count: int) -> str | list[str]: ...
    def chat_role(self, count: int = 1) -> str | list[str]:
        """Generate a chat message role (user, assistant, system, tool)."""
        if count != 1:
            return self._engine.weighted_choices(
                _CHAT_ROLE_VALUES, _CHAT_ROLE_WEIGHTS, count
            )
        return self._engine.weighted_choice(_CHAT_ROLE_VALUES, _CHAT_ROLE_WEIGHTS)

    @overload
    def chat_model(self) -> str: ...
    @overload
    def chat_model(self, count: Literal[1]) -> str: ...
    @overload
    def chat_model(self, count: int) -> str | list[str]: ...
    def chat_model(self, count: int = 1) -> str | list[str]:
        """Generate a model name for the chat (delegates to llm.model_name)."""
        return self._forge.llm.model_name(count)

    @overload
    def chat_content(self) -> str: ...
    @overload
    def chat_content(self, count: Literal[1]) -> str: ...
    @overload
    def chat_content(self, count: int) -> str | list[str]: ...
    def chat_content(self, count: int = 1) -> str | list[str]:
        """Generate chat message content (delegates to ai_prompt.user_prompt)."""
        return self._forge.ai_prompt.user_prompt(count)

    @overload
    def chat_tokens(self) -> str: ...
    @overload
    def chat_tokens(self, count: Literal[1]) -> str: ...
    @overload
    def chat_tokens(self, count: int) -> str | list[str]: ...
    def chat_tokens(self, count: int = 1) -> str | list[str]:
        """Generate a token count for the message (delegates to llm.token_count).

        NOTE(review): annotated as ``str`` — confirm against the return type
        of ``llm.token_count``.
        """
        return self._forge.llm.token_count(count)

    @overload
    def chat_finish_reason(self) -> str: ...
    @overload
    def chat_finish_reason(self, count: Literal[1]) -> str: ...
    @overload
    def chat_finish_reason(self, count: int) -> str | list[str]: ...
    def chat_finish_reason(self, count: int = 1) -> str | list[str]:
        """Generate a finish reason (delegates to llm.finish_reason)."""
        return self._forge.llm.finish_reason(count)

    # ------------------------------------------------------------------
    # Compound message method (direct API only, not in _field_map)
    # ------------------------------------------------------------------

    def chat_message(self, count: int = 1) -> dict[str, str] | list[dict[str, str]]:
        """Generate a realistic chat message with role, model, content, tokens.

        Each message is a dict with the keys ``role``, ``model``,
        ``content``, ``tokens``, and ``finish_reason``.

        Parameters
        ----------
        count : int
            Number of messages to generate.

        Returns
        -------
        dict[str, str] or list[dict[str, str]]
            A single message when ``count == 1``, otherwise a list.
        """

        def _build() -> dict[str, str]:
            role = self._engine.weighted_choice(_CHAT_ROLE_VALUES, _CHAT_ROLE_WEIGHTS)
            model = self._forge.llm.model_name()
            # System turns get a system prompt; user, assistant, and tool
            # turns all use a user prompt as stand-in content.
            if role == "system":
                content = self._forge.ai_prompt.system_prompt()
            else:
                content = self._forge.ai_prompt.user_prompt()
            # Dict values are evaluated left to right, so the engine draw
            # order (tokens, then finish reason) matches the field order.
            return {
                "role": role,
                "model": model,
                "content": content,
                "tokens": self._forge.llm.token_count(),
                "finish_reason": self._forge.llm.finish_reason(),
            }

        if count == 1:
            return _build()
        return [_build() for _ in range(count)]
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
"""AI Prompt provider — user prompts, system prompts, prompt templates."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal, overload
|
|
4
|
+
|
|
5
|
+
from dataforge.providers.base import BaseProvider
|
|
6
|
+
|
|
7
|
+
# ---------------------------------------------------------------------------
# Module-level immutable tuples — zero per-call allocation
# ---------------------------------------------------------------------------

# General-purpose, ChatGPT-style user requests.
_USER_PROMPTS: tuple[str, ...] = (
    "Summarize this article in 3 bullet points",
    "Explain this concept to a 5 year old",
    "What are the pros and cons of this approach?",
    "Help me brainstorm ideas for a presentation",
    "Translate this text to French",
    "Can you proofread this email for me?",
    "What does this error message mean?",
    "Help me write a cover letter",
    "Create a meal plan for the week",
    "Explain the difference between these two concepts",
    "Suggest improvements for this paragraph",
    "Help me organize my thoughts on this topic",
    "What are some alternatives to this approach?",
    "Can you simplify this explanation?",
    "Give me a step-by-step guide for this process",
    "What questions should I ask in this interview?",
    "Help me draft a professional response",
    "Summarize the key takeaways from this meeting",
    "What are the main arguments for and against this?",
    "Help me create an outline for this essay",
    "Can you fact-check this statement?",
    "Suggest a catchy title for this blog post",
    "Help me write a thank you note",
    "What are best practices for this workflow?",
    "Rewrite this in a more formal tone",
    "Break down this complex topic into simple parts",
    "What should I consider before making this decision?",
    "Help me prepare talking points for this meeting",
    "Compare and contrast these two options",
    "Give me feedback on this draft",
)

# Software-engineering / debugging requests.
_CODING_PROMPTS: tuple[str, ...] = (
    "Write a Python function that parses JSON from a file",
    "How do I fix this TypeError in my code?",
    "Refactor this function to be more efficient",
    "Write unit tests for this class",
    "Explain what this regex pattern does",
    "Help me debug this API endpoint",
    "Convert this SQL query to an ORM query",
    "Write a bash script to automate this deployment",
    "How do I implement pagination in this REST API?",
    "Create a TypeScript interface for this data model",
    "Optimize this database query for performance",
    "Write a GitHub Actions workflow for CI/CD",
    "Help me implement error handling in this function",
    "How do I set up logging in this application?",
    "Write a Docker Compose file for this stack",
    "Implement a retry mechanism with exponential backoff",
    "Create a data validation schema for this input",
    "Write a migration script for this database change",
    "How do I implement caching for this endpoint?",
    "Help me write a recursive algorithm for this problem",
    "Create a REST API client with proper error handling",
    "Write a function to sanitize user input",
    "Implement rate limiting for this API",
    "How do I handle concurrent requests in this service?",
    "Write a custom middleware for this web framework",
    "Create a connection pool for this database",
    "Help me implement WebSocket support",
    "Write a function to parse and validate CSV data",
    "Implement a producer-consumer pattern for this queue",
    "How do I properly handle database transactions here?",
)

# Creative-writing requests.
_CREATIVE_PROMPTS: tuple[str, ...] = (
    "Write a short story about a robot learning to paint",
    "Create a poem about the changing seasons",
    "Write a dialogue between a cat and a dog",
    "Describe a futuristic city in vivid detail",
    "Write a fairy tale with a modern twist",
    "Create a character profile for a fantasy novel",
    "Write a haiku about morning coffee",
    "Compose a limerick about a programmer",
    "Write a monologue from the perspective of the moon",
    "Create a plot outline for a mystery novel",
    "Write a song lyric about overcoming challenges",
    "Describe an alien world with unique ecosystems",
    "Write a letter from a time traveler to their past self",
    "Create a backstory for a video game character",
    "Write a flash fiction piece about a last sunset",
    "Compose a children's story about a brave little cloud",
    "Write a comedic sketch about office life",
    "Create a recipe told as a love story",
    "Write a news article from the year 2150",
    "Describe a painting that doesn't exist yet",
)

# Data-analysis / evaluation requests.
_ANALYSIS_PROMPTS: tuple[str, ...] = (
    "Analyze the sentiment of this customer review",
    "What patterns do you see in this dataset?",
    "Compare the performance metrics across these quarters",
    "Identify potential risks in this project plan",
    "Analyze the market trends for this industry",
    "What insights can you draw from this survey data?",
    "Evaluate the strengths and weaknesses of this proposal",
    "Analyze the root cause of this system failure",
    "What correlations exist between these variables?",
    "Assess the competitive landscape for this product",
    "Analyze the user engagement metrics for this feature",
    "Identify bottlenecks in this supply chain",
    "Evaluate the ROI of this marketing campaign",
    "Analyze the demographic data for this region",
    "What trends emerge from this time series data?",
    "Assess the technical debt in this codebase",
    "Analyze the customer churn data for patterns",
    "Evaluate the scalability of this architecture",
    "What anomalies do you detect in this log data?",
    "Analyze the A/B test results for statistical significance",
)

# Ready-made assistant-configuration system prompts.
_SYSTEM_PROMPTS: tuple[str, ...] = (
    "You are a helpful assistant that provides clear and concise answers.",
    "You are an expert technical writer. Provide detailed documentation.",
    "You are a patient tutor. Explain concepts step by step.",
    "You are a code reviewer. Focus on correctness, performance, and style.",
    "You are a data analyst. Provide insights backed by evidence.",
    "You are a creative writing coach. Give constructive feedback.",
    "You are a security expert. Identify vulnerabilities and suggest fixes.",
    "You are a product manager. Focus on user needs and business value.",
    "You are a DevOps engineer. Emphasize reliability and automation.",
    "You are a UX researcher. Focus on user experience and accessibility.",
    "You are a financial advisor. Provide balanced and informed guidance.",
    "You are a medical information assistant. Always recommend consulting a doctor.",
    "You are a legal assistant. Provide general information, not legal advice.",
    "You are a language tutor. Help users practice and improve their skills.",
    "You are a project manager. Help organize tasks and track progress.",
    "You are a marketing strategist. Focus on growth and engagement.",
    "You are a database administrator. Optimize queries and schema design.",
    "You are a machine learning engineer. Focus on model performance.",
    "You are a technical interviewer. Ask relevant and fair questions.",
    "You are a documentation specialist. Keep things clear and organized.",
)

# Role phrases combined with _PERSONA_TRAITS to build persona prompts.
_PERSONA_ROLES: tuple[str, ...] = (
    "expert Python developer",
    "senior data scientist",
    "experienced DevOps engineer",
    "seasoned frontend developer",
    "professional technical writer",
    "senior security analyst",
    "experienced cloud architect",
    "senior backend engineer",
    "expert database administrator",
    "professional UX designer",
    "senior machine learning engineer",
    "experienced full-stack developer",
    "expert systems architect",
    "professional data engineer",
    "senior site reliability engineer",
    "experienced mobile developer",
    "expert network engineer",
    "professional QA engineer",
    "senior platform engineer",
    "experienced AI researcher",
)

# Trait clauses appended after a persona role.
_PERSONA_TRAITS: tuple[str, ...] = (
    "who focuses on clean, maintainable code",
    "who prioritizes performance and scalability",
    "who emphasizes security best practices",
    "who values thorough testing and documentation",
    "who cares about developer experience",
    "who advocates for simplicity and clarity",
    "who specializes in distributed systems",
    "who focuses on observability and monitoring",
    "who prioritizes accessibility and inclusion",
    "who emphasizes pragmatic solutions",
)

# Leading verbs for generated prompt templates.
_TEMPLATE_ACTIONS: tuple[str, ...] = (
    "Write",
    "Generate",
    "Create",
    "Draft",
    "Compose",
    "Produce",
    "Build",
    "Design",
    "Develop",
    "Craft",
)

# Tone pool for prompt templates.
_TEMPLATE_TONES: tuple[str, ...] = (
    "formal",
    "casual",
    "professional",
    "friendly",
    "technical",
    "persuasive",
    "informative",
    "concise",
    "detailed",
    "conversational",
)

# Output-format pool for prompt templates.
_TEMPLATE_FORMATS: tuple[str, ...] = (
    "email",
    "blog post",
    "report",
    "summary",
    "proposal",
    "presentation",
    "documentation",
    "tutorial",
    "review",
    "analysis",
)

# Subject-matter pool for prompt templates.
_TEMPLATE_TOPICS: tuple[str, ...] = (
    "project updates",
    "quarterly results",
    "product launch",
    "team performance",
    "market research",
    "customer feedback",
    "technical architecture",
    "process improvements",
    "budget planning",
    "risk assessment",
)

# Task descriptions opening a few-shot prompt.
_FEW_SHOT_TASKS: tuple[str, ...] = (
    "Classify the sentiment of the following text",
    "Extract the key entities from the following passage",
    "Categorize the following support ticket",
    "Determine the language of the following text",
    "Rate the quality of the following response",
    "Identify the main topic of the following paragraph",
    "Classify the intent of the following user query",
    "Extract the action items from the following text",
    "Determine the urgency level of the following message",
    "Categorize the following product review",
)

# Example output labels for the few-shot "Output:" lines.
_FEW_SHOT_LABELS: tuple[str, ...] = (
    "positive",
    "negative",
    "neutral",
    "urgent",
    "low priority",
    "bug report",
    "feature request",
    "question",
    "feedback",
    "complaint",
)

# Example input snippets for the few-shot "Input:" lines.
_FEW_SHOT_EXAMPLES: tuple[str, ...] = (
    "The product works great and I love it!",
    "This is the worst experience I've ever had.",
    "The package arrived on time as expected.",
    "I need help with my account immediately.",
    "It would be nice to have dark mode support.",
    "How do I reset my password?",
    "The new update improved loading times significantly.",
    "I've been waiting for a response for three days.",
    "The interface is intuitive and easy to use.",
    "There seems to be a bug in the checkout process.",
)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class AiPromptProvider(BaseProvider):
    """Generates fake AI prompts — user prompts, system prompts, templates.

    Every generator draws from a module-level tuple pool with a single
    engine ``choice`` per component, so there is no per-call allocation.
    """

    __slots__ = ()

    _provider_name = "ai_prompt"
    _locale_modules: tuple[str, ...] = ()
    # Schema field name -> method name.
    _field_map: dict[str, str] = {
        "user_prompt": "user_prompt",
        "coding_prompt": "coding_prompt",
        "creative_prompt": "creative_prompt",
        "analysis_prompt": "analysis_prompt",
        "system_prompt": "system_prompt",
        "persona_prompt": "persona_prompt",
        "prompt_template": "prompt_template",
        "few_shot_prompt": "few_shot_prompt",
    }

    # --- Public API ---

    @overload
    def user_prompt(self) -> str: ...
    @overload
    def user_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def user_prompt(self, count: int) -> str | list[str]: ...
    def user_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a realistic user prompt (e.g. ChatGPT-style request)."""
        if count == 1:
            return self._engine.choice(_USER_PROMPTS)
        return self._engine.choices(_USER_PROMPTS, count)

    @overload
    def coding_prompt(self) -> str: ...
    @overload
    def coding_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def coding_prompt(self, count: int) -> str | list[str]: ...
    def coding_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a coding-related prompt."""
        if count == 1:
            return self._engine.choice(_CODING_PROMPTS)
        return self._engine.choices(_CODING_PROMPTS, count)

    @overload
    def creative_prompt(self) -> str: ...
    @overload
    def creative_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def creative_prompt(self, count: int) -> str | list[str]: ...
    def creative_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a creative writing prompt."""
        if count == 1:
            return self._engine.choice(_CREATIVE_PROMPTS)
        return self._engine.choices(_CREATIVE_PROMPTS, count)

    @overload
    def analysis_prompt(self) -> str: ...
    @overload
    def analysis_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def analysis_prompt(self, count: int) -> str | list[str]: ...
    def analysis_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a data analysis prompt."""
        if count == 1:
            return self._engine.choice(_ANALYSIS_PROMPTS)
        return self._engine.choices(_ANALYSIS_PROMPTS, count)

    @overload
    def system_prompt(self) -> str: ...
    @overload
    def system_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def system_prompt(self, count: int) -> str | list[str]: ...
    def system_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a system prompt for configuring an AI assistant."""
        if count == 1:
            return self._engine.choice(_SYSTEM_PROMPTS)
        return self._engine.choices(_SYSTEM_PROMPTS, count)

    # --- Computed string methods ---

    @staticmethod
    def _article(phrase: str) -> str:
        """Return the indefinite article ("a" or "an") for *phrase*.

        Simple vowel-initial heuristic; sufficient for the fixed
        ``_PERSONA_ROLES`` pool.
        """
        return "an" if phrase[:1].lower() in "aeiou" else "a"

    def _one_persona_prompt(self) -> str:
        """Build one persona sentence; draws role then trait."""
        role = self._engine.choice(_PERSONA_ROLES)
        trait = self._engine.choice(_PERSONA_TRAITS)
        # Bug fix: article now agrees with the role ("a senior data
        # scientist", "an expert Python developer") — it was hard-coded
        # to "an", which is wrong for most roles in the pool.
        return f"You are {self._article(role)} {role} {trait}."

    @overload
    def persona_prompt(self) -> str: ...
    @overload
    def persona_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def persona_prompt(self, count: int) -> str | list[str]: ...
    def persona_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a persona-based system prompt."""
        if count == 1:
            return self._one_persona_prompt()
        # Batch path with local bindings; draw order matches the single
        # path (role, then trait).
        _choice = self._engine.choice
        _article = self._article
        result: list[str] = []
        for _ in range(count):
            role = _choice(_PERSONA_ROLES)
            trait = _choice(_PERSONA_TRAITS)
            result.append(f"You are {_article(role)} {role} {trait}.")
        return result

    def _one_prompt_template(self) -> str:
        """Build one template; {tone}/{topic}/{audience} stay as placeholders."""
        # Bug fix: previously this also drew a tone and a topic from the
        # engine and discarded them — two dead RNG draws that made the
        # single-item path consume a different number of draws than the
        # batch path below, skewing seeded streams.
        action = self._engine.choice(_TEMPLATE_ACTIONS)
        fmt = self._engine.choice(_TEMPLATE_FORMATS)
        return f"{action} a {{tone}} {fmt} about {{topic}} for {{audience}}"

    @overload
    def prompt_template(self) -> str: ...
    @overload
    def prompt_template(self, count: Literal[1]) -> str: ...
    @overload
    def prompt_template(self, count: int) -> str | list[str]: ...
    def prompt_template(self, count: int = 1) -> str | list[str]:
        """Generate a parameterized prompt template with placeholders."""
        if count == 1:
            return self._one_prompt_template()
        # Inlined batch with local binding
        _choice = self._engine.choice
        _actions = _TEMPLATE_ACTIONS
        _formats = _TEMPLATE_FORMATS
        return [
            f"{_choice(_actions)} a {{tone}} {_choice(_formats)} about {{topic}} for {{audience}}"
            for _ in range(count)
        ]

    def _one_few_shot_prompt(self) -> str:
        """Build one few-shot classification prompt with two worked examples."""
        task = self._engine.choice(_FEW_SHOT_TASKS)
        # Two independently drawn (example, label) pairs.
        ex1 = self._engine.choice(_FEW_SHOT_EXAMPLES)
        lb1 = self._engine.choice(_FEW_SHOT_LABELS)
        ex2 = self._engine.choice(_FEW_SHOT_EXAMPLES)
        lb2 = self._engine.choice(_FEW_SHOT_LABELS)
        return (
            f"{task}.\n\n"
            f'Example 1:\nInput: "{ex1}"\nOutput: {lb1}\n\n'
            f'Example 2:\nInput: "{ex2}"\nOutput: {lb2}\n\n'
            f'Now classify:\nInput: "{{input}}"\nOutput:'
        )

    @overload
    def few_shot_prompt(self) -> str: ...
    @overload
    def few_shot_prompt(self, count: Literal[1]) -> str: ...
    @overload
    def few_shot_prompt(self, count: int) -> str | list[str]: ...
    def few_shot_prompt(self, count: int = 1) -> str | list[str]:
        """Generate a few-shot prompt with example pairs."""
        if count == 1:
            return self._one_few_shot_prompt()
        # Inlined batch with local binding
        _choice = self._engine.choice
        _tasks = _FEW_SHOT_TASKS
        _examples = _FEW_SHOT_EXAMPLES
        _labels = _FEW_SHOT_LABELS
        result: list[str] = []
        for _ in range(count):
            task = _choice(_tasks)
            ex1 = _choice(_examples)
            lb1 = _choice(_labels)
            ex2 = _choice(_examples)
            lb2 = _choice(_labels)
            result.append(
                f"{task}.\n\n"
                f'Example 1:\nInput: "{ex1}"\nOutput: {lb1}\n\n'
                f'Example 2:\nInput: "{ex2}"\nOutput: {lb2}\n\n'
                f'Now classify:\nInput: "{{input}}"\nOutput:'
            )
        return result
|