sdg-hub 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdg_hub/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.2'
21
- __version_tuple__ = version_tuple = (0, 1, 2)
20
+ __version__ = version = '0.1.4'
21
+ __version_tuple__ = version_tuple = (0, 1, 4)
@@ -1,4 +1,4 @@
1
- system: null
1
+ system: "You are a helpful assistant that annotates text."
2
2
  introduction: "Task Description: Data Annotation"
3
3
  principles: null
4
4
  examples: null
@@ -9,7 +9,7 @@ principles: |
9
9
 
10
10
  For each question, assign a score of 1 point if the response meets the criteria, and 0 points if it does not. After evaluating each question, provide detailed feedback explaining your reasoning behind the scores awarded.
11
11
 
12
- Conclude your evaluation with a final result, strictly using the following format: 'Total Score: X'. The total score should represent the sum of points assigned for each question, with a maximum possible score of 2 points.
12
+ Conclude your evaluation with a total score as a final result. The total score should represent the sum of points assigned for each question, with a maximum possible score of 2 points.
13
13
  Only evaluate the response based on the above criteria, do not create new questions.
14
14
 
15
15
  examples: |
@@ -49,7 +49,6 @@ examples: |
49
49
  0
50
50
  [End of Score]
51
51
 
52
-
53
52
  Example 3:
54
53
  [Start of Question]
55
54
  What are the benefits of electric vehicles?
sdg_hub/flow_runner.py CHANGED
@@ -42,6 +42,7 @@ def run_flow(
42
42
  debug: bool = False,
43
43
  dataset_start_index: int = 0,
44
44
  dataset_end_index: Optional[int] = None,
45
+ api_key: Optional[str] = None,
45
46
  ) -> None:
46
47
  """Process the dataset using the specified configuration.
47
48
 
@@ -69,6 +70,8 @@ def run_flow(
69
70
  Start index for dataset slicing, by default 0.
70
71
  dataset_end_index : Optional[int], optional
71
72
  End index for dataset slicing, by default None.
73
+ api_key : Optional[str], optional
74
+ API key for the remote endpoint. If not provided, will use OPENAI_API_KEY environment variable, by default None.
72
75
 
73
76
  Returns
74
77
  -------
@@ -137,9 +140,9 @@ def run_flow(
137
140
  ) from e
138
141
 
139
142
  # Validate API configuration
140
- openai_api_key = os.environ.get("OPENAI_API_KEY")
143
+ openai_api_key = api_key or os.environ.get("OPENAI_API_KEY")
141
144
  if not openai_api_key or openai_api_key == "EMPTY":
142
- logger.warning("OPENAI_API_KEY not set or is 'EMPTY'. API calls may fail.")
145
+ logger.warning("API key not provided and OPENAI_API_KEY not set or is 'EMPTY'. API calls may fail.")
143
146
 
144
147
  openai_api_base = endpoint
145
148
  if not openai_api_base:
@@ -349,6 +352,12 @@ def run_flow(
349
352
  @click.option(
350
353
  "--dataset_end_index", type=int, default=None, help="End index of the dataset."
351
354
  )
355
+ @click.option(
356
+ "--api_key",
357
+ type=str,
358
+ default=None,
359
+ help="API key for the remote endpoint. If not provided, will use OPENAI_API_KEY environment variable.",
360
+ )
352
361
  def main(
353
362
  ds_path: str,
354
363
  bs: int,
@@ -361,6 +370,7 @@ def main(
361
370
  debug: bool,
362
371
  dataset_start_index: int,
363
372
  dataset_end_index: Optional[int],
373
+ api_key: Optional[str],
364
374
  ) -> None:
365
375
  """CLI entry point for running data generation flows.
366
376
 
@@ -388,6 +398,8 @@ def main(
388
398
  Start index for dataset slicing.
389
399
  dataset_end_index : Optional[int]
390
400
  End index for dataset slicing.
401
+ api_key : Optional[str]
402
+ API key for the remote endpoint. If not provided, will use OPENAI_API_KEY environment variable.
391
403
 
392
404
  Returns
393
405
  -------
@@ -406,6 +418,7 @@ def main(
406
418
  debug=debug,
407
419
  dataset_start_index=dataset_start_index,
408
420
  dataset_end_index=dataset_end_index,
421
+ api_key=api_key,
409
422
  )
410
423
  except (
411
424
  DatasetLoadError,
@@ -2,7 +2,7 @@
2
2
  block_config:
3
3
  block_name: gen_mmlu_knowledge
4
4
  config_path: configs/knowledge/mcq_generation.yaml
5
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
5
+ model_id: meta-llama/Llama-3.3-70B-Instruct
6
6
  output_cols:
7
7
  - mmlubench_question
8
8
  - mmlubench_answer
@@ -2,7 +2,7 @@
2
2
  block_config:
3
3
  block_name: gen_knowledge
4
4
  config_path: configs/knowledge/simple_generate_qa.yaml
5
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
5
+ model_id: meta-llama/Llama-3.3-70B-Instruct
6
6
  output_cols:
7
7
  - output
8
8
  gen_kwargs:
@@ -2,7 +2,7 @@
2
2
  block_config:
3
3
  block_name: gen_knowledge
4
4
  config_path: configs/knowledge/generate_questions_responses.yaml
5
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
5
+ model_id: meta-llama/Llama-3.3-70B-Instruct
6
6
  output_cols:
7
7
  - question
8
8
  - response
@@ -20,7 +20,7 @@
20
20
  block_config:
21
21
  block_name: eval_faithfulness_qa_pair
22
22
  config_path: configs/knowledge/evaluate_faithfulness.yaml
23
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
23
+ model_id: meta-llama/Llama-3.3-70B-Instruct
24
24
  output_cols:
25
25
  - explanation
26
26
  - judgment
@@ -43,7 +43,7 @@
43
43
  block_config:
44
44
  block_name: eval_relevancy_qa_pair
45
45
  config_path: configs/knowledge/evaluate_relevancy.yaml
46
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
46
+ model_id: meta-llama/Llama-3.3-70B-Instruct
47
47
  output_cols:
48
48
  - feedback
49
49
  - score
@@ -67,7 +67,7 @@
67
67
  block_config:
68
68
  block_name: eval_verify_question
69
69
  config_path: configs/knowledge/evaluate_question.yaml
70
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
70
+ model_id: meta-llama/Llama-3.3-70B-Instruct
71
71
  output_cols:
72
72
  - explanation
73
73
  - rating
@@ -8,35 +8,31 @@
8
8
  block_config:
9
9
  block_name: gen_detailed_summary
10
10
  config_path: configs/knowledge/detailed_summary.yaml
11
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
11
+ model_id: meta-llama/Llama-3.3-70B-Instruct
12
12
  output_cols:
13
13
  - summary_detailed
14
14
  gen_kwargs:
15
- max_tokens: 4096
16
- temperature: 0.7
17
- n: 50
15
+ max_tokens: 2048
18
16
 
19
17
  - block_type: LLMBlock
20
18
  block_config:
21
19
  block_name: gen_atomic_facts
22
20
  config_path: configs/knowledge/atomic_facts.yaml
23
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
21
+ model_id: meta-llama/Llama-3.3-70B-Instruct
24
22
  output_cols:
25
23
  - summary_atomic_facts
26
24
  gen_kwargs:
27
- max_tokens: 4096
28
- temperature: 0.7
25
+ max_tokens: 2048
29
26
 
30
27
  - block_type: LLMBlock
31
28
  block_config:
32
29
  block_name: gen_extractive_summary
33
30
  config_path: configs/knowledge/extractive_summary.yaml
34
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
31
+ model_id: meta-llama/Llama-3.3-70B-Instruct
35
32
  output_cols:
36
33
  - summary_extractive
37
34
  gen_kwargs:
38
- max_tokens: 4096
39
- temperature: 0.7
35
+ max_tokens: 2048
40
36
 
41
37
  - block_type: FlattenColumnsBlock
42
38
  block_config:
@@ -59,33 +55,25 @@
59
55
  - block_type: LLMBlock
60
56
  block_config:
61
57
  block_name: knowledge generation
62
- config_path: configs/knowledge/generate_questions.yaml
63
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
58
+ config_path: configs/knowledge/generate_questions_responses.yaml
59
+ model_id: meta-llama/Llama-3.3-70B-Instruct
64
60
  output_cols:
65
61
  - question
62
+ - response
66
63
  parser_kwargs:
67
64
  parser_name: custom
68
- parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
69
- gen_kwargs:
70
- temperature: 0.7
71
- max_tokens: 100
72
-
73
- - block_type: LLMBlock
74
- block_config:
75
- block_name: knowledge generation
76
- config_path: configs/knowledge/generate_responses.yaml
77
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
78
- output_cols:
79
- - response
65
+ parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
66
+ parser_cleanup_tags:
67
+ - "[END]"
80
68
  gen_kwargs:
81
- temperature: 0.7
69
+ temperature: 0.0
82
70
  max_tokens: 2048
83
71
 
84
72
  - block_type: LLMBlock
85
73
  block_config:
86
74
  block_name: eval_faithfulness_qa_pair
87
75
  config_path: configs/knowledge/evaluate_faithfulness.yaml
88
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
76
+ model_id: meta-llama/Llama-3.3-70B-Instruct
89
77
  output_cols:
90
78
  - explanation
91
79
  - judgment
@@ -106,7 +94,7 @@
106
94
  block_config:
107
95
  block_name: eval_relevancy_qa_pair
108
96
  config_path: configs/knowledge/evaluate_relevancy.yaml
109
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
97
+ model_id: meta-llama/Llama-3.3-70B-Instruct
110
98
  output_cols:
111
99
  - feedback
112
100
  - score
@@ -128,7 +116,7 @@
128
116
  block_config:
129
117
  block_name: eval_verify_question
130
118
  config_path: configs/knowledge/evaluate_question.yaml
131
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
119
+ model_id: meta-llama/Llama-3.3-70B-Instruct
132
120
  output_cols:
133
121
  - explanation
134
122
  - rating
sdg_hub/prompts.py CHANGED
@@ -12,7 +12,7 @@ def instructlab_chat_template():
12
12
  return """{% for message in messages %}{% if message['role'] == 'pretraining' %}{{ '<|pretrain|>' + message['content'] + '<|endoftext|>' + '<|/pretrain|>' }}{% elif message['role'] == 'system' %}{{ '<|system|>' + '\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>' + '\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>' + '\n' + message['content'] + '<|endoftext|>' + ('' if loop.last else '\n') }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' + '\n' }}{% endif %}{% endfor %}"""
13
13
 
14
14
 
15
- @PromptRegistry.register("mistralai")
15
+ @PromptRegistry.register("mistralai/Mixtral")
16
16
  def mistral_chat_template():
17
17
  return """{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n<s>\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + '</s>'}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n"""
18
18
 
@@ -26,11 +26,12 @@ def meta_llama_chat_template():
26
26
  def microsoft_phi_chat_template():
27
27
  return """{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}"""
28
28
 
29
+
29
30
  @PromptRegistry.register("nvidia/Llama-3_3-Nemotron-Super-49B-v1")
30
31
  def nemotron_chat_template():
31
32
  """
32
33
  Format chat messages for the Nemotron model, including a system prompt and structured message headers.
33
-
34
+
34
35
  The template starts with a system message containing "detailed thinking on", then iterates over messages, wrapping each with start and end header tokens and an end-of-text token. For assistant messages containing a `</think>` tag, only the content after this tag is included. Optionally appends an assistant prompt if generation is requested.
35
36
  """
36
37
  return """{{- bos_token }}
@@ -52,7 +53,7 @@ def nemotron_chat_template():
52
53
  def qwen_2_5_chat_template():
53
54
  """
54
55
  Formats chat messages into the prompt structure required by the Qwen 2.5 model family, supporting system messages, tool descriptions, function call instructions, and role-based message formatting.
55
-
56
+
56
57
  If tools are provided, includes tool signatures and instructions for function calls in the system prompt. User, assistant, and tool messages are wrapped with special tokens, and assistant tool calls are serialized as JSON within XML tags. Optionally appends a generation prompt for the assistant.
57
58
  """
58
59
  return """{%- if tools %}\n {{- \'<|im_start|>system\\n\' }}\n {%- if messages[0][\'role\'] == \'system\' %}\n {{- messages[0][\'content\'] }}\n {%- else %}\n {{- \'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\' }}\n {%- endif %}\n {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}\n {%- for tool in tools %}\n {{- "\\n" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}\n {%- if messages[0][\'role\'] == \'system\' %}\n {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }}\n {%- else %}\n {{- \'<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n\' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\' + \'\\n\' }}\n {%- elif message.role == "assistant" %}\n {{- \'<|im_start|>\' + message.role }}\n {%- if message.content %}\n {{- \'\\n\' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- \'\\n<tool_call>\\n{"name": "\' }}\n {{- tool_call.name }}\n {{- \'", "arguments": \' }}\n {{- tool_call.arguments | tojson }}\n {{- \'}\\n</tool_call>\' }}\n {%- endfor %}\n {{- \'<|im_end|>\\n\' }}\n {%- elif message.role == "tool" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}\n {{- \'<|im_start|>user\' }}\n {%- endif %}\n {{- \'\\n<tool_response>\\n\' }}\n {{- message.content }}\n {{- \'\\n</tool_response>\' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- \'<|im_start|>assistant\\n\' }}\n{%- endif %}\n"""
@@ -62,9 +63,9 @@ def qwen_2_5_chat_template():
62
63
  def qwen_3_chat_template():
63
64
  """
64
65
  Formats chat messages for the Qwen 3 model family, supporting multi-step tool usage, reasoning content, and special XML tags for tool calls and responses.
65
-
66
+
66
67
  This template handles system messages, user and assistant roles, and tool interactions. When tools are provided, it outputs their signatures and instructions for function calls. It tracks the last user query to determine where to insert assistant reasoning content within `<think>` tags. Assistant tool calls are serialized as JSON within `<tool_call>` tags, and tool responses are grouped inside `<tool_response>` tags. Optionally, a generation prompt and empty reasoning block can be added.
67
-
68
+
68
69
  Parameters:
69
70
  tools (optional): List of tool signature objects to be included in the prompt.
70
71
  messages: List of message objects, each with a role and content, and optionally tool_calls or reasoning_content.
@@ -72,3 +73,8 @@ def qwen_3_chat_template():
72
73
  enable_thinking (optional): If false, inserts an empty reasoning block in the assistant prompt.
73
74
  """
74
75
  return """{%- if tools %}\n {{- \'<|im_start|>system\\n\' }}\n {%- if messages[0].role == \'system\' %}\n {{- messages[0].content + \'\\n\\n\' }}\n {%- endif %}\n {{- "# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}\n {%- for tool in tools %}\n {{- "\\n" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}\n {%- if messages[0].role == \'system\' %}\n {{- \'<|im_start|>system\\n\' + messages[0].content + \'<|im_end|>\\n\' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith(\'<tool_response>\') and message.content.endswith(\'</tool_response>\')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = \'\' %}\n {%- endif %}\n {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content + \'<|im_end|>\' + \'\\n\' }}\n {%- elif message.role == "assistant" %}\n {%- set reasoning_content = \'\' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if \'</think>\' in content %}\n {%- set reasoning_content = content.split(\'</think>\')[0].rstrip(\'\\n\').split(\'<think>\')[-1].lstrip(\'\\n\') %}\n {%- set content = content.split(\'</think>\')[-1].lstrip(\'\\n\') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- \'<|im_start|>\' + message.role + \'\\n<think>\\n\' + reasoning_content.strip(\'\\n\') + \'\\n</think>\\n\\n\' + content.lstrip(\'\\n\') }}\n {%- else %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content }}\n {%- endif %}\n {%- else %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- \'\\n\' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- \'<tool_call>\\n{"name": "\' }}\n {{- tool_call.name }}\n {{- \'", "arguments": \' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- \'}\\n</tool_call>\' }}\n {%- endfor %}\n {%- endif %}\n {{- \'<|im_end|>\\n\' }}\n {%- elif message.role == "tool" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}\n {{- \'<|im_start|>user\' }}\n {%- endif %}\n {{- \'\\n<tool_response>\\n\' }}\n {{- content }}\n {{- \'\\n</tool_response>\' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- \'<|im_start|>assistant\\n\' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- \'<think>\\n\\n</think>\\n\\n\' }}\n {%- endif %}\n{%- endif %}"""
76
+
77
+
78
+ @PromptRegistry.register("mistralai/Mistral-Small-3")
79
+ def mistral_small_3_chat_template():
80
+ return """{%- if not date_string is defined %}\n {%- set date_string = \"2025-01-01\" %}\n{%- endif %}\n{%- set default_system_message = \"You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was last updated on 2023-10-01. The current date is \" + date_string + \".\\n\\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \\\"What are some good restaurants around me?\\\" => \\\"Where are you?\\\" or \\\"When is the next flight to Tokyo\\\" => \\\"Where do you travel from?\\\")\" %}\n\n<s>\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = default_system_message %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n\n{%- for message in loop_messages %}\n {%- if message['role'] == 'user' %}\n {{- '[INST]' + message['content'] + '[/INST]' }}\n {%- elif message['role'] == 'system' %}\n {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n {%- elif message['role'] == 'assistant' %}\n {{- message['content'] + '</s>' }}\n {%- else %}\n {{- raise_exception('Only user, system and assistant roles are supported!') }}\n {%- endif %}\n{%- endfor %}"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -36,7 +36,7 @@ Requires-Dist: flask>=3.0.2; extra == "web-interface"
36
36
  Requires-Dist: pyyaml>=6.0.1; extra == "web-interface"
37
37
  Requires-Dist: flask-wtf>=1.2.2; extra == "web-interface"
38
38
  Provides-Extra: vllm
39
- Requires-Dist: vllm<0.8.4,>=0.8.0; extra == "vllm"
39
+ Requires-Dist: vllm>=0.9.1; extra == "vllm"
40
40
  Requires-Dist: torch>=2.0.0; extra == "vllm"
41
41
  Requires-Dist: transformers>=4.37.0; extra == "vllm"
42
42
  Requires-Dist: accelerate>=0.21.0; extra == "vllm"
@@ -1,11 +1,11 @@
1
1
  sdg_hub/__init__.py,sha256=5Wa6onDndPvG4iwnjq2jK747t3-7XKdQn2WfHfq1sFc,67
2
- sdg_hub/_version.py,sha256=bSmADqydH8nBu-J4lG8UVuR7hnU_zcwhnSav2oQ0W0A,511
2
+ sdg_hub/_version.py,sha256=hcPkC9vIGgfrKK6ft7ysLT7iOCjpFmCBmyKLmXiaZ1g,511
3
3
  sdg_hub/checkpointer.py,sha256=R0pNKL_q7-BerxmIarY0w1nFYaq7fGnoRRkCVL6Z-Gw,5053
4
4
  sdg_hub/flow.py,sha256=14WDZfb-VDUBwXsVo9u5oMuWD6aOm-GWtIdT64z4j-0,18050
5
- sdg_hub/flow_runner.py,sha256=xeAIdx2r86kwtdrMFysjR1N-j4teonvbSHKg-m1VNSs,14584
5
+ sdg_hub/flow_runner.py,sha256=rSoXoN2n2vsMmOnsRImeQivsY9zlrDig53O9DBbQzz0,15177
6
6
  sdg_hub/logger_config.py,sha256=7uHEJVRfym1c4n95DOKHelLXqAus8uHsZYmzLsEjqpo,422
7
7
  sdg_hub/pipeline.py,sha256=mahktfoCMVnuBnvLNjAVOAoFKNQo-wb0Dz1_xdYhKDM,3852
8
- sdg_hub/prompts.py,sha256=Gto1KcIhO-50ERvZx1Qzu-eAhSlIkOjYH9F6j2eIPfY,17482
8
+ sdg_hub/prompts.py,sha256=VMuPnABC9f3SgzSBFLprPbn1o2P4XttnKFwen_IYqR4,19273
9
9
  sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  sdg_hub/registry.py,sha256=Sc_HNxo4n0pgWMiEDd_sLjxaSXAMZFiHJIhQKqjywwk,3772
11
11
  sdg_hub/sdg.py,sha256=8SKrSnqyvJAwE2Muf9lXw9ONRcDzqmCtaEzFHCYW4CY,6914
@@ -20,14 +20,14 @@ sdg_hub/configs/annotations/cot_reflection.yaml,sha256=60EdsTe1y7GoUIAWYSGfMa3EK
20
20
  sdg_hub/configs/annotations/detailed_annotations.yaml,sha256=in21xmlhxDJGEaWh1IgINh33tEyW9AuyG3k4pWBuKSM,1520
21
21
  sdg_hub/configs/annotations/detailed_description.yaml,sha256=FsGbQMBxf1MAOi0nhrQ4icxcwYMlRura_ji9Pmeh1AA,192
22
22
  sdg_hub/configs/annotations/detailed_description_icl.yaml,sha256=NDdwo5EShnYZjm1Fn80sZTAwfnwpPigixP2hvJ8--cU,679
23
- sdg_hub/configs/annotations/simple_annotations.yaml,sha256=e2F_Ow8EG_me4XJ2cnBTlKb9y1FmdX0DHKkiMqiwdUQ,188
23
+ sdg_hub/configs/annotations/simple_annotations.yaml,sha256=d80d0mK7Xz0MMCCSW3sYw3ztt5HASV5miu0krSAbjnA,234
24
24
  sdg_hub/configs/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  sdg_hub/configs/knowledge/atomic_facts.yaml,sha256=bIfQr0q0FyReO94v_lpLO56FikARCvFmZza-ISZTOnA,2453
26
26
  sdg_hub/configs/knowledge/auxilary_instructions.yaml,sha256=aCgIjvNacdC2ZHThEvhZKvwORK6KqErVvVYQYQrIDLE,2034
27
27
  sdg_hub/configs/knowledge/detailed_summary.yaml,sha256=_Mc_i9vaLp1OPKexSOURV5gbXEG41p1eELUukOhz8oM,388
28
28
  sdg_hub/configs/knowledge/evaluate_faithfulness.yaml,sha256=iuvx5vNNm_jzHlmcKF83StaDYezRz2vQn3JUHM-TMdQ,3054
29
29
  sdg_hub/configs/knowledge/evaluate_question.yaml,sha256=02mikEAJCUEkREBo7KxPY9H6iTUHQN-4cRkn2XMlVQ8,1915
30
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml,sha256=ASh8A1HAYO1h1tQRrwGnkUmK1n-WDKLdfW_LbSW1ipQ,3690
30
+ sdg_hub/configs/knowledge/evaluate_relevancy.yaml,sha256=yPyW2BeLV07cvDU8NO6f-Wc32P9iycnpXyLvvTnUy44,3651
31
31
  sdg_hub/configs/knowledge/extractive_summary.yaml,sha256=TYgJ7WQc7NFkf3GeRsbx6lwfA_xFnEOYGELewSqorp0,399
32
32
  sdg_hub/configs/knowledge/generate_code_questions_responses.yaml,sha256=cIus2JYMYDvxHFVSU9QVa-1IK5KoChb3rCU2b4b9UmI,908
33
33
  sdg_hub/configs/knowledge/generate_questions.yaml,sha256=iJtttZrVvlXFraUSrMowqTCLoJOLDbBndcTNMPTO8A4,2788
@@ -67,10 +67,10 @@ sdg_hub/configs/skills/icl_examples/math.yaml,sha256=hNq-QudlXrg9CWLpJdrZ4v3vifG
67
67
  sdg_hub/configs/skills/icl_examples/reasoning.yaml,sha256=eesIlH9SO07TVF20gy18MZrcDzLhSmynd_F_lvg0oQg,4335
68
68
  sdg_hub/configs/skills/icl_examples/roleplay.yaml,sha256=LYEyA7wv7QWQscUNQr0K_lotNoWSfuoAEncx3PCRYIs,6997
69
69
  sdg_hub/configs/skills/icl_examples/writing.yaml,sha256=El-57IjZ5IvdcmCHyHvX_M2RFFkEos572220be8ecrQ,11335
70
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml,sha256=Rueuxr_n1zabE_nGqOgUfh5hqVmEONRka9NLiZANSew,346
71
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml,sha256=o4uyfs1nDiECcNROdsvHKiM46NYvQufo9dF4XSGpY54,298
72
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml,sha256=ZTZvevfwDQSKUwPcv1i5IzIchsRHSEN03eTefedQmU8,2172
73
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml,sha256=KYMdStAsfWKZhoFzEwTfl8XhF0qRSc6WsgJbzLWCw-U,3634
70
+ sdg_hub/flows/generation/knowledge/mmlu_bench.yaml,sha256=U0S2NPkZ_9_8yQGgHJm4el-wVsg_6MllzbFT97cGNrI,343
71
+ sdg_hub/flows/generation/knowledge/simple_knowledge.yaml,sha256=_DkBZjS47bH0Lmu0eXVRlesTxeAF8Zlzj1PgR1vruuA,295
72
+ sdg_hub/flows/generation/knowledge/synth_knowledge.yaml,sha256=sYBzIFNBGks_o2Nwvov5MSrMadAB3g-niBAaWPbBYO0,2160
73
+ sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml,sha256=afQu7wePfBEb6nqAahJ6K2dT5jgp25qU6nA5DRR4JV0,3372
74
74
  sdg_hub/flows/generation/skills/improve_responses.yaml,sha256=wUV0awTmKHNZ62pHiw_yz-IdG0OYgT_dCwlMUlZS3TA,2683
75
75
  sdg_hub/flows/generation/skills/simple_freeform_skill.yaml,sha256=iVEomFH1E52JA7KLmTIwkS1PnzxUJVPMgbK2O-m80As,309
76
76
  sdg_hub/flows/generation/skills/simple_grounded_skill.yaml,sha256=LTLxqdgbLIKSJonuIRHhcRSpit1EawwNvytWzXWXe2E,309
@@ -82,8 +82,8 @@ sdg_hub/utils/datautils.py,sha256=0t_SZ_UXBKl8uL6rVp3SUh8YKRbzKlh2oO5gr2cKyEw,38
82
82
  sdg_hub/utils/error_handling.py,sha256=UvPEmtdpbBL71Zx8DWpIqd8869kEY2dlCH11iDgMfec,1847
83
83
  sdg_hub/utils/path_resolution.py,sha256=M7hnwoyRQTKgwGC3Ld1_KmKaO_8Lu0PCk6JtQrLp67Q,2006
84
84
  sdg_hub/utils/validation_result.py,sha256=O3zF6r49LQ9StAf_oWmK2bg-JfTQw6rpbHtHr9lI4ks,264
85
- sdg_hub-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
86
- sdg_hub-0.1.2.dist-info/METADATA,sha256=4etDbH6APmsl8vh-b5H8-8r7pVCYBRWbqlRbf6gmYcY,7247
87
- sdg_hub-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
88
- sdg_hub-0.1.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
89
- sdg_hub-0.1.2.dist-info/RECORD,,
85
+ sdg_hub-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
86
+ sdg_hub-0.1.4.dist-info/METADATA,sha256=rXHwcDfzwZDA2gxcO0uNUMwr_oiDjNqMzdQIEkiakwg,7240
87
+ sdg_hub-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
88
+ sdg_hub-0.1.4.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
89
+ sdg_hub-0.1.4.dist-info/RECORD,,