sdg-hub 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +4 -0
- sdg_hub/_version.py +21 -0
- sdg_hub/blocks/__init__.py +6 -0
- sdg_hub/blocks/block.py +54 -0
- sdg_hub/blocks/filterblock.py +76 -0
- sdg_hub/blocks/iterblock.py +31 -0
- sdg_hub/blocks/llmblock.py +430 -0
- sdg_hub/blocks/rmblocks.py +194 -0
- sdg_hub/blocks/utilblocks.py +140 -0
- sdg_hub/configs/__init__.py +0 -0
- sdg_hub/configs/annotations/__init__.py +0 -0
- sdg_hub/configs/annotations/cot_reflection.yaml +34 -0
- sdg_hub/configs/annotations/detailed_description.yaml +10 -0
- sdg_hub/configs/annotations/detailed_description_icl.yaml +32 -0
- sdg_hub/configs/annotations/simple.yaml +10 -0
- sdg_hub/configs/knowledge/__init__.py +0 -0
- sdg_hub/configs/knowledge/atomic_facts.yaml +45 -0
- sdg_hub/configs/knowledge/auxilary_instructions.yaml +35 -0
- sdg_hub/configs/knowledge/data_recipe/__init__.py +0 -0
- sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +3 -0
- sdg_hub/configs/knowledge/detailed_summary.yaml +17 -0
- sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +68 -0
- sdg_hub/configs/knowledge/evaluate_question.yaml +38 -0
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml +85 -0
- sdg_hub/configs/knowledge/extractive_summary.yaml +17 -0
- sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +39 -0
- sdg_hub/configs/knowledge/generate_questions_responses.yaml +56 -0
- sdg_hub/configs/knowledge/mcq_generation.yaml +83 -0
- sdg_hub/configs/knowledge/router.yaml +12 -0
- sdg_hub/configs/knowledge/simple_generate_qa.yaml +34 -0
- sdg_hub/configs/reasoning/dynamic_cot.yaml +40 -0
- sdg_hub/configs/skills/_A_.yaml +97 -0
- sdg_hub/configs/skills/_B_.yaml +36 -0
- sdg_hub/configs/skills/_C_.yaml +71 -0
- sdg_hub/configs/skills/_D_.yaml +85 -0
- sdg_hub/configs/skills/_E_.yaml +30 -0
- sdg_hub/configs/skills/_F_.yaml +45 -0
- sdg_hub/configs/skills/_G_.yaml +56 -0
- sdg_hub/configs/skills/_H_.yaml +80 -0
- sdg_hub/configs/skills/__init__.py +0 -0
- sdg_hub/configs/skills/analyzer.yaml +48 -0
- sdg_hub/configs/skills/annotation.yaml +36 -0
- sdg_hub/configs/skills/contexts.yaml +21 -0
- sdg_hub/configs/skills/critic.yaml +60 -0
- sdg_hub/configs/skills/data_recipe/__init__.py +0 -0
- sdg_hub/configs/skills/data_recipe/default_recipe.yaml +6 -0
- sdg_hub/configs/skills/evaluate_freeform_pair.yaml +44 -0
- sdg_hub/configs/skills/evaluate_freeform_questions.yaml +46 -0
- sdg_hub/configs/skills/evaluate_grounded_pair.yaml +54 -0
- sdg_hub/configs/skills/evaluate_grounded_questions.yaml +51 -0
- sdg_hub/configs/skills/freeform_questions.yaml +29 -0
- sdg_hub/configs/skills/freeform_responses.yaml +45 -0
- sdg_hub/configs/skills/grounded_questions.yaml +38 -0
- sdg_hub/configs/skills/grounded_responses.yaml +59 -0
- sdg_hub/configs/skills/judge.yaml +53 -0
- sdg_hub/configs/skills/planner.yaml +67 -0
- sdg_hub/configs/skills/respond.yaml +8 -0
- sdg_hub/configs/skills/revised_responder.yaml +78 -0
- sdg_hub/configs/skills/router.yaml +12 -0
- sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +27 -0
- sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +31 -0
- sdg_hub/flow.py +127 -0
- sdg_hub/flows/annotation/emotion/detailed_description.yaml +19 -0
- sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +19 -0
- sdg_hub/flows/annotation/emotion/simple.yaml +19 -0
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +13 -0
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +12 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +89 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +136 -0
- sdg_hub/flows/generation/skills/agentic_improve_skill.yaml +108 -0
- sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +80 -0
- sdg_hub/flows/generation/skills/synth_skills.yaml +59 -0
- sdg_hub/logger_config.py +20 -0
- sdg_hub/pipeline.py +66 -0
- sdg_hub/prompts.py +17 -0
- sdg_hub/py.typed +0 -0
- sdg_hub/registry.py +122 -0
- sdg_hub/sdg.py +164 -0
- sdg_hub/utils/__init__.py +5 -0
- sdg_hub/utils/chunking.py +73 -0
- sdg_hub/utils/datamixing.py +123 -0
- sdg_hub/utils/datautils.py +14 -0
- sdg_hub/utils/docprocessor.py +357 -0
- sdg_hub/utils/json.py +48 -0
- sdg_hub/utils/models.py +31 -0
- sdg_hub/utils/parse_and_convert.py +392 -0
- sdg_hub/utils/taxonomy.py +489 -0
- sdg_hub-0.1.0a1.dist-info/METADATA +154 -0
- sdg_hub-0.1.0a1.dist-info/RECORD +94 -0
- sdg_hub-0.1.0a1.dist-info/WHEEL +5 -0
- sdg_hub-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- sdg_hub-0.1.0a1.dist-info/top_level.txt +1 -0
sdg_hub/flow.py
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
# Standard
|
3
|
+
from abc import ABC
|
4
|
+
from importlib import resources
|
5
|
+
from typing import Optional
|
6
|
+
import operator
|
7
|
+
import os
|
8
|
+
|
9
|
+
# Third Party
|
10
|
+
import yaml
|
11
|
+
|
12
|
+
# Local
|
13
|
+
from .registry import BlockRegistry, PromptRegistry
|
14
|
+
from . import prompts
|
15
|
+
from . import blocks
|
16
|
+
|
17
|
+
|
18
|
+
# Maps the operator names permitted in flow YAML files to the corresponding
# callables from the standard `operator` module; `get_flow_from_file` uses
# this to turn a filter block's `operation` string into a real function.
OPERATOR_MAP = {
    "operator.eq": operator.eq,
    "operator.ge": operator.ge,
    "operator.contains": operator.contains,
}

# Maps dtype names permitted in flow YAML files to the built-in conversion
# callables applied to a filter block's `convert_dtype` setting.
CONVERT_DTYPE_MAP = {
    "float": float,
    "int": int,
}
|
28
|
+
|
29
|
+
|
30
|
+
class Flow(ABC):
    """Load a flow definition from YAML and resolve it into runnable block specs.

    A flow YAML file is a list of block descriptions. This class rewires each
    entry in place: registry names become block classes, relative config paths
    become absolute paths, and operator/dtype names become callables.
    """

    def __init__(
        self,
        llm_client,
        num_samples_to_generate: Optional[int] = None,
    ) -> None:
        # Client injected into every LLM block found in the flow.
        self.llm_client = llm_client
        # When set, overrides the sample count on every LLM block.
        self.num_samples_to_generate = num_samples_to_generate
        # Package root; relative paths in flow files resolve against it.
        self.base_path = str(resources.files(__package__))
        self.registered_blocks = BlockRegistry.get_registry()

    def get_flow_from_file(self, yaml_path: str) -> list:
        """Parse a flow YAML file and return its list of resolved block specs.

        Parameters
        ----------
        yaml_path : str
            Path to the flow file, either absolute or relative to the package.

        Raises
        ------
        FileNotFoundError
            If the YAML file cannot be located.
        KeyError
            If a block type or model prompt is not present in its registry.
        """
        # Prefer the package-relative location when it exists.
        candidate = os.path.join(self.base_path, yaml_path)
        if os.path.isfile(candidate):
            yaml_path = candidate

        try:
            with open(yaml_path, "r", encoding="utf-8") as yaml_file:
                flow = yaml.safe_load(yaml_file)
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"File not found: {yaml_path}") from exc

        # Rewrite each block description in place with concrete objects.
        for block in flow:
            config = block["block_config"]

            # LLM blocks need the client, a prompt template, and (optionally)
            # an overridden sample count wired in before instantiation. This
            # must run before block_type is swapped for a class below, since
            # the check relies on the type still being a string.
            if "LLM" in block["block_type"]:
                config["client"] = self.llm_client
                # Use an explicit model_prompt when given; otherwise fall back
                # to the registry entry whose key appears inside the model id.
                if config.get("model_prompt", None) is None:
                    matched_prompt = next(
                        (
                            key
                            for key in PromptRegistry.get_registry()
                            if key in config["model_id"]
                        ),
                        None,
                    )
                    if matched_prompt is None:
                        raise KeyError(
                            f"Prompt not found in registry: {config['model_id']}"
                        )
                    config["model_prompt"] = matched_prompt

                if self.num_samples_to_generate is not None:
                    block["num_samples"] = self.num_samples_to_generate

            # Swap the registry name for the actual block class.
            try:
                block["block_type"] = self.registered_blocks[block["block_type"]]
            except KeyError as exc:
                raise KeyError(
                    f"Block not found in registry: {block['block_type']}"
                ) from exc

            # Resolve a single relative config path to an absolute one,
            # keeping the original string when no package file matches.
            if "config_path" in config:
                resolved = os.path.join(self.base_path, config["config_path"])
                if os.path.isfile(resolved):
                    config["config_path"] = resolved

            # Same resolution for multiple config paths, which may arrive as
            # either a mapping or a list.
            if "config_paths" in config:
                paths = config["config_paths"]
                if isinstance(paths, dict):
                    for key, path in paths.items():
                        resolved = os.path.join(self.base_path, path)
                        if os.path.isfile(resolved):
                            paths[key] = resolved
                if isinstance(paths, list):
                    for i, path in enumerate(paths):
                        resolved = os.path.join(self.base_path, path)
                        if os.path.isfile(resolved):
                            paths[i] = resolved

            # Operator and dtype names from the YAML become real callables.
            if "operation" in config:
                config["operation"] = OPERATOR_MAP[config["operation"]]

            if "convert_dtype" in config:
                config["convert_dtype"] = CONVERT_DTYPE_MAP[config["convert_dtype"]]

        return flow
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/detailed_description.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/detailed_description_icl.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/simple.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,13 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_mmlu_knowledge
|
4
|
+
config_path: configs/knowledge/mcq_generation.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- mmlubench_question
|
8
|
+
- mmlubench_answer
|
9
|
+
gen_kwargs:
|
10
|
+
temperature: 0
|
11
|
+
max_tokens: 2048
|
12
|
+
drop_duplicates:
|
13
|
+
- mmlubench_question
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_knowledge
|
4
|
+
config_path: configs/knowledge/simple_generate_qa.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,89 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_knowledge
|
4
|
+
config_path: configs/knowledge/generate_questions_responses.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- question
|
8
|
+
- response
|
9
|
+
parser_kwargs:
|
10
|
+
parser_name: custom
|
11
|
+
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
12
|
+
parser_cleanup_tags:
|
13
|
+
- "[END]"
|
14
|
+
gen_kwargs:
|
15
|
+
max_tokens: 2048
|
16
|
+
drop_duplicates:
|
17
|
+
- question
|
18
|
+
|
19
|
+
- block_type: LLMBlock
|
20
|
+
block_config:
|
21
|
+
block_name: eval_faithfulness_qa_pair
|
22
|
+
config_path: configs/knowledge/evaluate_faithfulness.yaml
|
23
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
24
|
+
output_cols:
|
25
|
+
- explanation
|
26
|
+
- judgment
|
27
|
+
gen_kwargs:
|
28
|
+
max_tokens: 2048
|
29
|
+
|
30
|
+
- block_type: FilterByValueBlock
|
31
|
+
block_config:
|
32
|
+
block_name: filter_faithfulness
|
33
|
+
filter_column: judgment
|
34
|
+
filter_value: "YES"
|
35
|
+
operation: operator.eq
|
36
|
+
batch_kwargs:
|
37
|
+
num_procs: 8
|
38
|
+
drop_columns:
|
39
|
+
- judgment
|
40
|
+
- explanation
|
41
|
+
|
42
|
+
- block_type: LLMBlock
|
43
|
+
block_config:
|
44
|
+
block_name: eval_relevancy_qa_pair
|
45
|
+
config_path: configs/knowledge/evaluate_relevancy.yaml
|
46
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
47
|
+
output_cols:
|
48
|
+
- feedback
|
49
|
+
- score
|
50
|
+
gen_kwargs:
|
51
|
+
max_tokens: 2048
|
52
|
+
|
53
|
+
- block_type: FilterByValueBlock
|
54
|
+
block_config:
|
55
|
+
block_name: filter_relevancy
|
56
|
+
filter_column: score
|
57
|
+
filter_value: 2.0
|
58
|
+
operation: operator.eq
|
59
|
+
convert_dtype: float
|
60
|
+
batch_kwargs:
|
61
|
+
num_procs: 8
|
62
|
+
drop_columns:
|
63
|
+
- feedback
|
64
|
+
- score
|
65
|
+
|
66
|
+
- block_type: LLMBlock
|
67
|
+
block_config:
|
68
|
+
block_name: eval_verify_question
|
69
|
+
config_path: configs/knowledge/evaluate_question.yaml
|
70
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
71
|
+
output_cols:
|
72
|
+
- explanation
|
73
|
+
- rating
|
74
|
+
gen_kwargs:
|
75
|
+
max_tokens: 2048
|
76
|
+
|
77
|
+
- block_type: FilterByValueBlock
|
78
|
+
block_config:
|
79
|
+
block_name: filter_verify_question
|
80
|
+
filter_column: rating
|
81
|
+
filter_value: 1.0
|
82
|
+
operation: operator.eq
|
83
|
+
convert_dtype: float
|
84
|
+
batch_kwargs:
|
85
|
+
num_procs: 8
|
86
|
+
drop_columns:
|
87
|
+
- explanation
|
88
|
+
- rating
|
89
|
+
- __index_level_0__
|
@@ -0,0 +1,136 @@
|
|
1
|
+
- block_type: DuplicateColumns
|
2
|
+
block_config:
|
3
|
+
block_name: duplicate_document_col
|
4
|
+
columns_map:
|
5
|
+
document: base_document
|
6
|
+
|
7
|
+
- block_type: LLMBlock
|
8
|
+
block_config:
|
9
|
+
block_name: gen_detailed_summary
|
10
|
+
config_path: configs/knowledge/detailed_summary.yaml
|
11
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
12
|
+
output_cols:
|
13
|
+
- summary_detailed
|
14
|
+
gen_kwargs:
|
15
|
+
max_tokens: 2048
|
16
|
+
|
17
|
+
- block_type: LLMBlock
|
18
|
+
block_config:
|
19
|
+
block_name: gen_atomic_facts
|
20
|
+
config_path: configs/knowledge/atomic_facts.yaml
|
21
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
22
|
+
output_cols:
|
23
|
+
- summary_atomic_facts
|
24
|
+
gen_kwargs:
|
25
|
+
max_tokens: 2048
|
26
|
+
|
27
|
+
- block_type: LLMBlock
|
28
|
+
block_config:
|
29
|
+
block_name: gen_extractive_summary
|
30
|
+
config_path: configs/knowledge/extractive_summary.yaml
|
31
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
32
|
+
output_cols:
|
33
|
+
- summary_extractive
|
34
|
+
gen_kwargs:
|
35
|
+
max_tokens: 2048
|
36
|
+
|
37
|
+
- block_type: FlattenColumnsBlock
|
38
|
+
block_config:
|
39
|
+
block_name: flatten_summary_columns
|
40
|
+
var_cols:
|
41
|
+
- summary_detailed
|
42
|
+
- summary_extractive
|
43
|
+
- summary_atomic_facts
|
44
|
+
- base_document
|
45
|
+
value_name: summary
|
46
|
+
var_name: dataset_type
|
47
|
+
|
48
|
+
- block_type: RenameColumns
|
49
|
+
block_config:
|
50
|
+
block_name: rename_to_document_column
|
51
|
+
columns_map:
|
52
|
+
document: raw_document
|
53
|
+
summary: document
|
54
|
+
|
55
|
+
- block_type: LLMBlock
|
56
|
+
block_config:
|
57
|
+
block_name: knowledge generation
|
58
|
+
config_path: configs/knowledge/generate_questions_responses.yaml
|
59
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
60
|
+
output_cols:
|
61
|
+
- question
|
62
|
+
- response
|
63
|
+
parser_kwargs:
|
64
|
+
parser_name: custom
|
65
|
+
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
66
|
+
parser_cleanup_tags:
|
67
|
+
- "[END]"
|
68
|
+
gen_kwargs:
|
69
|
+
temperature: 0.0
|
70
|
+
max_tokens: 2048
|
71
|
+
|
72
|
+
- block_type: LLMBlock
|
73
|
+
block_config:
|
74
|
+
block_name: eval_faithfulness_qa_pair
|
75
|
+
config_path: configs/knowledge/evaluate_faithfulness.yaml
|
76
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
77
|
+
output_cols:
|
78
|
+
- explanation
|
79
|
+
- judgment
|
80
|
+
gen_kwargs:
|
81
|
+
max_tokens: 2048
|
82
|
+
|
83
|
+
- block_type: FilterByValueBlock
|
84
|
+
block_config:
|
85
|
+
block_name: filter_faithfulness
|
86
|
+
filter_column: judgment
|
87
|
+
filter_value: "YES"
|
88
|
+
operation: operator.eq
|
89
|
+
drop_columns:
|
90
|
+
- judgment
|
91
|
+
- explanation
|
92
|
+
|
93
|
+
- block_type: LLMBlock
|
94
|
+
block_config:
|
95
|
+
block_name: eval_relevancy_qa_pair
|
96
|
+
config_path: configs/knowledge/evaluate_relevancy.yaml
|
97
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
98
|
+
output_cols:
|
99
|
+
- feedback
|
100
|
+
- score
|
101
|
+
gen_kwargs:
|
102
|
+
max_tokens: 2048
|
103
|
+
|
104
|
+
- block_type: FilterByValueBlock
|
105
|
+
block_config:
|
106
|
+
block_name: filter_relevancy
|
107
|
+
filter_column: score
|
108
|
+
filter_value: 2.0
|
109
|
+
operation: operator.eq
|
110
|
+
convert_dtype: float
|
111
|
+
drop_columns:
|
112
|
+
- feedback
|
113
|
+
- score
|
114
|
+
|
115
|
+
- block_type: LLMBlock
|
116
|
+
block_config:
|
117
|
+
block_name: eval_verify_question
|
118
|
+
config_path: configs/knowledge/evaluate_question.yaml
|
119
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
120
|
+
output_cols:
|
121
|
+
- explanation
|
122
|
+
- rating
|
123
|
+
gen_kwargs:
|
124
|
+
max_tokens: 2048
|
125
|
+
|
126
|
+
- block_type: FilterByValueBlock
|
127
|
+
block_config:
|
128
|
+
block_name: filter_verify_question
|
129
|
+
filter_column: rating
|
130
|
+
filter_value: 1.0
|
131
|
+
operation: operator.eq
|
132
|
+
convert_dtype: float
|
133
|
+
drop_columns:
|
134
|
+
- explanation
|
135
|
+
- rating
|
136
|
+
- __index_level_0__
|
@@ -0,0 +1,108 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: router
|
4
|
+
config_path: configs/skills/router.yaml
|
5
|
+
model_id: skill-classifier-v3-clm
|
6
|
+
output_cols:
|
7
|
+
- route
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0
|
10
|
+
max_tokens: 1
|
11
|
+
extra_body:
|
12
|
+
allowed_token_ids:
|
13
|
+
- 32001
|
14
|
+
- 32002
|
15
|
+
- 32003
|
16
|
+
- 32004
|
17
|
+
- 32005
|
18
|
+
- 32006
|
19
|
+
- 32007
|
20
|
+
- 32008
|
21
|
+
- block_type: SamplePopulatorBlock
|
22
|
+
block_config:
|
23
|
+
block_name: icl_populator
|
24
|
+
config_paths:
|
25
|
+
- configs/skills/_A_.yaml
|
26
|
+
- configs/skills/_B_.yaml
|
27
|
+
- configs/skills/_C_.yaml
|
28
|
+
- configs/skills/_D_.yaml
|
29
|
+
- configs/skills/_E_.yaml
|
30
|
+
- configs/skills/_F_.yaml
|
31
|
+
- configs/skills/_G_.yaml
|
32
|
+
- configs/skills/_H_.yaml
|
33
|
+
column_name: route
|
34
|
+
batch_kwargs:
|
35
|
+
num_procs: 8
|
36
|
+
- block_type: LLMBlock
|
37
|
+
block_config:
|
38
|
+
block_name: analyzer
|
39
|
+
config_path: configs/skills/analyzer.yaml
|
40
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
41
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
42
|
+
output_cols:
|
43
|
+
- analysis
|
44
|
+
- rubric
|
45
|
+
- block_type: LLMBlock
|
46
|
+
block_config:
|
47
|
+
block_name: critic
|
48
|
+
config_path: configs/skills/critic.yaml
|
49
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
50
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
51
|
+
output_cols:
|
52
|
+
- critique
|
53
|
+
- block_type: LLMBlock
|
54
|
+
block_config:
|
55
|
+
block_name: planner
|
56
|
+
config_path: configs/skills/planner.yaml
|
57
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
58
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
59
|
+
output_cols:
|
60
|
+
- plan
|
61
|
+
- block_type: LLMBlock
|
62
|
+
block_config:
|
63
|
+
block_name: revised_responder
|
64
|
+
config_path: configs/skills/revised_responder.yaml
|
65
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
66
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
67
|
+
output_cols:
|
68
|
+
- revised_response
|
69
|
+
drop_columns:
|
70
|
+
- icl_query
|
71
|
+
- icl_response
|
72
|
+
- icl_analysis
|
73
|
+
- icl_rubric
|
74
|
+
- icl_critique
|
75
|
+
- icl_plan
|
76
|
+
- icl_revised_response
|
77
|
+
- block_type: LLMBlock
|
78
|
+
block_config:
|
79
|
+
block_name: judge
|
80
|
+
config_path: configs/skills/judge.yaml
|
81
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
82
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
83
|
+
output_cols:
|
84
|
+
- judgement
|
85
|
+
- verdict
|
86
|
+
- block_type: FilterByValueBlock
|
87
|
+
block_config:
|
88
|
+
block_name: filter_judgement
|
89
|
+
filter_column: verdict
|
90
|
+
filter_value:
|
91
|
+
- Assistant A
|
92
|
+
- Assistant B
|
93
|
+
operation: operator.contains
|
94
|
+
batch_kwargs:
|
95
|
+
num_procs: 8
|
96
|
+
- block_type: SelectorBlock
|
97
|
+
block_config:
|
98
|
+
block_name: response_selector
|
99
|
+
choice_map:
|
100
|
+
Assistant A: "response"
|
101
|
+
Assistant B: "revised_response"
|
102
|
+
choice_col: verdict
|
103
|
+
output_col: chosen_reponse
|
104
|
+
batch_kwargs:
|
105
|
+
num_procs: 8
|
106
|
+
drop_columns:
|
107
|
+
- judgement
|
108
|
+
- verdict
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_skill_freeform
|
4
|
+
config_path: configs/skills/simple_generate_qa_freeform.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_skill_grounded
|
4
|
+
config_path: configs/skills/simple_generate_qa_grounded.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,80 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_contexts
|
4
|
+
config_path: configs/skills/contexts.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- context
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
n: 10
|
12
|
+
seed: 42
|
13
|
+
drop_duplicates:
|
14
|
+
- context
|
15
|
+
- block_type: LLMBlock
|
16
|
+
block_config:
|
17
|
+
block_name: gen_grounded_questions
|
18
|
+
config_path: configs/skills/grounded_questions.yaml
|
19
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
20
|
+
output_cols:
|
21
|
+
- question
|
22
|
+
batch_kwargs:
|
23
|
+
num_samples: 3
|
24
|
+
drop_duplicates:
|
25
|
+
- question
|
26
|
+
- block_type: LLMBlock
|
27
|
+
block_config:
|
28
|
+
block_name: eval_grounded_questions
|
29
|
+
config_path: configs/skills/evaluate_grounded_questions.yaml
|
30
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
31
|
+
output_cols:
|
32
|
+
- evaluation
|
33
|
+
- score
|
34
|
+
- block_type: FilterByValueBlock
|
35
|
+
block_config:
|
36
|
+
block_name: filter_grounded_questions
|
37
|
+
filter_column: score
|
38
|
+
filter_value: 1.0
|
39
|
+
operation: operator.eq
|
40
|
+
convert_dtype: float
|
41
|
+
batch_kwargs:
|
42
|
+
num_procs: 8
|
43
|
+
drop_columns:
|
44
|
+
- evaluation
|
45
|
+
- score
|
46
|
+
- num_samples
|
47
|
+
- block_type: LLMBlock
|
48
|
+
block_config:
|
49
|
+
block_name: gen_grounded_responses
|
50
|
+
config_path: configs/skills/grounded_responses.yaml
|
51
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
52
|
+
output_cols:
|
53
|
+
- response
|
54
|
+
- block_type: LLMBlock
|
55
|
+
block_config:
|
56
|
+
block_name: evaluate_grounded_qa_pair
|
57
|
+
config_path: configs/skills/evaluate_grounded_pair.yaml
|
58
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
59
|
+
output_cols:
|
60
|
+
- evaluation
|
61
|
+
- score
|
62
|
+
- block_type: FilterByValueBlock
|
63
|
+
block_config:
|
64
|
+
block_name: filter_grounded_qa_pair
|
65
|
+
filter_column: score
|
66
|
+
filter_value: 2.0
|
67
|
+
operation: operator.ge
|
68
|
+
convert_dtype: float
|
69
|
+
batch_kwargs:
|
70
|
+
num_procs: 8
|
71
|
+
- block_type: CombineColumnsBlock
|
72
|
+
block_config:
|
73
|
+
block_name: combine_question_and_context
|
74
|
+
columns:
|
75
|
+
- context
|
76
|
+
- question
|
77
|
+
output_col: question
|
78
|
+
batch_kwargs:
|
79
|
+
num_procs: 8
|
80
|
+
batched: True
|