sdg-hub 0.1.0a2__tar.gz → 0.1.0a3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/PKG-INFO +1 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/data-generation-with-llama-70b/data-generation-with-llama-70b.ipynb +1 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/inference_time_scaling/prm_with_vllm.ipynb +12 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_pre_processing.ipynb +5 -4
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/knowledge_generation_and_mixing.ipynb +4 -4
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/skills/unstructed_to_structured.ipynb +12 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/skills/unstructed_to_structured_lls.ipynb +11 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/knowledge_sdg.ipynb +8 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/_version.py +1 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flow.py +34 -17
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub.egg-info/PKG-INFO +1 -1
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub.egg-info/SOURCES.txt +4 -8
- sdg_hub-0.1.0a3/tests/test_flow.py +74 -0
- sdg_hub-0.1.0a3/tests/testdata/test_config_1.yaml +7 -0
- sdg_hub-0.1.0a3/tests/testdata/test_flow_1.yaml +7 -0
- sdg_hub-0.1.0a3/tests/testdata/test_flow_2.yaml +10 -0
- sdg_hub-0.1.0a2/scripts/test_freeform_skills.py +0 -61
- sdg_hub-0.1.0a2/scripts/test_grounded_skills.py +0 -109
- sdg_hub-0.1.0a2/scripts/test_knowledge.py +0 -52
- sdg_hub-0.1.0a2/src/sdg_hub/utils/datamixing.py +0 -123
- sdg_hub-0.1.0a2/src/sdg_hub/utils/json.py +0 -48
- sdg_hub-0.1.0a2/src/sdg_hub/utils/models.py +0 -31
- sdg_hub-0.1.0a2/src/sdg_hub/utils/taxonomy.py +0 -489
- sdg_hub-0.1.0a2/test.ipynb +0 -1361
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/actionlint.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/actions/free-disk-space/action.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/dependabot.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/mergify.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/actionlint.dockerfile +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/actionlint.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/docs.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/e2e.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/lint.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/matchers/actionlint.json +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/matchers/pylint.json +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/pypi.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.github/workflows/test.yml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.gitignore +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.isort.cfg +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.markdownlint-cli2.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.pre-commit-config.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/.pylintrc +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/LICENSE +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/MANIFEST.in +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/Makefile +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/README.md +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/assets/imgs/IL_skills_pipeline.png +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/assets/imgs/customized_nano_closed_book_rag_results.png +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/assets/imgs/instructlab-banner.png +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/assets/imgs/overview.png +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/data-generation-with-llama-70b/synth_knowledge1.5_llama3.3.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/annotation/sample_data/emotion_classification.jsonl +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.json +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.md +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_collection/ibm-annual-report/qna.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/skills/sample_data/mdtable_seeds.jsonl +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/README.md +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/flows/synth_knowledge1.5_nemotron_super_49b.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/flows/synth_knowledge_reasoning_nemotron_super_49b.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/generate.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/prompts/generate_answers.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/prompts/generate_questions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/prompts/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/pyproject.toml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/requirements-dev.txt +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/requirements.txt +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/scripts/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/scripts/docparser.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/scripts/docparser_v2.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/scripts/flow_runner.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/scripts/ruff.sh +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/setup.cfg +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/block.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/filterblock.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/iterblock.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/llmblock.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/rmblocks.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/blocks/utilblocks.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/annotations/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/annotations/cot_reflection.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/annotations/detailed_description.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/annotations/simple.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/atomic_facts.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/data_recipe/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/detailed_summary.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/evaluate_question.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/extractive_summary.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/mcq_generation.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/router.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_A_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_B_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_C_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_D_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_E_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_F_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_G_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/_H_.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/analyzer.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/annotation.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/contexts.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/critic.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/data_recipe/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/data_recipe/default_recipe.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/freeform_questions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/freeform_responses.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/grounded_questions.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/grounded_responses.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/judge.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/planner.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/respond.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/revised_responder.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/router.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/annotation/emotion/detailed_description.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/annotation/emotion/simple.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/skills/agentic_improve_skill.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/flows/generation/skills/synth_skills.yaml +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/logger_config.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/pipeline.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/prompts.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/py.typed +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/registry.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/sdg.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/utils/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/utils/chunking.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/utils/datautils.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/utils/docprocessor.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub/utils/parse_and_convert.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub.egg-info/dependency_links.txt +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub.egg-info/requires.txt +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/src/sdg_hub.egg-info/top_level.txt +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/tests/__init__.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/tests/test_chunking.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/tests/test_filterblock.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/tests/testdata/testdata.py +0 -0
- {sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/tox.ini +0 -0
@@ -20,6 +20,18 @@
|
|
20
20
|
"from sdg_hub.blocks.rmblocks import PRMBlock"
|
21
21
|
]
|
22
22
|
},
|
23
|
+
{
|
24
|
+
"cell_type": "markdown",
|
25
|
+
"metadata": {},
|
26
|
+
"source": [
|
27
|
+
"## Install sdg-hub\n",
|
28
|
+
"\n",
|
29
|
+
"\n",
|
30
|
+
"```bash \n",
|
31
|
+
"pip install sdg-hub==0.1.0a2\n",
|
32
|
+
"```"
|
33
|
+
]
|
34
|
+
},
|
23
35
|
{
|
24
36
|
"cell_type": "markdown",
|
25
37
|
"metadata": {},
|
{sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/knowledge/document_pre_processing.ipynb
RENAMED
@@ -15,10 +15,11 @@
|
|
15
15
|
"metadata": {},
|
16
16
|
"source": [
|
17
17
|
"### Install SDG\n",
|
18
|
-
"
|
19
|
-
"
|
20
|
-
"
|
21
|
-
"
|
18
|
+
"\n",
|
19
|
+
"```bash \n",
|
20
|
+
"pip install sdg-hub==0.1.0a2\n",
|
21
|
+
"pip install rich datasets tabulate transformers\n",
|
22
|
+
"```"
|
22
23
|
]
|
23
24
|
},
|
24
25
|
{
|
@@ -15,10 +15,10 @@
|
|
15
15
|
"metadata": {},
|
16
16
|
"source": [
|
17
17
|
"### Install SDG\n",
|
18
|
-
"
|
19
|
-
"
|
20
|
-
"
|
21
|
-
"
|
18
|
+
"```bash \n",
|
19
|
+
"pip install sdg-hub==0.1.0a2\n",
|
20
|
+
"pip install rich datasets tabulate transformers\n",
|
21
|
+
"```\n",
|
22
22
|
" - If you haven't already, run the document pre-processing notebook to create the seed data"
|
23
23
|
]
|
24
24
|
},
|
{sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/skills/unstructed_to_structured.ipynb
RENAMED
@@ -41,6 +41,17 @@
|
|
41
41
|
"The end goal is to create training data that will help align the model with your specific needs, whether that's matching your company's communication style, following particular protocols, or handling specialized tasks in your preferred way."
|
42
42
|
]
|
43
43
|
},
|
44
|
+
{
|
45
|
+
"cell_type": "markdown",
|
46
|
+
"metadata": {},
|
47
|
+
"source": [
|
48
|
+
"## Install sdg-hub\n",
|
49
|
+
"\n",
|
50
|
+
"```bash \n",
|
51
|
+
"pip install sdg-hub==0.1.0a2\n",
|
52
|
+
"```"
|
53
|
+
]
|
54
|
+
},
|
44
55
|
{
|
45
56
|
"cell_type": "markdown",
|
46
57
|
"metadata": {},
|
@@ -404,7 +415,7 @@
|
|
404
415
|
"name": "python",
|
405
416
|
"nbconvert_exporter": "python",
|
406
417
|
"pygments_lexer": "ipython3",
|
407
|
-
"version": "3.
|
418
|
+
"version": "3.10.16"
|
408
419
|
}
|
409
420
|
},
|
410
421
|
"nbformat": 4,
|
{sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/instructlab/skills/unstructed_to_structured_lls.ipynb
RENAMED
@@ -41,6 +41,17 @@
|
|
41
41
|
"The end goal is to create training data that will help align the model with your specific needs, whether that's matching your company's communication style, following particular protocols, or handling specialized tasks in your preferred way."
|
42
42
|
]
|
43
43
|
},
|
44
|
+
{
|
45
|
+
"cell_type": "markdown",
|
46
|
+
"metadata": {},
|
47
|
+
"source": [
|
48
|
+
"## Install sdg-hub\n",
|
49
|
+
"\n",
|
50
|
+
"```bash \n",
|
51
|
+
"pip install sdg-hub==0.1.0a2\n",
|
52
|
+
"```"
|
53
|
+
]
|
54
|
+
},
|
44
55
|
{
|
45
56
|
"cell_type": "markdown",
|
46
57
|
"metadata": {},
|
{sdg_hub-0.1.0a2 → sdg_hub-0.1.0a3}/examples/knowledge_generation_using_nemotron/knowledge_sdg.ipynb
RENAMED
@@ -16,7 +16,14 @@
|
|
16
16
|
"cell_type": "markdown",
|
17
17
|
"metadata": {},
|
18
18
|
"source": [
|
19
|
-
"
|
19
|
+
"## Install sdg-hub\n",
|
20
|
+
"\n",
|
21
|
+
"```bash \n",
|
22
|
+
"pip install sdg-hub==0.1.0a2\n",
|
23
|
+
"```\n",
|
24
|
+
"\n",
|
25
|
+
"\n",
|
26
|
+
"## Installing Vllm\n",
|
20
27
|
"\n",
|
21
28
|
"- Clone vllm repo\n",
|
22
29
|
"- Checkout PR #15008\n",
|
@@ -38,10 +38,37 @@ class Flow(ABC):
|
|
38
38
|
self.base_path = str(resources.files(__package__))
|
39
39
|
self.registered_blocks = BlockRegistry.get_registry()
|
40
40
|
|
41
|
+
def _getFilePath(self, dirs, filename):
|
42
|
+
"""
|
43
|
+
Find a named configuration file.
|
44
|
+
|
45
|
+
Files are checked in the following order
|
46
|
+
- absolute path is always used
|
47
|
+
- checked relative to the directories in "dirs"
|
48
|
+
- relative to the current directory
|
49
|
+
|
50
|
+
Args:
|
51
|
+
dirs (list): Directories in which to search for "filename"
|
52
|
+
filename (str): The path to the configuration file.
|
53
|
+
|
54
|
+
Returns:
|
55
|
+
Selected file path
|
56
|
+
"""
|
57
|
+
if os.path.isabs(filename):
|
58
|
+
return filename
|
59
|
+
for d in dirs:
|
60
|
+
full_file_path = os.path.join(d, filename)
|
61
|
+
if os.path.isfile(full_file_path):
|
62
|
+
return full_file_path
|
63
|
+
# If not found above then return the path unchanged i.e.
|
64
|
+
# assume the path is relative to the current directory
|
65
|
+
return filename
|
66
|
+
|
41
67
|
def get_flow_from_file(self, yaml_path: str) -> list:
|
42
68
|
yaml_path_relative_to_base = os.path.join(self.base_path, yaml_path)
|
43
69
|
if os.path.isfile(yaml_path_relative_to_base):
|
44
70
|
yaml_path = yaml_path_relative_to_base
|
71
|
+
yaml_dir = os.path.dirname(yaml_path)
|
45
72
|
|
46
73
|
try:
|
47
74
|
with open(yaml_path, "r", encoding="utf-8") as yaml_file:
|
@@ -86,33 +113,23 @@ class Flow(ABC):
|
|
86
113
|
|
87
114
|
# update config path to absolute path
|
88
115
|
if "config_path" in block["block_config"]:
|
89
|
-
|
90
|
-
self.base_path, block["block_config"]["config_path"]
|
116
|
+
block["block_config"]["config_path"] = self._getFilePath(
|
117
|
+
[yaml_dir, self.base_path], block["block_config"]["config_path"]
|
91
118
|
)
|
92
|
-
if os.path.isfile(config_path_relative_to_base):
|
93
|
-
block["block_config"]["config_path"] = config_path_relative_to_base
|
94
119
|
|
95
120
|
# update config paths to absolute paths - this might be a list or a dict
|
96
121
|
if "config_paths" in block["block_config"]:
|
97
122
|
if isinstance(block["block_config"]["config_paths"], dict):
|
98
123
|
for key, path in block["block_config"]["config_paths"].items():
|
99
|
-
|
100
|
-
self.base_path, path
|
124
|
+
block["block_config"]["config_paths"][key] = self._getFilePath(
|
125
|
+
[yaml_dir, self.base_path], path
|
101
126
|
)
|
102
|
-
if os.path.isfile(config_path_relative_to_base):
|
103
|
-
block["block_config"]["config_paths"][key] = (
|
104
|
-
config_path_relative_to_base
|
105
|
-
)
|
106
127
|
|
107
|
-
|
128
|
+
elif isinstance(block["block_config"]["config_paths"], list):
|
108
129
|
for i, path in enumerate(block["block_config"]["config_paths"]):
|
109
|
-
|
110
|
-
self.base_path, path
|
130
|
+
block["block_config"]["config_paths"][i] = self._getFilePath(
|
131
|
+
[yaml_dir, self.base_path], path
|
111
132
|
)
|
112
|
-
if os.path.isfile(config_path_relative_to_base):
|
113
|
-
block["block_config"]["config_paths"][i] = (
|
114
|
-
config_path_relative_to_base
|
115
|
-
)
|
116
133
|
|
117
134
|
if "operation" in block["block_config"]:
|
118
135
|
block["block_config"]["operation"] = OPERATOR_MAP[
|
@@ -10,7 +10,6 @@ README.md
|
|
10
10
|
pyproject.toml
|
11
11
|
requirements-dev.txt
|
12
12
|
requirements.txt
|
13
|
-
test.ipynb
|
14
13
|
tox.ini
|
15
14
|
.github/actionlint.yaml
|
16
15
|
.github/dependabot.yml
|
@@ -55,9 +54,6 @@ scripts/docparser.py
|
|
55
54
|
scripts/docparser_v2.py
|
56
55
|
scripts/flow_runner.py
|
57
56
|
scripts/ruff.sh
|
58
|
-
scripts/test_freeform_skills.py
|
59
|
-
scripts/test_grounded_skills.py
|
60
|
-
scripts/test_knowledge.py
|
61
57
|
src/sdg_hub/__init__.py
|
62
58
|
src/sdg_hub/_version.py
|
63
59
|
src/sdg_hub/flow.py
|
@@ -145,14 +141,14 @@ src/sdg_hub/flows/generation/skills/synth_grounded_skills.yaml
|
|
145
141
|
src/sdg_hub/flows/generation/skills/synth_skills.yaml
|
146
142
|
src/sdg_hub/utils/__init__.py
|
147
143
|
src/sdg_hub/utils/chunking.py
|
148
|
-
src/sdg_hub/utils/datamixing.py
|
149
144
|
src/sdg_hub/utils/datautils.py
|
150
145
|
src/sdg_hub/utils/docprocessor.py
|
151
|
-
src/sdg_hub/utils/json.py
|
152
|
-
src/sdg_hub/utils/models.py
|
153
146
|
src/sdg_hub/utils/parse_and_convert.py
|
154
|
-
src/sdg_hub/utils/taxonomy.py
|
155
147
|
tests/__init__.py
|
156
148
|
tests/test_chunking.py
|
157
149
|
tests/test_filterblock.py
|
150
|
+
tests/test_flow.py
|
151
|
+
tests/testdata/test_config_1.yaml
|
152
|
+
tests/testdata/test_flow_1.yaml
|
153
|
+
tests/testdata/test_flow_2.yaml
|
158
154
|
tests/testdata/testdata.py
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Standard
|
2
|
+
from unittest.mock import MagicMock, patch
|
3
|
+
import os
|
4
|
+
import unittest
|
5
|
+
|
6
|
+
# Third Party
|
7
|
+
import yaml
|
8
|
+
|
9
|
+
# First Party
|
10
|
+
from sdg_hub.flow import Flow
|
11
|
+
|
12
|
+
|
13
|
+
class TestFlow(unittest.TestCase):
|
14
|
+
def setUp(self):
|
15
|
+
self.flow = Flow(MagicMock())
|
16
|
+
|
17
|
+
def test_config_relative_to_flow(self):
|
18
|
+
flow = self.flow.get_flow_from_file("tests/testdata/test_flow_1.yaml")
|
19
|
+
block = flow[0]["block_type"](**flow[0]["block_config"])
|
20
|
+
|
21
|
+
self.assertEqual(block.block_config["introduction"], "intro")
|
22
|
+
|
23
|
+
def test_config_relative_to_package(self):
|
24
|
+
with open(
|
25
|
+
"tests/testdata/test_flow_1.yaml", "r", encoding="utf-8"
|
26
|
+
) as yaml_file:
|
27
|
+
y = yaml.safe_load(yaml_file)
|
28
|
+
y[0]["block_config"]["config_path"] = (
|
29
|
+
"configs/skills/simple_generate_qa_freeform.yaml"
|
30
|
+
)
|
31
|
+
with patch("yaml.safe_load", new_callable=MagicMock) as mock_safe_load:
|
32
|
+
mock_safe_load.return_value = y
|
33
|
+
flow = self.flow.get_flow_from_file("tests/testdata/test_flow_1.yaml")
|
34
|
+
block = flow[0]["block_type"](**flow[0]["block_config"])
|
35
|
+
|
36
|
+
self.assertEqual(
|
37
|
+
block.block_config["introduction"],
|
38
|
+
"Develop a series of question and answer pairs to perform a task.",
|
39
|
+
)
|
40
|
+
|
41
|
+
def test_config_absolute(self):
|
42
|
+
with open(
|
43
|
+
"tests/testdata/test_flow_1.yaml", "r", encoding="utf-8"
|
44
|
+
) as yaml_file:
|
45
|
+
y = yaml.safe_load(yaml_file)
|
46
|
+
y[0]["block_config"]["config_path"] = os.path.abspath(
|
47
|
+
"src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml"
|
48
|
+
)
|
49
|
+
with patch("yaml.safe_load", new_callable=MagicMock) as mock_safe_load:
|
50
|
+
mock_safe_load.return_value = y
|
51
|
+
flow = self.flow.get_flow_from_file("tests/testdata/test_flow_1.yaml")
|
52
|
+
block = flow[0]["block_type"](**flow[0]["block_config"])
|
53
|
+
|
54
|
+
self.assertEqual(
|
55
|
+
block.block_config["introduction"],
|
56
|
+
"Develop a series of question and answer pairs to perform a task.",
|
57
|
+
)
|
58
|
+
|
59
|
+
def test_config_list_mix(self):
|
60
|
+
with open(
|
61
|
+
"tests/testdata/test_flow_2.yaml", "r", encoding="utf-8"
|
62
|
+
) as yaml_file:
|
63
|
+
y = yaml.safe_load(yaml_file)
|
64
|
+
y[0]["block_config"]["config_paths"]["k3"] = os.path.abspath(
|
65
|
+
"src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml"
|
66
|
+
)
|
67
|
+
|
68
|
+
with patch("yaml.safe_load", new_callable=MagicMock) as mock_safe_load:
|
69
|
+
mock_safe_load.return_value = y
|
70
|
+
flow = self.flow.get_flow_from_file("tests/testdata/test_flow_2.yaml")
|
71
|
+
block = flow[0]["block_type"](**flow[0]["block_config"])
|
72
|
+
|
73
|
+
self.assertEqual(block.block_config["introduction"], "intro")
|
74
|
+
self.assertEqual(len(block.prompt_template), 3)
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# Third Party
|
2
|
-
from datasets import Dataset
|
3
|
-
from openai import OpenAI
|
4
|
-
|
5
|
-
# First Party
|
6
|
-
from sdg_hub.sdg import SDG
|
7
|
-
from sdg_hub.flow import Flow
|
8
|
-
from sdg_hub.pipeline import Pipeline
|
9
|
-
|
10
|
-
# for vLLM endpoints, the api_key remains "EMPTY"
|
11
|
-
openai_api_key = "EMPTY"
|
12
|
-
openai_api_base = "Add model endpoint here"
|
13
|
-
|
14
|
-
|
15
|
-
client = OpenAI(
|
16
|
-
api_key=openai_api_key,
|
17
|
-
base_url=openai_api_base,
|
18
|
-
)
|
19
|
-
|
20
|
-
models = client.models.list()
|
21
|
-
teacher_model = models.data[0].id
|
22
|
-
|
23
|
-
samples = [
|
24
|
-
{
|
25
|
-
"seed_question": """Could you help me write a formal email to inquire about the progress of my internship application?""",
|
26
|
-
"task_description": "Writing formal emails",
|
27
|
-
"seed_response": """Subject: Inquiry Regarding the Status of My Internship Application
|
28
|
-
|
29
|
-
Dear [Recipient's Name],
|
30
|
-
|
31
|
-
I hope this email finds you well. I am writing to inquire about the current status of my internship application with [Company Name]. I submitted my application on [date of application] for the [Internship Title] position.
|
32
|
-
|
33
|
-
I am very interested in the opportunity to learn and grow as an intern at [Company Name], and I am eager to contribute my skills and enthusiasm to your team. I understand that the internship selection process may take some time, and I appreciate your consideration of my application.
|
34
|
-
|
35
|
-
If there are any additional steps I need to take or further information you require from me, please let me know. I am more than happy to provide any necessary documentation or complete additional tasks to facilitate the decision-making process.
|
36
|
-
|
37
|
-
I am excited about the prospect of joining [Company Name] and contributing to the [specific project, team, or aspect of the company] based on my background in [mention relevant skills or experiences]. I am confident that this internship will provide me with valuable experience and growth opportunities.
|
38
|
-
|
39
|
-
Thank you for your time and consideration. I look forward to hearing from you regarding the next steps in the internship application process.
|
40
|
-
|
41
|
-
Sincerely,
|
42
|
-
|
43
|
-
[Your Full Name]
|
44
|
-
|
45
|
-
[Your Contact Information]""",
|
46
|
-
}
|
47
|
-
]
|
48
|
-
|
49
|
-
|
50
|
-
ds = Dataset.from_list(samples)
|
51
|
-
|
52
|
-
skills_flow = Flow(client, 1).get_flow_from_file(
|
53
|
-
DEFAULT_FLOW_FILE_MAP["SynthSkillsFlow"]
|
54
|
-
)
|
55
|
-
skills_pipe = Pipeline(skills_flow)
|
56
|
-
|
57
|
-
sdg = SDG([skills_pipe])
|
58
|
-
gen_data = sdg.generate(ds)
|
59
|
-
|
60
|
-
print(gen_data)
|
61
|
-
print(gen_data[0])
|
@@ -1,109 +0,0 @@
|
|
1
|
-
# Third Party
|
2
|
-
from datasets import Dataset
|
3
|
-
from openai import OpenAI
|
4
|
-
|
5
|
-
# First Party
|
6
|
-
from sdg_hub.sdg import SDG
|
7
|
-
from sdg_hub.flow import Flow
|
8
|
-
from sdg_hub.pipeline import Pipeline
|
9
|
-
|
10
|
-
# for vLLM endpoints, the api_key remains "EMPTY"
|
11
|
-
openai_api_key = "EMPTY"
|
12
|
-
openai_api_base = "Add model endpoint here"
|
13
|
-
|
14
|
-
|
15
|
-
client = OpenAI(
|
16
|
-
api_key=openai_api_key,
|
17
|
-
base_url=openai_api_base,
|
18
|
-
)
|
19
|
-
|
20
|
-
models = client.models.list()
|
21
|
-
teacher_model = models.data[0].id
|
22
|
-
|
23
|
-
samples = [
|
24
|
-
{
|
25
|
-
"seed_context": """*Ms. Thompson:* Good morning, everyone. Today, we''re here to discuss
|
26
|
-
our customer journey mapping and analysis. I believe this is crucial to understanding
|
27
|
-
our customers'' experiences and improving our services.
|
28
|
-
|
29
|
-
|
30
|
-
*Mr. Patel:* I agree, Lisa. We should start by identifying all touchpoints in
|
31
|
-
our customer journey, from initial contact to post-sale support.
|
32
|
-
|
33
|
-
|
34
|
-
*Ms. Rodriguez:* Yes, and let''s not forget about the emotional aspect of the
|
35
|
-
journey. How do our customers feel at each stage? What are their pain points?
|
36
|
-
|
37
|
-
|
38
|
-
*Mr. Kim:* We can use data from our CRM system to track the customer journey and
|
39
|
-
gather insights. This will help us create a detailed, data-driven map.
|
40
|
-
|
41
|
-
|
42
|
-
*Ms. Johnson:* Once we have the map, we can analyze it to identify areas for improvement.
|
43
|
-
Perhaps there are steps where customers drop off or express dissatisfaction.
|
44
|
-
|
45
|
-
|
46
|
-
*Mr. Davis:* We should also consider the customer''s perspective. Conducting interviews
|
47
|
-
or surveys can provide valuable insights into their thoughts and feelings.
|
48
|
-
|
49
|
-
|
50
|
-
*Ms. Brown:* Absolutely. And once we''ve identified areas for improvement, we
|
51
|
-
can develop strategies to address them. This might involve redesigning certain
|
52
|
-
touchpoints, enhancing our communication, or streamlining processes.
|
53
|
-
|
54
|
-
|
55
|
-
*Mr. Smith:* And we must remember to measure the impact of any changes we make.
|
56
|
-
By tracking key performance indicators, we can determine whether our efforts are
|
57
|
-
successful.
|
58
|
-
|
59
|
-
|
60
|
-
*Ms. Thompson:* Great suggestions, everyone. Let''s divide into teams to tackle
|
61
|
-
different aspects of the customer journey. Team 1 will focus on pre-sale touchpoints,
|
62
|
-
Team 2 on sales, and Team 3 on post-sale support.
|
63
|
-
|
64
|
-
|
65
|
-
*Mr. Patel:* Sounds good. We''ll reconvene in a week to share our findings and
|
66
|
-
discuss next steps.""",
|
67
|
-
"seed_question": """Generate a company wide email based on the given meeting transcript""",
|
68
|
-
"task_description": "Writing emails based on minutes of meeting",
|
69
|
-
"seed_response": """Subject: Recap and Next Steps - Customer Journey Mapping and Analysis\n\
|
70
|
-
\nDear [Company Name] Team,\n\nI hope this email finds you well. Yesterday, we\
|
71
|
-
\ gathered to discuss our customer journey mapping and analysis, aiming to understand\
|
72
|
-
\ our customers' experiences and identify opportunities for improvement. The discussions\
|
73
|
-
\ were fruitful, and I want to share a brief overview of the key points and outcomes.\n\
|
74
|
-
\n1. **Identifying Touchpoints:**\n Mr. Patel suggested mapping all touchpoints\
|
75
|
-
\ in our customer journey, from initial contact to post-sale support.\n\n2. **Emotional\
|
76
|
-
\ Aspect and Pain Points:**\n Ms. Rodriguez emphasized the importance of considering\
|
77
|
-
\ the emotional aspect of the journey and identifying customers' pain points at\
|
78
|
-
\ each stage.\n\n3. **Data-Driven Mapping:**\n Mr. Kim proposed using data from\
|
79
|
-
\ our CRM system to create a detailed, data-driven customer journey map.\n\n4.\
|
80
|
-
\ **Customer Perspective:**\n Ms. Johnson recommended gathering insights from\
|
81
|
-
\ the customer's perspective through interviews or surveys.\n\n5. **Analysis and\
|
82
|
-
\ Improvement:**\n Ms. Brown suggested analyzing the customer journey map to\
|
83
|
-
\ identify areas for improvement and developing strategies to address them.\n\n\
|
84
|
-
6. **Measuring Impact:**\n Mr. Smith stressed the need to measure the impact\
|
85
|
-
\ of any changes made by tracking key performance indicators.\n\nTo facilitate\
|
86
|
-
\ a comprehensive analysis, we have divided into teams to tackle different aspects\
|
87
|
-
\ of the customer journey:\n\n* Team 1: Pre-sale touchpoints\n* Team 2: Sales\n\
|
88
|
-
* Team 3: Post-sale support\n\nEach team will share their findings and discuss\
|
89
|
-
\ next steps in a week.\n\nYour engagement and insights have been invaluable in\
|
90
|
-
\ understanding our customers' experiences and identifying opportunities for improvement.\
|
91
|
-
\ I look forward to our continued collaboration as we work towards enhancing our\
|
92
|
-
\ services and delivering exceptional customer experiences.\n\nBest regards,\n\
|
93
|
-
\n[Your Full Name]\n[Your Position]\n[Company Name]""",
|
94
|
-
}
|
95
|
-
]
|
96
|
-
|
97
|
-
|
98
|
-
ds = Dataset.from_list(samples)
|
99
|
-
|
100
|
-
skills_flow = Flow(client, 10).get_flow_from_file(
|
101
|
-
DEFAULT_FLOW_FILE_MAP["SynthGroundedSkillsFlow"]
|
102
|
-
)
|
103
|
-
skills_pipe = Pipeline(skills_flow)
|
104
|
-
|
105
|
-
sdg = SDG([skills_pipe])
|
106
|
-
gen_data = sdg.generate(ds)
|
107
|
-
|
108
|
-
print(gen_data)
|
109
|
-
print(gen_data[0])
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# Standard
|
2
|
-
import operator
|
3
|
-
|
4
|
-
# Third Party
|
5
|
-
from datasets import Dataset
|
6
|
-
from openai import OpenAI
|
7
|
-
|
8
|
-
# First Party
|
9
|
-
from sdg_hub.sdg import SDG
|
10
|
-
from sdg_hub.flow import Flow
|
11
|
-
from sdg_hub.pipeline import Pipeline
|
12
|
-
|
13
|
-
# Please don't add you vLLM endpoint key here
|
14
|
-
openai_api_key = "EMPTY"
|
15
|
-
openai_api_base = "Add model endpoint here"
|
16
|
-
|
17
|
-
client = OpenAI(
|
18
|
-
api_key=openai_api_key,
|
19
|
-
base_url=openai_api_base,
|
20
|
-
)
|
21
|
-
|
22
|
-
models = client.models.list()
|
23
|
-
teacher_model = models.data[0].id
|
24
|
-
|
25
|
-
samples = [
|
26
|
-
{
|
27
|
-
"icl_query_1": "what is the location of the tubal tonsils?",
|
28
|
-
"icl_response_1": "The location of the tubal tonsils is the roof of the pharynx.",
|
29
|
-
"icl_query_2": "How long does the adenoid grow?",
|
30
|
-
"task_description": "Teaching about human anatomy, specifically tonsils",
|
31
|
-
"icl_response_2": "The adenoid grows until the age of 5, starts to shrink at the age of 7 and becomes small in adulthood.",
|
32
|
-
"icl_query_3": "What is the immune systems first line of defense against ingested or inhaled foreign pathogens?",
|
33
|
-
"icl_response_3": "The tonsils are the immune systems first line of defense.",
|
34
|
-
"document": "The **tonsils** are a set of lymphoid organs facing into the aerodigestive tract, which is known as Waldeyer's tonsillar ring and consists of the adenoid tonsil or pharyngeal tonsil, two tubal tonsils, two palatine tonsils, and the lingual tonsils. These organs play an important role in the immune system. When used unqualified, the term most commonly refers specifically to the palatine tonsils, which are two lymphoid organs situated at either side of the back of the human throat. The palatine tonsils and the adenoid tonsil are organs consisting of lymphoepithelial tissue located near the oropharynx and nasopharynx parts of the throat",
|
35
|
-
"domain": "textbook",
|
36
|
-
}
|
37
|
-
]
|
38
|
-
|
39
|
-
ds = Dataset.from_list(samples)
|
40
|
-
|
41
|
-
mmlu_flow = Flow(client, 1).get_flow_from_file(DEFAULT_FLOW_FILE_MAP["MMLUBenchFlow"])
|
42
|
-
knowledge_flow = Flow(client, 1).get_flow_from_file(
|
43
|
-
DEFAULT_FLOW_FILE_MAP["SynthKnowledgeFlow"]
|
44
|
-
)
|
45
|
-
knowledge_pipe = Pipeline(knowledge_flow)
|
46
|
-
mmlu_pipe = Pipeline(mmlu_flow)
|
47
|
-
|
48
|
-
sdg = SDG([mmlu_pipe, knowledge_pipe])
|
49
|
-
mmlubench_data = sdg.generate(ds)
|
50
|
-
|
51
|
-
print(mmlubench_data)
|
52
|
-
print(mmlubench_data[0])
|