sdg-hub 0.1.0a2.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. sdg_hub-0.1.0a2.dev0/.github/actionlint.yaml +3 -0
  2. sdg_hub-0.1.0a2.dev0/.github/actions/free-disk-space/action.yml +19 -0
  3. sdg_hub-0.1.0a2.dev0/.github/dependabot.yml +21 -0
  4. sdg_hub-0.1.0a2.dev0/.github/mergify.yml +75 -0
  5. sdg_hub-0.1.0a2.dev0/.github/workflows/actionlint.dockerfile +3 -0
  6. sdg_hub-0.1.0a2.dev0/.github/workflows/actionlint.yml +49 -0
  7. sdg_hub-0.1.0a2.dev0/.github/workflows/docs.yml +46 -0
  8. sdg_hub-0.1.0a2.dev0/.github/workflows/e2e.yml +103 -0
  9. sdg_hub-0.1.0a2.dev0/.github/workflows/lint.yml +69 -0
  10. sdg_hub-0.1.0a2.dev0/.github/workflows/matchers/actionlint.json +18 -0
  11. sdg_hub-0.1.0a2.dev0/.github/workflows/matchers/pylint.json +33 -0
  12. sdg_hub-0.1.0a2.dev0/.github/workflows/pypi.yaml +133 -0
  13. sdg_hub-0.1.0a2.dev0/.github/workflows/test.yml +117 -0
  14. sdg_hub-0.1.0a2.dev0/.gitignore +170 -0
  15. sdg_hub-0.1.0a2.dev0/.isort.cfg +10 -0
  16. sdg_hub-0.1.0a2.dev0/.markdownlint-cli2.yaml +19 -0
  17. sdg_hub-0.1.0a2.dev0/.pre-commit-config.yaml +16 -0
  18. sdg_hub-0.1.0a2.dev0/.pylintrc +649 -0
  19. sdg_hub-0.1.0a2.dev0/LICENSE +201 -0
  20. sdg_hub-0.1.0a2.dev0/MANIFEST.in +2 -0
  21. sdg_hub-0.1.0a2.dev0/Makefile +59 -0
  22. sdg_hub-0.1.0a2.dev0/PKG-INFO +154 -0
  23. sdg_hub-0.1.0a2.dev0/README.md +118 -0
  24. sdg_hub-0.1.0a2.dev0/assets/imgs/IL_skills_pipeline.png +0 -0
  25. sdg_hub-0.1.0a2.dev0/assets/imgs/customized_nano_closed_book_rag_results.png +0 -0
  26. sdg_hub-0.1.0a2.dev0/assets/imgs/instructlab-banner.png +0 -0
  27. sdg_hub-0.1.0a2.dev0/assets/imgs/overview.png +0 -0
  28. sdg_hub-0.1.0a2.dev0/examples/data-generation-with-llama-70b/data-generation-with-llama-70b.ipynb +368 -0
  29. sdg_hub-0.1.0a2.dev0/examples/data-generation-with-llama-70b/synth_knowledge1.5_llama3.3.yaml +136 -0
  30. sdg_hub-0.1.0a2.dev0/examples/inference_time_scaling/prm_with_vllm.ipynb +193 -0
  31. sdg_hub-0.1.0a2.dev0/examples/instructlab/annotation/sample_data/emotion_classification.jsonl +2000 -0
  32. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.json +1 -0
  33. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.md +5085 -0
  34. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/document_collection/ibm-annual-report/ibm-annual-report-2024.pdf +0 -0
  35. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/document_collection/ibm-annual-report/qna.yaml +100 -0
  36. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/document_pre_processing.ipynb +112 -0
  37. sdg_hub-0.1.0a2.dev0/examples/instructlab/knowledge/knowledge_generation_and_mixing.ipynb +193 -0
  38. sdg_hub-0.1.0a2.dev0/examples/instructlab/skills/sample_data/mdtable_seeds.jsonl +5 -0
  39. sdg_hub-0.1.0a2.dev0/examples/instructlab/skills/unstructed_to_structured.ipynb +412 -0
  40. sdg_hub-0.1.0a2.dev0/examples/instructlab/skills/unstructed_to_structured_lls.ipynb +567 -0
  41. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/README.md +121 -0
  42. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/flows/synth_knowledge1.5_nemotron_super_49b.yaml +174 -0
  43. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/flows/synth_knowledge_reasoning_nemotron_super_49b.yaml +188 -0
  44. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/generate.py +194 -0
  45. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/knowledge_sdg.ipynb +955 -0
  46. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/prompts/generate_answers.yaml +51 -0
  47. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/prompts/generate_questions.yaml +47 -0
  48. sdg_hub-0.1.0a2.dev0/examples/knowledge_generation_using_nemotron/prompts/generate_questions_responses.yaml +59 -0
  49. sdg_hub-0.1.0a2.dev0/pyproject.toml +104 -0
  50. sdg_hub-0.1.0a2.dev0/requirements-dev.txt +12 -0
  51. sdg_hub-0.1.0a2.dev0/requirements.txt +14 -0
  52. sdg_hub-0.1.0a2.dev0/scripts/__init__.py +0 -0
  53. sdg_hub-0.1.0a2.dev0/scripts/docparser.py +77 -0
  54. sdg_hub-0.1.0a2.dev0/scripts/docparser_v2.py +204 -0
  55. sdg_hub-0.1.0a2.dev0/scripts/flow_runner.py +106 -0
  56. sdg_hub-0.1.0a2.dev0/scripts/ruff.sh +54 -0
  57. sdg_hub-0.1.0a2.dev0/scripts/test_freeform_skills.py +61 -0
  58. sdg_hub-0.1.0a2.dev0/scripts/test_grounded_skills.py +109 -0
  59. sdg_hub-0.1.0a2.dev0/scripts/test_knowledge.py +52 -0
  60. sdg_hub-0.1.0a2.dev0/setup.cfg +4 -0
  61. sdg_hub-0.1.0a2.dev0/src/sdg_hub/__init__.py +4 -0
  62. sdg_hub-0.1.0a2.dev0/src/sdg_hub/_version.py +21 -0
  63. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/__init__.py +6 -0
  64. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/block.py +54 -0
  65. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/filterblock.py +76 -0
  66. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/iterblock.py +31 -0
  67. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/llmblock.py +430 -0
  68. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/rmblocks.py +194 -0
  69. sdg_hub-0.1.0a2.dev0/src/sdg_hub/blocks/utilblocks.py +140 -0
  70. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/__init__.py +0 -0
  71. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/annotations/__init__.py +0 -0
  72. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/annotations/cot_reflection.yaml +34 -0
  73. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/annotations/detailed_description.yaml +10 -0
  74. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/annotations/detailed_description_icl.yaml +32 -0
  75. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/annotations/simple.yaml +10 -0
  76. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/__init__.py +0 -0
  77. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/atomic_facts.yaml +45 -0
  78. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/auxilary_instructions.yaml +35 -0
  79. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/data_recipe/__init__.py +0 -0
  80. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +3 -0
  81. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/detailed_summary.yaml +17 -0
  82. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +68 -0
  83. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/evaluate_question.yaml +38 -0
  84. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/evaluate_relevancy.yaml +85 -0
  85. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/extractive_summary.yaml +17 -0
  86. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +39 -0
  87. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/generate_questions_responses.yaml +56 -0
  88. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/mcq_generation.yaml +83 -0
  89. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/router.yaml +12 -0
  90. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/knowledge/simple_generate_qa.yaml +34 -0
  91. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/reasoning/dynamic_cot.yaml +40 -0
  92. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_A_.yaml +97 -0
  93. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_B_.yaml +36 -0
  94. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_C_.yaml +71 -0
  95. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_D_.yaml +85 -0
  96. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_E_.yaml +30 -0
  97. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_F_.yaml +45 -0
  98. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_G_.yaml +56 -0
  99. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/_H_.yaml +80 -0
  100. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/__init__.py +0 -0
  101. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/analyzer.yaml +48 -0
  102. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/annotation.yaml +36 -0
  103. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/contexts.yaml +21 -0
  104. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/critic.yaml +60 -0
  105. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/data_recipe/__init__.py +0 -0
  106. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/data_recipe/default_recipe.yaml +6 -0
  107. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/evaluate_freeform_pair.yaml +44 -0
  108. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/evaluate_freeform_questions.yaml +46 -0
  109. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/evaluate_grounded_pair.yaml +54 -0
  110. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/evaluate_grounded_questions.yaml +51 -0
  111. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/freeform_questions.yaml +29 -0
  112. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/freeform_responses.yaml +45 -0
  113. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/grounded_questions.yaml +38 -0
  114. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/grounded_responses.yaml +59 -0
  115. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/judge.yaml +53 -0
  116. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/planner.yaml +67 -0
  117. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/respond.yaml +8 -0
  118. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/revised_responder.yaml +78 -0
  119. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/router.yaml +12 -0
  120. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +27 -0
  121. sdg_hub-0.1.0a2.dev0/src/sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +31 -0
  122. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flow.py +127 -0
  123. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/annotation/emotion/detailed_description.yaml +19 -0
  124. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +19 -0
  125. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/annotation/emotion/simple.yaml +19 -0
  126. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +13 -0
  127. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +12 -0
  128. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +89 -0
  129. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +136 -0
  130. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/skills/agentic_improve_skill.yaml +108 -0
  131. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +12 -0
  132. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +12 -0
  133. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +80 -0
  134. sdg_hub-0.1.0a2.dev0/src/sdg_hub/flows/generation/skills/synth_skills.yaml +59 -0
  135. sdg_hub-0.1.0a2.dev0/src/sdg_hub/logger_config.py +20 -0
  136. sdg_hub-0.1.0a2.dev0/src/sdg_hub/pipeline.py +66 -0
  137. sdg_hub-0.1.0a2.dev0/src/sdg_hub/prompts.py +17 -0
  138. sdg_hub-0.1.0a2.dev0/src/sdg_hub/py.typed +0 -0
  139. sdg_hub-0.1.0a2.dev0/src/sdg_hub/registry.py +122 -0
  140. sdg_hub-0.1.0a2.dev0/src/sdg_hub/sdg.py +164 -0
  141. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/__init__.py +5 -0
  142. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/chunking.py +73 -0
  143. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/datamixing.py +123 -0
  144. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/datautils.py +14 -0
  145. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/docprocessor.py +357 -0
  146. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/json.py +48 -0
  147. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/models.py +31 -0
  148. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/parse_and_convert.py +392 -0
  149. sdg_hub-0.1.0a2.dev0/src/sdg_hub/utils/taxonomy.py +489 -0
  150. sdg_hub-0.1.0a2.dev0/src/sdg_hub.egg-info/PKG-INFO +154 -0
  151. sdg_hub-0.1.0a2.dev0/src/sdg_hub.egg-info/SOURCES.txt +158 -0
  152. sdg_hub-0.1.0a2.dev0/src/sdg_hub.egg-info/dependency_links.txt +1 -0
  153. sdg_hub-0.1.0a2.dev0/src/sdg_hub.egg-info/requires.txt +9 -0
  154. sdg_hub-0.1.0a2.dev0/src/sdg_hub.egg-info/top_level.txt +1 -0
  155. sdg_hub-0.1.0a2.dev0/test.ipynb +1361 -0
  156. sdg_hub-0.1.0a2.dev0/tests/__init__.py +0 -0
  157. sdg_hub-0.1.0a2.dev0/tests/test_chunking.py +52 -0
  158. sdg_hub-0.1.0a2.dev0/tests/test_filterblock.py +44 -0
  159. sdg_hub-0.1.0a2.dev0/tests/testdata/testdata.py +25 -0
  160. sdg_hub-0.1.0a2.dev0/tox.ini +68 -0
@@ -0,0 +1,3 @@
1
+ self-hosted-runner:
2
+ labels:
3
+ - ubuntu-gpu
@@ -0,0 +1,19 @@
1
+ name: 'Free Disk Space'
2
+ description: 'Frees disk space on the runner'
3
+ runs:
4
+ using: "composite"
5
+ steps:
6
+ - run: |
7
+ df -h
8
+ sudo docker rmi "$(docker image ls -aq)" >/dev/null 2>&1 || true
9
+ sudo rm -rf \
10
+ /usr/share/dotnet /usr/local/lib/android /opt/ghc \
11
+ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
12
+ /usr/lib/jvm || true
13
+ sudo apt install aptitude -y >/dev/null 2>&1
14
+ sudo aptitude purge '~n ^mysql' -f -y >/dev/null 2>&1
15
+ sudo aptitude purge '~n ^dotnet' -f -y >/dev/null 2>&1
16
+ sudo apt-get autoremove -y >/dev/null 2>&1
17
+ sudo apt-get autoclean -y >/dev/null 2>&1
18
+ df -h
19
+ shell: bash
@@ -0,0 +1,21 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ # GitHub Dependabot configuration file
4
+ version: 2
5
+ updates:
6
+
7
+ # Maintain dependencies for GitHub Actions
8
+ - package-ecosystem: "github-actions"
9
+ directory: "/"
10
+ schedule:
11
+ interval: "daily"
12
+ - package-ecosystem: "docker"
13
+ directory: "/.github/workflows"
14
+ schedule:
15
+ interval: "daily"
16
+
17
+ # Maintain dependencies for Python scripts
18
+ - package-ecosystem: "pip"
19
+ directory: "/"
20
+ schedule:
21
+ interval: "daily"
@@ -0,0 +1,75 @@
1
+ pull_request_rules:
2
+
3
+ - name: label-cicd
4
+ description: Automatically apply CI/CD label
5
+ conditions:
6
+ - or:
7
+ - files=.github/mergify.yml
8
+ - files=.github/workflows/**/*
9
+ - files=.pylintrc
10
+ - files=tox.ini
11
+ - files=.markdownlint-cli2.yaml
12
+ actions:
13
+ label:
14
+ add:
15
+ - CI/CD
16
+
17
+ - name: label-documentation
18
+ description: Automatically apply documentation label
19
+ conditions:
20
+ - or:
21
+ - files~=.*\.md
22
+ - files~=docs/.*
23
+ actions:
24
+ label:
25
+ add:
26
+ - documentation
27
+
28
+ - name: label-testing
29
+ description: Automatically apply testing label
30
+ conditions:
31
+ - or:
32
+ - files~=tests/.*
33
+ - files=tox.ini
34
+ actions:
35
+ label:
36
+ add:
37
+ - testing
38
+
39
+ - name: ping author on conflicts and add 'needs-rebase' label
40
+ conditions:
41
+ - conflict
42
+ - -closed
43
+ actions:
44
+ label:
45
+ add:
46
+ - needs-rebase
47
+ comment:
48
+ message: |
49
+ This pull request has merge conflicts that must be resolved before it can be
50
+ merged. @{{author}} please rebase it. https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
51
+
52
+ - name: remove 'needs-rebase' label when conflict is resolved
53
+ conditions:
54
+ - -conflict
55
+ - -closed
56
+ actions:
57
+ label:
58
+ remove:
59
+ - needs-rebase
60
+
61
+ - name: Apply ci-failure label if any CI checks have failed
62
+ conditions:
63
+ - "#check-failure>0"
64
+ actions:
65
+ label:
66
+ add:
67
+ - ci-failure
68
+
69
+ - name: Remove ci-failure label if no failures are present
70
+ conditions:
71
+ - "#check-failure=0"
72
+ actions:
73
+ label:
74
+ remove:
75
+ - ci-failure
@@ -0,0 +1,3 @@
1
+ # Since dependabot cannot update workflows using docker,
2
+ # we use this indirection since dependabot can update this file.
3
+ FROM rhysd/actionlint:1.7.1@sha256:435ecdb63b1169e80ca3e136290072548c07fc4d76a044cf5541021712f8f344
@@ -0,0 +1,49 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: Lint GitHub Actions workflows
4
+ on:
5
+ push:
6
+ branches:
7
+ - "main"
8
+ paths:
9
+ - '.github/workflows/*.ya?ml'
10
+ - '.github/workflows/actionlint.*' # This workflow
11
+ pull_request:
12
+ branches:
13
+ - "main"
14
+ paths:
15
+ - '.github/workflows/*.ya?ml'
16
+ - '.github/workflows/actionlint.*' # This workflow
17
+
18
+ env:
19
+ LC_ALL: en_US.UTF-8
20
+
21
+ defaults:
22
+ run:
23
+ shell: bash
24
+
25
+ permissions:
26
+ contents: read
27
+
28
+ jobs:
29
+ actionlint:
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - name: "Harden Runner"
33
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
34
+ with:
35
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
36
+
37
+ - name: "Checkout"
38
+ uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
39
+ with:
40
+ fetch-depth: 0
41
+
42
+ - name: "Download actionlint"
43
+ run: |
44
+ docker build --tag actionlint - < .github/workflows/actionlint.dockerfile
45
+
46
+ - name: "Check workflow files"
47
+ run: |
48
+ echo "::add-matcher::.github/workflows/matchers/actionlint.json"
49
+ docker run --volume="${PWD}:/repo" --workdir=/repo actionlint -color
@@ -0,0 +1,46 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: Lint Markdown documents
4
+
5
+ on:
6
+ push:
7
+ branches:
8
+ - "main"
9
+ paths:
10
+ - '**/*.md'
11
+ - '.markdownlint-cli2.yaml'
12
+ - '.github/workflows/docs.yml' # This workflow
13
+ pull_request:
14
+ branches:
15
+ - "main"
16
+ paths:
17
+ - '**/*.md'
18
+ - '.markdownlint-cli2.yaml'
19
+ - '.github/workflows/docs.yml' # This workflow
20
+
21
+ env:
22
+ LC_ALL: en_US.UTF-8
23
+
24
+ defaults:
25
+ run:
26
+ shell: bash
27
+
28
+ permissions:
29
+ contents: read
30
+
31
+ jobs:
32
+ markdown-lint:
33
+ runs-on: ubuntu-latest
34
+ steps:
35
+ - name: "Harden Runner"
36
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
37
+ with:
38
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
39
+ - name: "Checkout"
40
+ uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
41
+ with:
42
+ fetch-depth: 0
43
+ - name: "Check Markdown documents"
44
+ uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0
45
+ with:
46
+ globs: '**/*.md'
@@ -0,0 +1,103 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: E2E test
4
+
5
+ on:
6
+ workflow_dispatch:
7
+ # TODO: e2e workflow set to manual dispatch until it's adjusted to use
8
+ # AWS GPU runners instead of ubuntu-gpu GitHub runners, since those
9
+ # GitHub runners are more expensive and not available in this org
10
+ #
11
+ # push:
12
+ # branches:
13
+ # - "main"
14
+ # - "release-**"
15
+ # paths:
16
+ # - '**.py'
17
+ # - 'pyproject.toml'
18
+ # - 'requirements*.txt'
19
+ # - '.github/workflows/e2e.yml'
20
+ # pull_request:
21
+ # branches:
22
+ # - "main"
23
+ # - "release-**"
24
+ # paths:
25
+ # - '**.py'
26
+ # - 'pyproject.toml'
27
+ # - 'requirements*.txt'
28
+ # - '.github/workflows/e2e.yml'
29
+
30
+ jobs:
31
+ e2e:
32
+ runs-on: ubuntu-gpu
33
+
34
+ permissions:
35
+ pull-requests: write
36
+
37
+ steps:
38
+ # No step-security/harden-runner since this is a self-hosted runner
39
+ - name: Checkout instructlab/sdg
40
+ uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
41
+ with:
42
+ # https://github.com/actions/checkout/issues/249
43
+ fetch-depth: 0
44
+
45
+ - name: Checkout instructlab/instructlab
46
+ uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
47
+ with:
48
+ repository: "instructlab/instructlab"
49
+ path: "instructlab"
50
+ fetch-depth: 0
51
+
52
+ - name: Install Packages
53
+ run: |
54
+ sudo apt-get install -y cuda-toolkit git cmake build-essential virtualenv
55
+ nvidia-smi
56
+ sudo ls -l /dev/nvidia*
57
+
58
+ - name: Setup Python 3.11
59
+ uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
60
+ with:
61
+ python-version: 3.11
62
+ cache: pip
63
+ cache-dependency-path: |
64
+ **/pyproject.toml
65
+ **/requirements*.txt
66
+
67
+ - name: Remove llama-cpp-python from cache
68
+ run: |
69
+ pip cache remove llama_cpp_python
70
+
71
+ - name: Cache huggingface
72
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
73
+ with:
74
+ path: ~/.cache/huggingface
75
+ # config contains DEFAULT_MODEL
76
+ key: huggingface-${{ hashFiles('src/instructlab/configuration.py') }}
77
+
78
+ - name: Install instructlab and instructlab-sdg
79
+ run: |
80
+ export PATH="/home/runner/.local/bin:/usr/local/cuda/bin:$PATH"
81
+ python3 -m venv venv
82
+ . venv/bin/activate
83
+ cd instructlab
84
+ sed 's/\[.*\]//' requirements.txt > constraints.txt
85
+ python3 -m pip cache remove llama_cpp_python
86
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --no-binary llama_cpp_python -c constraints.txt llama_cpp_python
87
+ # needed for --4-bit-quant option to ilab train
88
+ python3 -m pip install bitsandbytes
89
+ # install instructlab
90
+ python3 -m pip install .
91
+ cd ..
92
+ # Install instructlab-sdg
93
+ python3 -m pip install .
94
+
95
+ - name: Run e2e test
96
+ run: |
97
+ . venv/bin/activate
98
+ ./instructlab/scripts/basic-workflow-tests.sh -cm
99
+
100
+ - name: Remove llama-cpp-python from cache
101
+ if: always()
102
+ run: |
103
+ pip cache remove llama_cpp_python
@@ -0,0 +1,69 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: Lint, Format, and MyPy
4
+
5
+ on:
6
+ push:
7
+ branches:
8
+ - "main-disabled"
9
+ paths:
10
+ - '**.py'
11
+ - 'pyproject.toml'
12
+ - 'requirements*.txt'
13
+ - 'tox.ini'
14
+ - 'scripts/*.sh'
15
+ - '.github/**'
16
+ pull_request:
17
+ branches:
18
+ - "main-disabled"
19
+ paths:
20
+ - '**.py'
21
+ - 'pyproject.toml'
22
+ - 'requirements*.txt'
23
+ - 'tox.ini'
24
+ - 'scripts/*.sh'
25
+ - '.github/**'
26
+
27
+ env:
28
+ PYTHON_VERSION: 3.11
29
+
30
+ jobs:
31
+ lint:
32
+ runs-on: ubuntu-latest
33
+ steps:
34
+ - name: Checkout
35
+ uses: actions/checkout@v4
36
+ with:
37
+ # https://github.com/actions/checkout/issues/249
38
+ fetch-depth: 0
39
+ submodules: true
40
+
41
+ - name: Setup Python 3.11
42
+ uses: actions/setup-python@v5
43
+ with:
44
+ python-version: 3.11
45
+ cache: pip
46
+ cache-dependency-path: |
47
+ **/pyproject.toml
48
+ **/requirements*.txt
49
+
50
+ - name: Install dependencies
51
+ id: deps
52
+ run: |
53
+ python -m pip install --upgrade pip
54
+ python -m pip install tox
55
+
56
+ - name: Run Ruff check
57
+ run: |
58
+ tox -e ruff -- check
59
+
60
+ - name: Run linting
61
+ if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
62
+ run: |
63
+ echo "::add-matcher::.github/workflows/matchers/pylint.json"
64
+ tox -e lint
65
+
66
+ - name: Run mypy type checks
67
+ if: ${{ !cancelled() && (steps.deps.outcome == 'success') }}
68
+ run: |
69
+ tox -e mypy
@@ -0,0 +1,18 @@
1
+ {
2
+ "problemMatcher": [
3
+ {
4
+ "owner": "actionlint",
5
+ "pattern": [
6
+ {
7
+ "regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$",
8
+ "file": 1,
9
+ "line": 2,
10
+ "column": 3,
11
+ "message": 4,
12
+ "code": 5
13
+ }
14
+ ]
15
+ }
16
+ ]
17
+ }
18
+
@@ -0,0 +1,33 @@
1
+ {
2
+ "problemMatcher": [
3
+ {
4
+ "owner": "pylint-error",
5
+ "severity": "error",
6
+ "pattern": [
7
+ {
8
+ "regexp": "^(.+):(\\d+):(\\d+):\\s(([EF]\\d{4}):\\s.+)$",
9
+ "file": 1,
10
+ "line": 2,
11
+ "column": 3,
12
+ "message": 4,
13
+ "code": 5
14
+ }
15
+ ]
16
+ },
17
+ {
18
+ "owner": "pylint-warning",
19
+ "severity": "warning",
20
+ "pattern": [
21
+ {
22
+ "regexp": "^(.+):(\\d+):(\\d+):\\s(([CRW]\\d{4}):\\s.+)$",
23
+ "file": 1,
24
+ "line": 2,
25
+ "column": 3,
26
+ "message": 4,
27
+ "code": 5
28
+ }
29
+ ]
30
+ }
31
+ ]
32
+ }
33
+
@@ -0,0 +1,133 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: Build, test, and upload PyPI package
4
+
5
+ on:
6
+ push:
7
+ branches:
8
+ - "main"
9
+ - "release-**"
10
+ tags:
11
+ - "v*"
12
+ pull_request:
13
+ branches:
14
+ - "main"
15
+ - "release-**"
16
+ release:
17
+ types:
18
+ - published
19
+
20
+ env:
21
+ LC_ALL: en_US.UTF-8
22
+
23
+ defaults:
24
+ run:
25
+ shell: bash
26
+
27
+ permissions:
28
+ contents: read
29
+
30
+ jobs:
31
+ # Create and verify release artifacts
32
+ # - build source dist (tar ball) and wheel
33
+ # - validate artifacts with various tools
34
+ # - upload artifacts to GHA
35
+ build-package:
36
+ name: Build and check packages
37
+ runs-on: ubuntu-latest
38
+ steps:
39
+ - name: "Harden Runner"
40
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
41
+ with:
42
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
43
+
44
+
45
+ - name: "Checkout"
46
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
47
+ with:
48
+ # for setuptools-scm
49
+ fetch-depth: 0
50
+
51
+ - name: "Build and Inspect"
52
+ uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
53
+
54
+ # push to Test PyPI on
55
+ # - a new GitHub release is published
56
+ # - a PR is merged into main branch
57
+ publish-test-pypi:
58
+ name: Publish packages to test.pypi.org
59
+ # environment: publish-test-pypi
60
+ if: ${{ (github.repository_owner == 'Red-Hat-AI-Innovation-Team') && ((github.event.action == 'published') || ((github.event_name == 'push') && (github.ref == 'refs/heads/main'))) }}
61
+ permissions:
62
+ contents: read
63
+ # see https://docs.pypi.org/trusted-publishers/
64
+ id-token: write
65
+ runs-on: ubuntu-latest
66
+ needs: build-package
67
+
68
+ steps:
69
+ - name: "Harden Runner"
70
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
71
+ with:
72
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
73
+
74
+ - name: "Download build artifacts"
75
+ uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
76
+ with:
77
+ name: Packages
78
+ path: dist
79
+
80
+ - name: "Upload to Test PyPI"
81
+ uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
82
+ with:
83
+ repository-url: https://test.pypi.org/legacy/
84
+
85
+ # push to Production PyPI on
86
+ # - a new GitHub release is published
87
+ publish-pypi:
88
+ name: Publish release to pypi.org
89
+ # environment: publish-pypi
90
+ if: ${{ (github.repository_owner == 'Red-Hat-AI-Innovation-Team') && (github.event.action == 'published') }}
91
+ permissions:
92
+ # see https://docs.pypi.org/trusted-publishers/
93
+ id-token: write
94
+ # allow gh release upload
95
+ contents: write
96
+
97
+ runs-on: ubuntu-latest
98
+ needs: build-package
99
+
100
+ steps:
101
+ - name: "Harden Runner"
102
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
103
+ with:
104
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
105
+
106
+ - name: "Download build artifacts"
107
+ uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
108
+ with:
109
+ name: Packages
110
+ path: dist
111
+
112
+ - name: "Sigstore sign package"
113
+ uses: sigstore/gh-action-sigstore-python@f514d46b907ebcd5bedc05145c03b69c1edd8b46 # v3.0.0
114
+ with:
115
+ inputs: |
116
+ ./dist/*.tar.gz
117
+ ./dist/*.whl
118
+ release-signing-artifacts: false
119
+
120
+ - name: "Upload artifacts and signatures to GitHub release"
121
+ run: |
122
+ gh release upload '${{ github.ref_name }}' dist/* --repo '${{ github.repository }}'
123
+ env:
124
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
125
+
126
+ # PyPI does not accept .sigstore artifacts and
127
+ # gh-action-pypi-publish has no option to ignore them.
128
+ - name: "Remove sigstore signatures before uploading to PyPI"
129
+ run: |
130
+ rm ./dist/*.sigstore.json
131
+
132
+ - name: "Upload to PyPI"
133
+ uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4
@@ -0,0 +1,117 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+
3
+ name: Test
4
+
5
+ on:
6
+ workflow_dispatch:
7
+ push:
8
+ branches:
9
+ - "main"
10
+ - "release-**"
11
+ paths:
12
+ - '**.py'
13
+ - 'pyproject.toml'
14
+ - 'requirements*.txt'
15
+ - 'tox.ini'
16
+ - '.github/workflows/test.yml' # This workflow
17
+ pull_request:
18
+ branches:
19
+ - "main"
20
+ - "release-**"
21
+ paths:
22
+ - '**.py'
23
+ - 'pyproject.toml'
24
+ - 'requirements*.txt'
25
+ - 'tox.ini'
26
+ - '.github/workflows/test.yml' # This workflow
27
+
28
+ env:
29
+ LC_ALL: en_US.UTF-8
30
+
31
+ defaults:
32
+ run:
33
+ shell: bash
34
+
35
+ permissions:
36
+ contents: read
37
+
38
+ jobs:
39
+ test:
40
+ name: "${{ matrix.python }} on ${{ matrix.platform }}"
41
+ runs-on: "${{ matrix.platform }}"
42
+ strategy:
43
+ matrix:
44
+ python:
45
+ - "3.10"
46
+ - "3.11"
47
+ platform:
48
+ - "ubuntu-latest"
49
+ include:
50
+ - python: "3.11"
51
+ platform: "macos-latest"
52
+ steps:
53
+ - name: "Harden Runner"
54
+ uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
55
+ with:
56
+ egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
57
+
58
+ - name: Checkout
59
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
60
+ with:
61
+ # https://github.com/actions/checkout/issues/249
62
+ fetch-depth: 0
63
+
64
+ - name: Free disk space
65
+ if: matrix.platform != 'macos-latest'
66
+ uses: ./.github/actions/free-disk-space
67
+
68
+ - name: Install the expect package
69
+ if: startsWith(matrix.platform, 'ubuntu')
70
+ run: |
71
+ sudo apt-get install -y expect
72
+
73
+ - name: Install tools on MacOS
74
+ if: startsWith(matrix.platform, 'macos')
75
+ run: |
76
+ brew install expect coreutils bash
77
+
78
+ - name: Setup Python ${{ matrix.python }}
79
+ uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
80
+ with:
81
+ python-version: ${{ matrix.python }}
82
+ cache: pip
83
+ cache-dependency-path: |
84
+ **/pyproject.toml
85
+ **/requirements*.txt
86
+
87
+ - name: Remove llama-cpp-python from cache
88
+ run: |
89
+ pip cache remove llama_cpp_python
90
+
91
+ - name: Cache huggingface
92
+ uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
93
+ with:
94
+ path: ~/.cache/huggingface
95
+ # config contains DEFAULT_MODEL
96
+ key: huggingface-${{ hashFiles('src/instructlab/configuration.py') }}
97
+
98
+ - name: Install dependencies
99
+ run: |
100
+ python -m pip install --upgrade pip
101
+ python -m pip install tox tox-gh>=1.2
102
+
103
+ - name: Run unit tests with tox
104
+ run: |
105
+ tox
106
+
107
+ - name: Remove llama-cpp-python from cache
108
+ if: always()
109
+ run: |
110
+ pip cache remove llama_cpp_python
111
+
112
+ test-workflow-complete:
113
+ needs: ["test"]
114
+ runs-on: ubuntu-latest
115
+ steps:
116
+ - name: Test Workflow Complete
117
+ run: echo "Test Workflow Complete"