sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. sdg_hub/__init__.py +0 -2
  2. sdg_hub/_version.py +2 -2
  3. sdg_hub/core/__init__.py +1 -2
  4. sdg_hub/core/blocks/__init__.py +2 -4
  5. sdg_hub/core/blocks/base.py +61 -6
  6. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  7. sdg_hub/core/blocks/llm/__init__.py +2 -4
  8. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  9. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  10. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  11. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  12. sdg_hub/core/flow/__init__.py +3 -4
  13. sdg_hub/core/flow/base.py +11 -73
  14. sdg_hub/core/flow/metadata.py +1 -68
  15. sdg_hub/core/flow/registry.py +0 -1
  16. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -12
  17. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  18. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +158 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -12
  20. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -3
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +147 -28
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  25. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  26. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  27. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +303 -0
  28. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  29. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -5
  30. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/METADATA +2 -1
  31. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/RECORD +34 -30
  32. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  33. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  34. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  35. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  36. sdg_hub/core/blocks/llm/client_manager.py +0 -472
  37. sdg_hub/core/blocks/llm/config.py +0 -337
  38. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/WHEEL +0 -0
  39. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/licenses/LICENSE +0 -0
  40. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,55 @@
1
+ - role: system
2
+ content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
3
+
4
+ - role: user
5
+ content: |
6
+ Develop a series of as many educational question and answer pairs as possible from a chapter in a {{domain}} textbook.
7
+
8
+ The questions should:
9
+ * Be self-contained, not requiring references to tables, figures, or specific sections in the text for understanding.
10
+ * Focus on teaching and reinforcing the key knowledge and concepts presented in the chapter.
11
+ * Avoid sections with minimal educational content like index pages or prefaces. In such cases, respond with [UNANSWERABLE].
12
+ * Be directly relevant to the textbook's domain. For instance, in a science textbook, questions should revolve around scientific terms, definitions, and practical applications, while in a legal textbook, they should cover legal principles, case law, and precedents.
13
+ * Be formulated to allow for independent answers, avoiding direct references to specific theorems or text sections. For example, rather than asking 'Under what conditions is the fixed point of a function unique according to Theorem 3.1.5?', ask 'How does the Fixed Point Iteration method contribute to understanding function uniqueness?'
14
+ * Span a range of difficulty levels to accommodate a diverse student audience, from basic understanding to advanced comprehension.
15
+ * Include a variety of question types such as multiple-choice for basic recall, short answer for deeper understanding, and essay or problem-solving questions to test application and analysis skills.
16
+ * Align closely with the learning objectives of the textbook or the specific chapter, ensuring that the questions test the fundamental concepts and skills that the chapter aims to impart.
17
+ * Be in Japanese.
18
+
19
+ Strictly follow this format for each question answer pair you generate while responding:
20
+
21
+ [QUESTION]
22
+ <Insert question here>
23
+ [ANSWER]
24
+ <Insert answer here>
25
+ [END]
26
+
27
+ Each question and answer pair should stand alone as a mini-lesson, encapsulating a key concept or idea from the chapter in a way that is accessible and informative without requiring the reader to refer back to the textbook.
28
+
29
+ Here are some examples of questions:
30
+
31
+ [Document]
32
+ {{icl_document}}
33
+
34
+ [QUESTION]
35
+ {{icl_query_1}}
36
+ [ANSWER]
37
+ {{icl_response_1}}
38
+ [END]
39
+
40
+ [QUESTION]
41
+ {{icl_query_2}}
42
+ [ANSWER]
43
+ {{icl_response_2}}
44
+ [END]
45
+
46
+ [QUESTION]
47
+ {{icl_query_3}}
48
+ [ANSWER]
49
+ {{icl_response_3}}
50
+ [END]
51
+
52
+ Now, here is the document:
53
+ [DOCUMENT]
54
+ {{document_outline}}
55
+ {{document}}
@@ -24,7 +24,6 @@ metadata:
24
24
  - "entity-extraction"
25
25
  - "keyword-extraction"
26
26
  license: "Apache-2.0"
27
- min_sdg_hub_version: "0.2.0"
28
27
  dataset_requirements:
29
28
  required_columns:
30
29
  - "text"
@@ -50,10 +49,16 @@ blocks:
50
49
  max_tokens: 1024
51
50
  temperature: 0.3
52
51
  async_mode: true
52
+ - block_type: "LLMParserBlock"
53
+ block_config:
54
+ block_name: "extract_summary"
55
+ input_cols: "raw_summary"
56
+ extract_content: true
57
+ expand_lists: true
53
58
  - block_type: "TextParserBlock"
54
59
  block_config:
55
60
  block_name: "parse_summary"
56
- input_cols: "raw_summary"
61
+ input_cols: "extract_summary_content"
57
62
  output_cols: "summary"
58
63
  start_tags:
59
64
  - "[SUMMARY]"
@@ -76,10 +81,16 @@ blocks:
76
81
  max_tokens: 512
77
82
  temperature: 0.3
78
83
  async_mode: true
84
+ - block_type: "LLMParserBlock"
85
+ block_config:
86
+ block_name: "extract_keywords"
87
+ input_cols: "raw_keywords"
88
+ extract_content: true
89
+ expand_lists: true
79
90
  - block_type: "TextParserBlock"
80
91
  block_config:
81
92
  block_name: "parse_keywords"
82
- input_cols: "raw_keywords"
93
+ input_cols: "extract_keywords_content"
83
94
  output_cols: "keywords"
84
95
  start_tags:
85
96
  - "[KEYWORDS]"
@@ -102,10 +113,16 @@ blocks:
102
113
  max_tokens: 1024
103
114
  temperature: 0.3
104
115
  async_mode: true
116
+ - block_type: "LLMParserBlock"
117
+ block_config:
118
+ block_name: "extract_entities"
119
+ input_cols: "raw_entities"
120
+ extract_content: true
121
+ expand_lists: true
105
122
  - block_type: "TextParserBlock"
106
123
  block_config:
107
124
  block_name: "parse_entities"
108
- input_cols: "raw_entities"
125
+ input_cols: "extract_entities_content"
109
126
  output_cols: "entities"
110
127
  start_tags:
111
128
  - "[ENTITIES]"
@@ -128,10 +145,16 @@ blocks:
128
145
  max_tokens: 256
129
146
  temperature: 0.1
130
147
  async_mode: true
148
+ - block_type: "LLMParserBlock"
149
+ block_config:
150
+ block_name: "extract_sentiment"
151
+ input_cols: "raw_sentiment"
152
+ extract_content: true
153
+ expand_lists: true
131
154
  - block_type: "TextParserBlock"
132
155
  block_config:
133
156
  block_name: "parse_sentiment"
134
- input_cols: "raw_sentiment"
157
+ input_cols: "extract_sentiment_content"
135
158
  output_cols: "sentiment"
136
159
  start_tags:
137
160
  - "[SENTIMENT]"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.3.1
3
+ Version: 0.4.1
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -65,6 +65,7 @@ Requires-Dist: pytest-html; extra == "dev"
65
65
  Requires-Dist: tox<5,>=4.4.2; extra == "dev"
66
66
  Requires-Dist: ruff; extra == "dev"
67
67
  Requires-Dist: pytest-env; extra == "dev"
68
+ Requires-Dist: nbconvert>=7.0.0; extra == "dev"
68
69
  Dynamic: license-file
69
70
 
70
71
  # `sdg_hub`: Synthetic Data Generation Toolkit
@@ -1,9 +1,9 @@
1
- sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
2
- sdg_hub/_version.py,sha256=gGLpQUQx-ty9SEy9PYw9OgJWWzJLBnCpfJOfzL7SjlI,704
1
+ sdg_hub/__init__.py,sha256=TlkZT40-70urdcWLqv3kupaJj8s-SVgd2QyvlSFwb4A,510
2
+ sdg_hub/_version.py,sha256=k7cu0JKra64gmMNU_UfA5sw2eNc_GRvf3QmesiYAy8g,704
3
3
  sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
5
- sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
6
- sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
4
+ sdg_hub/core/__init__.py,sha256=e3BoejbqjYhasf9t__L4qE52lkD9EBjx4o--2kqKdro,460
5
+ sdg_hub/core/blocks/__init__.py,sha256=5FsbkcO-dmBv6MqO96TPn9FKKPTQZQCv20j4wR7UvQw,1502
6
+ sdg_hub/core/blocks/base.py,sha256=-SOdBpJwtRTMsrmCEuLjUBQMRCo_PLYlHEBRrz8sF9g,13031
7
7
  sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
8
8
  sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
9
9
  sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
@@ -15,20 +15,15 @@ sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKp
15
15
  sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
16
16
  sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
17
17
  sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
18
- sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
19
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
20
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
21
- sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
22
18
  sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
23
- sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
24
- sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
25
- sdg_hub/core/blocks/llm/client_manager.py,sha256=6RNqYvFIh4SF6jopI6tTY5MA01y8Qo-tAhsE0GeHZZ0,16109
26
- sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
19
+ sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=2Z9j_CiiTn5mHZ9gfXU-itLXDmeXSh0UI0x1x7j-LQ0,6001
20
+ sdg_hub/core/blocks/llm/__init__.py,sha256=AyS0dd3pkPPXH5a9aj4mT5HsKjX2vjXfkmQc6rkFV4A,795
27
21
  sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
28
- sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=9ytjxjADM0FydkLapZPSQPfzjjrFIdFONs3EJEoKnaw,23007
29
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
22
+ sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=MHhI2x9i6LrfDXgvAy2_6YxgyoD7j6BpCgNGsM69xDg,22194
23
+ sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=DW4b09IqXmcshvXawFheDyaLp3rz7vpO5VBrKdUQYW8,31703
24
+ sdg_hub/core/blocks/llm/llm_parser_block.py,sha256=aoHqsDDhaIgCDfPpv7acc0DVN-zUgzFflRVB4win0aM,12012
30
25
  sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
31
- sdg_hub/core/blocks/llm/text_parser_block.py,sha256=8oRlXEkw8ULA8XVa7WtQZUojodl_ihs1omZpvbwoJQE,17165
26
+ sdg_hub/core/blocks/llm/text_parser_block.py,sha256=975HK6NfXiU9Any4UDMpBNidRpyhHmc76BXUN69SVyc,12566
32
27
  sdg_hub/core/blocks/transform/__init__.py,sha256=lF9InjOzA6p_mjiwV-a2Kwstq9kqRiQ-dEwbsmR9yQs,825
33
28
  sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
34
29
  sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
@@ -37,12 +32,12 @@ sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2
37
32
  sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
38
33
  sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
39
34
  sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
40
- sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
41
- sdg_hub/core/flow/base.py,sha256=8Xacytg9M82Mbv8r2GLbQgNltH-hCtFS1Fa1WpfFlSw,56488
35
+ sdg_hub/core/flow/__init__.py,sha256=0_m_htuZfPxk8xQ9IKfp0Pz-JRE4O7lYMUFrKyLNoLA,409
36
+ sdg_hub/core/flow/base.py,sha256=IRnNEZ3laDmR4sW_MTseL4syhLuUylyHY_0tS5QaS-A,54084
42
37
  sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
43
- sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
38
+ sdg_hub/core/flow/metadata.py,sha256=cFrpJjWOaK87aCuRFyC3Pdf83oYU93mrmZEMdUnhsN8,10540
44
39
  sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
45
- sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
40
+ sdg_hub/core/flow/registry.py,sha256=N6KfX-L7QRkooznIFxDuhRZYuDA5g3N5zC-KRm2jVhk,12109
46
41
  sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
47
42
  sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
48
43
  sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
@@ -59,12 +54,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/gener
59
54
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml,sha256=qHOgUNrQz2vjUjJiEHNGWxDDXwjJlP1kofTxeGgLyPI,1461
60
55
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
56
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml,sha256=Ik6gAml0O-jPq8jpXBAkURzYkQuFOnDZb4LDwjmfAiE,381
62
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=va9ESTlEaZozy8pXTJ8OICjRg08KSP4l305YUKFuGAE,4417
57
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=fUdzY9dtU69o99Uq8FIPycgVWdLD-1kbY97Bh-Vo2A0,5538
58
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=smPWVUZRCt58EagWDmJVmTBQj8qMcjpzh-Q3GSuFrz0,4413
63
60
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
61
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml,sha256=SeapWoOx3fhN5SvWYuHss_9prLE8xSkOic7JkbDHSR0,4081
65
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=Iv4AlbE9PFtTn6teekgiNtrTiYio_nYWS8gyD6eFLUA,4580
62
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=iNNIfofFE7awK7iivtIFWxjfjy8QviMugOPPnOTySKA,5706
66
63
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=Rv0c4s5vim2I5jKzQgjcUfVMdla6czzmZUU67hlTAbg,2898
64
+ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=CIUZNYhvszT-jpz1Hvh6nS2y5W34P529ZOMp8thEQ9k,3219
68
65
  sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml,sha256=YKMX_CuvcThG_bdNCAIXdVBkMvB72I89RGq2ltSSgc8,3298
69
66
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
67
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,17 +71,24 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
74
71
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
75
72
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
76
73
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
77
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
74
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=HR8sf7RUZKr8UqKztBj-nlvyrve1UMUu8x8qgYM6O14,9055
78
75
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
76
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml,sha256=OjPZaSCOSLxEWgW3pmNwF7mmLhGhFGTmKL_3rKdqeW4,2488
79
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml,sha256=nEy_RcotHGiiENrmUANpKkbIFsrARAeSwECrBeHi2so,391
80
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml,sha256=V90W0IeJQZTFThA8v0UOs3DtZbtU3BI9jkpChw1BULo,402
81
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=iY1N6CY97fEkqI5oqaamSfqmiXpHPhWH_aOppsMxVjY,9176
82
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml,sha256=96SQqXG7fmb-50SdX85sgVtrFcQ-oNKe_0BoQdZmY5g,2638
79
83
  sdg_hub/flows/text_analysis/__init__.py,sha256=WStks4eM_KHNTVsHglcj8vFghmI0PH9P1hUrijBLbwc,125
80
84
  sdg_hub/flows/text_analysis/structured_insights/__init__.py,sha256=_DT4NR05JD9CZoSWROPr2lC6se0VjSqQPZJJlEV79mk,274
81
85
  sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml,sha256=1YGPypFJYS8qfYFj2J6ERTgodKJvMF4YHNGt_vOF5qc,1000
82
86
  sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_SDy14Zu-qS2sbKfUBmGlYj3k7CUg6HzzXlFCXRKuU,1169
83
87
  sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
84
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=2HuGTyNwYe6a8Ev-QdKZXwe29NL4wOkq4ecEV9a7NDg,4221
88
+ sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=BBV18SdvuVTAESjwkJ7V1jbb-cSTBvNl3SCycd0oEQ4,4934
85
89
  sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
86
- sdg_hub-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
87
- sdg_hub-0.3.1.dist-info/METADATA,sha256=-dPDzTaPfnMb_n6p7Jcvkqv3Y-Ihi76psItQL7DQBX8,9735
88
- sdg_hub-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
89
- sdg_hub-0.3.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
90
- sdg_hub-0.3.1.dist-info/RECORD,,
90
+ sdg_hub-0.4.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
91
+ sdg_hub-0.4.1.dist-info/METADATA,sha256=pLRs5oOsVI9515UEZxcUEZFZhCoZ0kli0KLpBPPPB7w,9783
92
+ sdg_hub-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
93
+ sdg_hub-0.4.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
94
+ sdg_hub-0.4.1.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Evaluation blocks for SDG Hub."""
3
-
4
- # Local
5
- from .evaluate_faithfulness_block import EvaluateFaithfulnessBlock
6
- from .evaluate_relevancy_block import EvaluateRelevancyBlock
7
- from .verify_question_block import VerifyQuestionBlock
8
-
9
- __all__ = ["EvaluateFaithfulnessBlock", "EvaluateRelevancyBlock", "VerifyQuestionBlock"]
@@ -1,323 +0,0 @@
1
- # SPDX-License-Identifier: Apache-2.0
2
- """Thin wrapper for faithfulness evaluation using 4 composed blocks.
3
-
4
- This module provides a simple, lightweight wrapper that composes:
5
- - PromptBuilderBlock: builds evaluation prompts
6
- - LLMChatBlock: generates LLM responses
7
- - TextParserBlock: parses structured output
8
- - ColumnValueFilterBlock: filters based on judgment
9
-
10
- The wrapper exposes minimal LLM interface for flow detection while
11
- delegating all functionality to the internal blocks.
12
- """
13
-
14
- # Standard
15
- from typing import Any, Optional
16
-
17
- # Third Party
18
- from datasets import Dataset
19
- from pydantic import ConfigDict, Field, field_validator
20
-
21
- # Local
22
- from ...utils.error_handling import BlockValidationError
23
- from ...utils.logger_config import setup_logger
24
- from ..base import BaseBlock
25
- from ..filtering.column_value_filter import ColumnValueFilterBlock
26
- from ..llm.llm_chat_block import LLMChatBlock
27
- from ..llm.prompt_builder_block import PromptBuilderBlock
28
- from ..llm.text_parser_block import TextParserBlock
29
- from ..registry import BlockRegistry
30
-
31
- logger = setup_logger(__name__)
32
-
33
-
34
- @BlockRegistry.register(
35
- "EvaluateFaithfulnessBlock",
36
- "evaluation",
37
- "Thin wrapper composing 4 blocks for faithfulness evaluation",
38
- )
39
- class EvaluateFaithfulnessBlock(BaseBlock):
40
- """Thin wrapper for faithfulness evaluation using composed blocks.
41
-
42
- Composes PromptBuilderBlock + LLMChatBlock + TextParserBlock + ColumnValueFilterBlock
43
- into a single evaluation pipeline with smart parameter routing.
44
-
45
- Parameters
46
- ----------
47
- block_name : str
48
- Name of the block.
49
- input_cols : List[str]
50
- Input columns: ["document", "response"]
51
- output_cols : List[str]
52
- Output columns: ["faithfulness_explanation", "faithfulness_judgment"]
53
- model : Optional[str]
54
- LLM model identifier.
55
- api_base : Optional[str]
56
- API base URL.
57
- api_key : Optional[str]
58
- API key.
59
- prompt_config_path : str
60
- Path to YAML prompt template file (required).
61
- **kwargs : Any
62
- All other parameters are automatically routed to appropriate internal blocks
63
- based on each block's accepted parameters. This includes all LLM parameters
64
- (temperature, max_tokens, extra_body, extra_headers, etc.), text parser
65
- parameters, and filter parameters.
66
- """
67
-
68
- model_config = ConfigDict(
69
- extra="allow"
70
- ) # Allow extra fields for dynamic forwarding
71
-
72
- # --- Core configuration ---
73
- prompt_config_path: str = Field(
74
- ...,
75
- description="Path to YAML file containing the faithfulness evaluation prompt template",
76
- )
77
-
78
- # --- LLM interface (for flow detection) ---
79
- model: Optional[str] = Field(None, description="LLM model identifier")
80
- api_base: Optional[str] = Field(None, description="API base URL")
81
- api_key: Optional[str] = Field(None, description="API key")
82
-
83
- # --- Filter configuration ---
84
- filter_value: str = Field(
85
- "YES", description="Value to filter on for faithfulness judgment"
86
- )
87
- operation: str = Field("eq", description="Filter operation")
88
- convert_dtype: Optional[str] = Field(
89
- None, description="Data type conversion for filter column"
90
- )
91
-
92
- # --- Parser configuration ---
93
- start_tags: list[str] = Field(
94
- ["[Start of Explanation]", "[Start of Answer]"],
95
- description="Start tags for parsing explanation and judgment",
96
- )
97
- end_tags: list[str] = Field(
98
- ["[End of Explanation]", "[End of Answer]"],
99
- description="End tags for parsing explanation and judgment",
100
- )
101
- parsing_pattern: Optional[str] = Field(
102
- None,
103
- description="Regex pattern for custom parsing. If provided, takes precedence over tag-based parsing",
104
- )
105
-
106
- # --- Internal blocks (composition) ---
107
- prompt_builder: PromptBuilderBlock = Field(None, exclude=True) # type: ignore
108
- llm_chat: LLMChatBlock = Field(None, exclude=True) # type: ignore
109
- text_parser: TextParserBlock = Field(None, exclude=True) # type: ignore
110
- filter_block: ColumnValueFilterBlock = Field(None, exclude=True) # type: ignore
111
-
112
- @field_validator("input_cols")
113
- @classmethod
114
- def validate_input_cols(cls, v):
115
- """Validate input columns."""
116
- if v != ["document", "response"]:
117
- raise ValueError(
118
- f"EvaluateFaithfulnessBlock expects input_cols ['document', 'response'], got {v}"
119
- )
120
- return v
121
-
122
- @field_validator("output_cols")
123
- @classmethod
124
- def validate_output_cols(cls, v):
125
- """Validate output columns."""
126
- expected = ["faithfulness_explanation", "faithfulness_judgment"]
127
- if v != expected:
128
- raise ValueError(
129
- f"EvaluateFaithfulnessBlock expects output_cols {expected}, got {v}"
130
- )
131
- return v
132
-
133
- def __init__(self, **kwargs):
134
- """Initialize with smart parameter routing."""
135
- super().__init__(**kwargs)
136
- self._create_internal_blocks(**kwargs)
137
-
138
- # Log initialization if model is configured
139
- if self.model:
140
- logger.info(
141
- f"Initialized EvaluateFaithfulnessBlock '{self.block_name}' with model '{self.model}'"
142
- )
143
-
144
- def _extract_params(self, kwargs: dict, block_class) -> dict:
145
- """Extract parameters for specific block class based on its model_fields."""
146
- # Exclude parameters that are handled by this wrapper's structure
147
- wrapper_params = {
148
- "block_name",
149
- "input_cols",
150
- "output_cols",
151
- }
152
-
153
- # Extract parameters that the target block accepts
154
- params = {
155
- k: v
156
- for k, v in kwargs.items()
157
- if k in block_class.model_fields and k not in wrapper_params
158
- }
159
-
160
- # Also include declared fields from this composite block that the target block accepts
161
- for field_name in self.__class__.model_fields:
162
- if (
163
- field_name in block_class.model_fields
164
- and field_name not in wrapper_params
165
- ):
166
- field_value = getattr(self, field_name)
167
- if field_value is not None: # Only forward non-None values
168
- params[field_name] = field_value
169
-
170
- return params
171
-
172
- def _create_internal_blocks(self, **kwargs):
173
- """Create internal blocks with parameter routing."""
174
- # Route parameters to appropriate blocks
175
- prompt_params = self._extract_params(kwargs, PromptBuilderBlock)
176
- llm_params = self._extract_params(kwargs, LLMChatBlock)
177
- parser_params = self._extract_params(kwargs, TextParserBlock)
178
- filter_params = self._extract_params(kwargs, ColumnValueFilterBlock)
179
-
180
- self.prompt_builder = PromptBuilderBlock(
181
- block_name=f"{self.block_name}_prompt_builder",
182
- input_cols=["document", "response"],
183
- output_cols=["eval_faithfulness_prompt"],
184
- **prompt_params,
185
- )
186
-
187
- # Create LLM chat block with dynamic LLM parameter forwarding
188
- llm_config = {
189
- "block_name": f"{self.block_name}_llm_chat",
190
- "input_cols": ["eval_faithfulness_prompt"],
191
- "output_cols": ["raw_eval_faithfulness"],
192
- **llm_params,
193
- }
194
-
195
- # Only add LLM parameters if they are provided
196
- if self.model is not None:
197
- llm_config["model"] = self.model
198
- if self.api_base is not None:
199
- llm_config["api_base"] = self.api_base
200
- if self.api_key is not None:
201
- llm_config["api_key"] = self.api_key
202
-
203
- self.llm_chat = LLMChatBlock(**llm_config)
204
-
205
- # Create text parser
206
- self.text_parser = TextParserBlock(
207
- block_name=f"{self.block_name}_text_parser",
208
- input_cols=["raw_eval_faithfulness"],
209
- output_cols=["faithfulness_explanation", "faithfulness_judgment"],
210
- **parser_params,
211
- )
212
-
213
- self.filter_block = ColumnValueFilterBlock(
214
- block_name=f"{self.block_name}_filter",
215
- input_cols=["faithfulness_judgment"],
216
- output_cols=[], # Filter doesn't create new columns
217
- **filter_params,
218
- )
219
-
220
- def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
221
- """Execute the 4-block faithfulness evaluation pipeline.
222
-
223
- Parameters
224
- ----------
225
- samples : Dataset
226
- Input dataset with 'document' and 'response' columns.
227
- **kwargs : Any
228
- Additional arguments passed to internal blocks.
229
-
230
- Returns
231
- -------
232
- Dataset
233
- Filtered dataset with faithfulness evaluation results.
234
- """
235
- # Validate model is configured
236
- if not self.model:
237
- raise BlockValidationError(
238
- f"Model not configured for block '{self.block_name}'. "
239
- f"Call flow.set_model_config() before generating."
240
- )
241
-
242
- logger.info(
243
- f"Starting faithfulness evaluation for {len(samples)} samples",
244
- extra={"block_name": self.block_name, "model": self.model},
245
- )
246
-
247
- try:
248
- # Execute 4-block pipeline with validation delegation
249
- result = self.prompt_builder(samples, **kwargs)
250
- result = self.llm_chat(result, **kwargs)
251
- result = self.text_parser(result, **kwargs)
252
- result = self.filter_block(result, **kwargs)
253
-
254
- logger.info(
255
- f"Faithfulness evaluation completed: {len(samples)} → {len(result)} samples",
256
- extra={"block_name": self.block_name},
257
- )
258
-
259
- return result
260
-
261
- except Exception as e:
262
- logger.error(
263
- f"Error during faithfulness evaluation: {e}",
264
- extra={"block_name": self.block_name, "error": str(e)},
265
- )
266
- raise
267
-
268
- def __getattr__(self, name: str) -> Any:
269
- """Forward attribute access to appropriate internal block."""
270
- # Check each internal block to see which one has this parameter
271
- for block_attr, block_class in [
272
- ("prompt_builder", PromptBuilderBlock),
273
- ("llm_chat", LLMChatBlock),
274
- ("text_parser", TextParserBlock),
275
- ("filter_block", ColumnValueFilterBlock),
276
- ]:
277
- if hasattr(self, block_attr) and name in block_class.model_fields:
278
- internal_block = getattr(self, block_attr)
279
- if internal_block is not None:
280
- return getattr(internal_block, name)
281
- raise AttributeError(
282
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
283
- )
284
-
285
- def __setattr__(self, name: str, value: Any) -> None:
286
- """Handle dynamic parameter updates from flow.set_model_config()."""
287
- super().__setattr__(name, value)
288
-
289
- # Forward to appropriate internal blocks
290
- for block_attr, block_class in [
291
- ("prompt_builder", PromptBuilderBlock),
292
- ("llm_chat", LLMChatBlock),
293
- ("text_parser", TextParserBlock),
294
- ("filter_block", ColumnValueFilterBlock),
295
- ]:
296
- if hasattr(self, block_attr) and name in block_class.model_fields:
297
- setattr(getattr(self, block_attr), name, value)
298
-
299
- def _reinitialize_client_manager(self) -> None:
300
- """Reinitialize internal LLM block's client manager."""
301
- if hasattr(self.llm_chat, "_reinitialize_client_manager"):
302
- self.llm_chat._reinitialize_client_manager()
303
-
304
- def get_internal_blocks_info(self) -> dict[str, Any]:
305
- """Get information about internal blocks."""
306
- return {
307
- "prompt_builder": self.prompt_builder.get_info(),
308
- "llm_chat": self.llm_chat.get_info(),
309
- "text_parser": self.text_parser.get_info(),
310
- "filter": self.filter_block.get_info(),
311
- }
312
-
313
- def __repr__(self) -> str:
314
- """String representation of the block."""
315
- filter_value = (
316
- getattr(self.filter_block, "filter_value", "YES")
317
- if hasattr(self, "filter_block")
318
- else "YES"
319
- )
320
- return (
321
- f"EvaluateFaithfulnessBlock(name='{self.block_name}', "
322
- f"model='{self.model}', filter_value='{filter_value}')"
323
- )