sdg-hub 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. sdg_hub/_version.py +16 -3
  2. sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
  3. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
  4. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
  5. sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
  6. sdg_hub/core/blocks/llm/client_manager.py +92 -43
  7. sdg_hub/core/blocks/llm/config.py +1 -0
  8. sdg_hub/core/blocks/llm/llm_chat_block.py +74 -16
  9. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +277 -115
  10. sdg_hub/core/blocks/llm/text_parser_block.py +88 -23
  11. sdg_hub/core/blocks/registry.py +48 -34
  12. sdg_hub/core/blocks/transform/__init__.py +2 -0
  13. sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
  14. sdg_hub/core/blocks/transform/json_structure_block.py +142 -0
  15. sdg_hub/core/flow/base.py +326 -62
  16. sdg_hub/core/utils/datautils.py +54 -0
  17. sdg_hub/core/utils/flow_metrics.py +261 -0
  18. sdg_hub/core/utils/logger_config.py +50 -9
  19. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py +0 -0
  20. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py +0 -0
  21. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml +11 -0
  22. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +159 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py +0 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml +65 -0
  25. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +161 -0
  26. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml +15 -0
  27. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_multiple_qa.yaml +21 -0
  28. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml +44 -0
  29. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py +0 -0
  30. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +104 -0
  31. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml +61 -0
  32. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -7
  33. sdg_hub/flows/text_analysis/__init__.py +2 -0
  34. sdg_hub/flows/text_analysis/structured_insights/__init__.py +6 -0
  35. sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml +27 -0
  36. sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml +38 -0
  37. sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml +21 -0
  38. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +153 -0
  39. sdg_hub/flows/text_analysis/structured_insights/summarize.yaml +21 -0
  40. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/METADATA +42 -15
  41. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/RECORD +44 -22
  42. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/WHEEL +0 -0
  43. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/licenses/LICENSE +0 -0
  44. {sdg_hub-0.2.1.dist-info → sdg_hub-0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,161 @@
1
+ metadata:
2
+ name: Extractive Summary Knowledge Tuning Dataset Generation Flow
3
+ description: Generate an extractive summary from the input document. Each document is first converted into a list of knowledge segments for creating the extractive summary and then annotated with context, relationship and relevance. This is then converted
4
+ into Question-Answer pairs.
5
+ version: 2.0.0
6
+ author: SDG Hub Contributors
7
+ recommended_models:
8
+ default: openai/gpt-oss-120b
9
+ compatible:
10
+ - meta-llama/Llama-3.3-70B-Instruct
11
+ - microsoft/phi-4
12
+ - mistralai/Mixtral-8x7B-Instruct-v0.1
13
+ experimental: []
14
+ tags:
15
+ - knowledge-tuning
16
+ - document-internalization
17
+ - question-generation
18
+ - knowledge-extractive-summary
19
+ - qa-pairs
20
+ - extractive-summaries
21
+ license: Apache-2.0
22
+ min_sdg_hub_version: 0.2.0
23
+ dataset_requirements:
24
+ required_columns:
25
+ - document
26
+ - document_outline
27
+ - domain
28
+ - icl_document
29
+ - icl_query_1
30
+ - icl_query_2
31
+ - icl_query_3
32
+ description: 'Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended). The flow generates three types
33
+ of summaries: detailed (n=20), extractive (n=10), and key facts (n=50), each producing corresponding QA pairs designed to help LLMs internalize document knowledge for knowledge tuning.'
34
+ output_columns:
35
+ - summary
36
+ - question
37
+ - response
38
+ - raw_document
39
+ - faithfulness_explanation
40
+ - faithfulness_judgment
41
+ id: epic-jade-656
42
+ blocks:
43
+ - block_type: DuplicateColumnsBlock
44
+ block_config:
45
+ block_name: duplicate_document_col
46
+ input_cols:
47
+ document: base_document
48
+ - block_type: PromptBuilderBlock
49
+ block_config:
50
+ block_name: extractive_summary_prompt
51
+ input_cols:
52
+ - document
53
+ - document_outline
54
+ output_cols: extractive_summary_prompt
55
+ prompt_config_path: extractive_summary.yaml
56
+ format_as_messages: true
57
+ - block_type: LLMChatBlock
58
+ block_config:
59
+ block_name: gen_extractive_summary
60
+ input_cols: extractive_summary_prompt
61
+ output_cols: raw_summary
62
+ max_tokens: 4096
63
+ temperature: 0.7
64
+ n: 50
65
+ async_mode: true
66
+ - block_type: TextParserBlock
67
+ block_config:
68
+ block_name: parse_extractive_summary
69
+ input_cols: raw_summary
70
+ output_cols: summary
71
+ start_tags:
72
+ - ''
73
+ end_tags:
74
+ - ''
75
+ - block_type: RenameColumnsBlock
76
+ block_config:
77
+ block_name: rename_to_document_column
78
+ input_cols:
79
+ document: raw_document
80
+ summary: document
81
+ - block_type: PromptBuilderBlock
82
+ block_config:
83
+ block_name: question_generation_prompt
84
+ input_cols:
85
+ - domain
86
+ - document
87
+ - document_outline
88
+ - icl_document
89
+ - icl_query_1
90
+ - icl_query_2
91
+ - icl_query_3
92
+ output_cols: question_generation_prompt
93
+ prompt_config_path: ../generate_question_list.yaml
94
+ format_as_messages: true
95
+ - block_type: LLMChatBlock
96
+ block_config:
97
+ block_name: question_generation
98
+ input_cols: question_generation_prompt
99
+ output_cols: question_list
100
+ max_tokens: 256
101
+ temperature: 0.7
102
+ n: 1
103
+ async_mode: true
104
+ - block_type: TextParserBlock
105
+ block_config:
106
+ block_name: parse_question_list
107
+ input_cols: question_list
108
+ output_cols: question
109
+ start_tags:
110
+ - '[QUESTION]'
111
+ end_tags:
112
+ - '[END]'
113
+ - block_type: PromptBuilderBlock
114
+ block_config:
115
+ block_name: answer_generation_prompt
116
+ input_cols:
117
+ - question
118
+ - document
119
+ - document_outline
120
+ output_cols: answer_generation_prompt
121
+ prompt_config_path: ../generate_answers.yaml
122
+ format_as_messages: true
123
+ - block_type: LLMChatBlock
124
+ block_config:
125
+ block_name: answer_generation
126
+ input_cols: answer_generation_prompt
127
+ output_cols: response_dict
128
+ max_tokens: 4096
129
+ temperature: 0.7
130
+ n: 1
131
+ async_mode: true
132
+ - block_type: TextParserBlock
133
+ block_config:
134
+ block_name: parse_response_dict
135
+ input_cols: response_dict
136
+ output_cols: response
137
+ start_tags:
138
+ - ''
139
+ end_tags:
140
+ - ''
141
+ save_reasoning_content: true
142
+ - block_type: EvaluateFaithfulnessBlock
143
+ block_config:
144
+ block_name: eval_faithfulness
145
+ input_cols:
146
+ - document
147
+ - response
148
+ output_cols:
149
+ - faithfulness_explanation
150
+ - faithfulness_judgment
151
+ prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
152
+ filter_value: 'YES'
153
+ operation: eq
154
+ async_mode: true
155
+ format_as_messages: true
156
+ start_tags:
157
+ - '[Start of Explanation]'
158
+ - '[Start of Answer]'
159
+ end_tags:
160
+ - '[End of Explanation]'
161
+ - '[End of Answer]'
@@ -0,0 +1,15 @@
1
+ - role: system
2
+ content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
3
+
4
+ - role: user
5
+ content: |
6
+ Answer the question based on the provided document.
7
+
8
+ Here is the document:
9
+
10
+ Document:
11
+ {{document_outline}}
12
+ {{document}}
13
+
14
+ Question:
15
+ {{question}}
@@ -0,0 +1,21 @@
1
+ - role: system
2
+ content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
3
+
4
+ - role: user
5
+ content: |
6
+ Given the key fact below, taken from a document, generate 5 Question and Answer pairs based on the key fact.
7
+ Introduce variation in the question and key fact.
8
+ Make sure to ground the question and answer in the provided key fact.
9
+
10
+ Strictly follow this format for each question and answer pair you generate while responding:
11
+ [QUESTION]
12
+ <Insert question here>
13
+ [END]
14
+ [ANSWER]
15
+ <Insert answer here>
16
+ [END]
17
+
18
+ Now, here is the key fact:
19
+ [Key Fact]
20
+ {{document_outline}}
21
+ {{key_fact}}
@@ -0,0 +1,44 @@
1
+ - role: system
2
+ content: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
3
+
4
+ - role: user
5
+ content: |
6
+ Develop a series of educational questions from a chapter in a {{domain}} textbook.
7
+
8
+ The questions should:
9
+ * Self-contained – understandable without needing to reference tables, figures, or specific text sections.
10
+ * Focused on the provided examples – follow the format and style of the provided examples.
11
+ * Relevant to the subject – based on the textbook’s domain (e.g., legal, scientific, etc.).
12
+ * Independently answerable – avoid direct references to theorems, figures, or text numbers.
13
+ * Varied in difficulty - make the difficulty the same as the provided examples.
14
+ * Use same format as the provided examples.
15
+
16
+ Strictly follow this format for each question you generate while responding:
17
+
18
+ [QUESTION]
19
+ <Insert question here>
20
+ [END]
21
+
22
+ Each question and answer pair should stand alone as a mini-lesson, encapsulating a key concept or idea from the chapter in a way that is accessible and informative without requiring the reader to refer back to the textbook.
23
+
24
+ Here are some examples of questions:
25
+
26
+ [Document]
27
+ {{icl_document}}
28
+
29
+ [QUESTION]
30
+ {{icl_query_1}}
31
+ [END]
32
+
33
+ [QUESTION]
34
+ {{icl_query_2}}
35
+ [END]
36
+
37
+ [QUESTION]
38
+ {{icl_query_3}}
39
+ [END]
40
+
41
+ Now, here is the document:
42
+ [DOCUMENT]
43
+ {{document_outline}}
44
+ {{document}}
@@ -0,0 +1,104 @@
1
+ metadata:
2
+ name: Key Facts Knowledge Tuning Dataset Generation Flow
3
+ description: Generate a list of atomic facts from a document and convert each atomic fact into a QA pair. This flow will generate 5 QA pairs for each atomic fact.
4
+ version: 2.0.0
5
+ author: SDG Hub Contributors
6
+ recommended_models:
7
+ default: openai/gpt-oss-120b
8
+ compatible:
9
+ - meta-llama/Llama-3.3-70B-Instruct
10
+ - microsoft/phi-4
11
+ - mistralai/Mixtral-8x7B-Instruct-v0.1
12
+ experimental: []
13
+ tags:
14
+ - knowledge-tuning
15
+ - document-internalization
16
+ - question-generation
17
+ - qa-pairs
18
+ - key-facts
19
+ license: Apache-2.0
20
+ min_sdg_hub_version: 0.2.0
21
+ dataset_requirements:
22
+ required_columns:
23
+ - document
24
+ - document_outline
25
+ - domain
26
+ description: 'Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (around maximum of 8000 tokens). The flow generates 5 QA pairs for each atomic fact.'
27
+ output_columns:
28
+ - key_fact
29
+ - question
30
+ - response
31
+ - raw_key_fact_qa
32
+ id: heavy-heart-77
33
+ blocks:
34
+ - block_type: PromptBuilderBlock
35
+ block_config:
36
+ block_name: atomic_facts_prompt
37
+ input_cols:
38
+ - document
39
+ - document_outline
40
+ - domain
41
+ output_cols: atomic_facts_prompt
42
+ prompt_config_path: key_facts_summary.yaml
43
+ format_as_messages: true
44
+ - block_type: LLMChatBlock
45
+ block_config:
46
+ block_name: gen_atomic_facts
47
+ input_cols: atomic_facts_prompt
48
+ output_cols: raw_summary
49
+ max_tokens: 4096
50
+ temperature: 0.7
51
+ n: 1
52
+ async_mode: true
53
+ - block_type: TextParserBlock
54
+ block_config:
55
+ block_name: parse_atomic_facts
56
+ input_cols: raw_summary
57
+ output_cols: atomic_facts
58
+ start_tags:
59
+ - '### Key Facts With Context'
60
+ end_tags:
61
+ - ''
62
+ - block_type: TextParserBlock
63
+ block_config:
64
+ block_name: parse_atomic_facts_to_individual_facts
65
+ input_cols: atomic_facts
66
+ output_cols: key_fact
67
+ parsing_pattern: '(?:^|\n)\s*\d+\.\s+(.*?)(?=\n\s*\d+\.\s+|\Z)'
68
+ - block_type: RenameColumnsBlock
69
+ block_config:
70
+ block_name: rename_to_document_column
71
+ input_cols:
72
+ document: raw_document
73
+ atomic_facts: document
74
+ - block_type: PromptBuilderBlock
75
+ block_config:
76
+ block_name: key_fact_qa
77
+ input_cols:
78
+ - key_fact
79
+ - document_outline
80
+ output_cols: key_fact_qa
81
+ prompt_config_path: ../generate_multiple_qa.yaml
82
+ format_as_messages: true
83
+ - block_type: LLMChatBlock
84
+ block_config:
85
+ block_name: generate_key_fact_qa
86
+ input_cols: key_fact_qa
87
+ output_cols: raw_key_fact_qa
88
+ max_tokens: 4096
89
+ temperature: 0.7
90
+ n: 1
91
+ async_mode: true
92
+ - block_type: TextParserBlock
93
+ block_config:
94
+ block_name: parse_key_fact_qa
95
+ input_cols: raw_key_fact_qa
96
+ output_cols:
97
+ - question
98
+ - response
99
+ start_tags:
100
+ - '[QUESTION]'
101
+ - '[ANSWER]'
102
+ end_tags:
103
+ - '[END]'
104
+ - '[END]'
@@ -0,0 +1,61 @@
1
+ - role: system
2
+ content: You are an expert at summarizing key facts from text.
3
+
4
+ - role: user
5
+ content: |
6
+ Please break down the following snippet from an article about {{domain}} into atomic facts.
7
+ Follow these principles to summarize the key facts:
8
+ 1. Identify atomic facts/key facts from the text.
9
+ 2. Break down compound sentences into atomic facts.
10
+ 3. First list the key facts.
11
+ 4. Then, provide each fact with enough context from the passage so that a reader can clearly understand how the fact connects to the original text.
12
+ 5. Follow the format of the examples below.
13
+
14
+ To help you understand the task, here is an example:
15
+
16
+ ### Passage
17
+ Remote work has grown by over 150% since 2020 due to the pandemic. Companies found that productivity remained stable, while employee satisfaction increased. However, challenges like communication gaps and team cohesion issues emerged. Firms are now adopting hybrid models to balance flexibility with collaboration.
18
+
19
+ ### Key Facts
20
+ 1. Remote work has grown by over 150% since 2020.
21
+ 2. The pandemic was the driving force behind this growth.
22
+ 3. Companies reported that productivity remained stable during remote work.
23
+ 4. Employee satisfaction increased during the remote work period.
24
+ 5. Remote work created communication challenges.
25
+ 6. Remote work weakened team cohesion.
26
+ 7. Companies are adopting hybrid models.
27
+ 8. Hybrid models aim to balance flexibility with collaboration.
28
+
29
+
30
+ ### Key Facts With Context
31
+ 1. **Remote work has grown by over 150% since 2020.**
32
+ → This fact quantifies the dramatic rise in remote work, establishing a clear before-and-after comparison post-pandemic.
33
+
34
+ 2. **The pandemic was the driving force behind this growth.**
35
+ → It situates the shift in a global crisis context, emphasizing the reactive nature of the workplace transformation.
36
+
37
+ 3. **Companies reported that productivity remained stable during remote work.**
38
+ → Despite initial concerns, businesses observed that remote work did not negatively impact output, reinforcing its viability.
39
+
40
+ 4. **Employee satisfaction increased during the remote work period.**
41
+ → Morale and well-being improved under remote arrangements, adding a human-centric benefit to the operational model.
42
+
43
+ 5. **Remote work created communication challenges.**
44
+ → Acknowledges that the new model introduced friction in interpersonal and organizational dialogue.
45
+
46
+ 6. **Remote work weakened team cohesion.**
47
+ → Beyond communication, the shift led to reduced team bonding and synergy, impacting culture and collaboration.
48
+
49
+ 7. **Companies are adopting hybrid models.**
50
+ → Businesses are no longer sticking with full remote or full office—they are evolving toward a blended solution.
51
+
52
+ 8. **Hybrid models aim to balance flexibility with collaboration.**
53
+ → The rationale behind hybrid models is to retain the positives (flexibility, satisfaction) while mitigating the negatives (isolation, miscommunication).
54
+ ### End
55
+
56
+ Now it's your turn: break down the following snippet from an article about {{domain}} into atomic facts, following a similar style as the above examples.
57
+ ### Passage
58
+ {{document_outline}}
59
+ {{document}}
60
+
61
+ ### Key Facts
@@ -46,7 +46,6 @@ blocks:
46
46
  input_cols: [document, document_outline]
47
47
  output_cols: summary_prompt
48
48
  prompt_config_path: detailed_summary.yaml
49
- format_as_messages: true
50
49
 
51
50
  - block_type: LLMChatBlock
52
51
  block_config:
@@ -70,7 +69,6 @@ blocks:
70
69
  input_cols: [document, document_outline, domain]
71
70
  output_cols: atomic_facts_prompt
72
71
  prompt_config_path: atomic_facts.yaml
73
- format_as_messages: true
74
72
 
75
73
  - block_type: LLMChatBlock
76
74
  block_config:
@@ -94,7 +92,6 @@ blocks:
94
92
  input_cols: [document, document_outline]
95
93
  output_cols: extractive_summary_prompt
96
94
  prompt_config_path: extractive_summary.yaml
97
- format_as_messages: true
98
95
 
99
96
  - block_type: LLMChatBlock
100
97
  block_config:
@@ -129,7 +126,6 @@ blocks:
129
126
  input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
130
127
  output_cols: knowledge_generation_prompt
131
128
  prompt_config_path: generate_questions_responses.yaml
132
- format_as_messages: true
133
129
 
134
130
  - block_type: LLMChatBlock
135
131
  block_config:
@@ -157,7 +153,6 @@ blocks:
157
153
  filter_value: "YES"
158
154
  operation: eq
159
155
  async_mode: true
160
- format_as_messages: true
161
156
  start_tags: ["[Start of Explanation]", "[Start of Answer]"]
162
157
  end_tags: ["[End of Explanation]", "[End of Answer]"]
163
158
 
@@ -172,7 +167,6 @@ blocks:
172
167
  convert_dtype: float
173
168
  max_tokens: 2048
174
169
  async_mode: true
175
- format_as_messages: true
176
170
  start_tags: ["[Start of Feedback]", "[Start of Score]"]
177
171
  end_tags: ["[End of Feedback]", "[End of Score]"]
178
172
 
@@ -187,6 +181,5 @@ blocks:
187
181
  convert_dtype: float
188
182
  max_tokens: 2048
189
183
  async_mode: true
190
- format_as_messages: true
191
184
  start_tags: ["[Start of Explanation]", "[Start of Rating]"]
192
185
  end_tags: ["[End of Explanation]", "[End of Rating]"]
@@ -0,0 +1,2 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Text analysis flows for processing and extracting insights from textual content."""
@@ -0,0 +1,6 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Structured Text Insights Extraction Flow.
3
+
4
+ This module provides a comprehensive flow for extracting structured insights from text,
5
+ including summary, keywords, named entities, and sentiment analysis, combined into a JSON output.
6
+ """
@@ -0,0 +1,27 @@
1
+ - role: system
2
+ content: You are an AI assistant expert at analyzing the emotional tone and sentiment of text content.
3
+
4
+ - role: user
5
+ content: |
6
+ Analyze the overall sentiment and emotional tone of the following text. Consider:
7
+
8
+ 1. **Emotional tone**: Is the text positive, negative, or neutral?
9
+ 2. **Intensity**: How strong is the sentiment expressed?
10
+ 3. **Context**: Consider the subject matter and how it's presented
11
+ 4. **Balance**: If there are mixed sentiments, which one dominates?
12
+
13
+ Sentiment categories:
14
+ - **positive**: Optimistic, encouraging, favorable, upbeat content
15
+ - **negative**: Critical, pessimistic, unfavorable, concerning content
16
+ - **neutral**: Factual, balanced, objective content without strong emotional tone
17
+
18
+ Text to analyze:
19
+ {{text}}
20
+
21
+ Provide your response in the following format:
22
+ [SENTIMENT]
23
+ positive
24
+ [/SENTIMENT]
25
+
26
+ Where the sentiment value is one of: positive, negative, or neutral
27
+
@@ -0,0 +1,38 @@
1
+ - role: system
2
+ content: You are an AI assistant expert at identifying and extracting named entities from text content. You must return valid JSON format.
3
+
4
+ - role: user
5
+ content: |
6
+ Extract all important named entities from the following text and organize them by category:
7
+
8
+ Text to analyze:
9
+ {{text}}
10
+
11
+ Identify and categorize entities into:
12
+ - **people**: Names of individuals, titles, roles
13
+ - **organizations**: Companies, institutions, agencies, groups
14
+ - **locations**: Cities, countries, regions, landmarks, addresses
15
+
16
+ Rules:
17
+ - Only include entities explicitly mentioned in the text
18
+ - Use exact names as they appear
19
+ - Focus on the most important entities (3-8 per category max)
20
+
21
+ Provide your response in exactly this format:
22
+ [ENTITIES]
23
+ {
24
+ "people": ["Person 1", "Person 2"],
25
+ "organizations": ["Org 1", "Org 2"],
26
+ "locations": ["Location 1"]
27
+ }
28
+ [/ENTITIES]
29
+
30
+ If no entities are found for a category, use an empty list: []
31
+ If no entities are found at all, respond with:
32
+ [ENTITIES]
33
+ {
34
+ "people": [],
35
+ "organizations": [],
36
+ "locations": []
37
+ }
38
+ [/ENTITIES]
@@ -0,0 +1,21 @@
1
+ - role: system
2
+ content: You are an AI assistant expert at identifying the most important keywords and phrases from text content.
3
+
4
+ - role: user
5
+ content: |
6
+ Extract exactly 10 of the most important keywords or key phrases from the following text. These should be:
7
+
8
+ 1. The most relevant and representative terms
9
+ 2. Words or short phrases (1-3 words) that capture the main topics
10
+ 3. Terms that someone would use to search for or categorize this content
11
+ 4. A mix of specific terms and broader concepts when appropriate
12
+ 5. Avoid common stop words unless they're part of important phrases
13
+
14
+ Text to analyze:
15
+ {{text}}
16
+
17
+ Provide your response in the following format:
18
+ [KEYWORDS]
19
+ keyword1, keyword2, keyword3, keyword4, keyword5, keyword6, keyword7, keyword8, keyword9, keyword10
20
+ [/KEYWORDS]
21
+