fabricatio 0.2.4.dev2__tar.gz → 0.2.4.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/PKG-INFO +1 -1
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/pyproject.toml +1 -1
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/capabilities/rag.py +28 -4
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/config.py +3 -0
- fabricatio-0.2.4.dev3/src/hbs_helpers.rs +15 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/src/lib.rs +1 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/src/templates.rs +7 -0
- fabricatio-0.2.4.dev3/templates/built-in/draft_rating_criteria.hbs +40 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/draft_rating_manual.hbs +20 -6
- fabricatio-0.2.4.dev3/templates/built-in/draft_rating_weights_klee.hbs +44 -0
- fabricatio-0.2.4.dev3/templates/built-in/draft_tool_usage_code.hbs +76 -0
- fabricatio-0.2.4.dev3/templates/built-in/extract_criteria_from_reasons.hbs +49 -0
- fabricatio-0.2.4.dev3/templates/built-in/extract_reasons_from_examples.hbs +48 -0
- fabricatio-0.2.4.dev3/templates/built-in/liststr.hbs +44 -0
- fabricatio-0.2.4.dev3/templates/built-in/make_choice.hbs +40 -0
- fabricatio-0.2.4.dev3/templates/built-in/make_judgment.hbs +35 -0
- fabricatio-0.2.4.dev3/templates/built-in/rate_fine_grind.hbs +51 -0
- fabricatio-0.2.4.dev3/templates/built-in/refined_query.hbs +36 -0
- fabricatio-0.2.4.dev3/templates.tar.gz +0 -0
- fabricatio-0.2.4.dev3/uv.lock +1509 -0
- fabricatio-0.2.4.dev2/templates/built-in/draft_rating_criteria.hbs +0 -24
- fabricatio-0.2.4.dev2/templates/built-in/draft_rating_weights_klee.hbs +0 -23
- fabricatio-0.2.4.dev2/templates/built-in/draft_tool_usage_code.hbs +0 -66
- fabricatio-0.2.4.dev2/templates/built-in/extract_criteria_from_reasons.hbs +0 -24
- fabricatio-0.2.4.dev2/templates/built-in/extract_reasons_from_examples.hbs +0 -18
- fabricatio-0.2.4.dev2/templates/built-in/liststr.hbs +0 -26
- fabricatio-0.2.4.dev2/templates/built-in/make_choice.hbs +0 -25
- fabricatio-0.2.4.dev2/templates/built-in/make_judgment.hbs +0 -27
- fabricatio-0.2.4.dev2/templates/built-in/rate_fine_grind.hbs +0 -39
- fabricatio-0.2.4.dev2/templates.tar.gz +0 -0
- fabricatio-0.2.4.dev2/uv.lock +0 -1509
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/.github/workflows/build-package.yaml +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/.github/workflows/ruff.yaml +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/.github/workflows/tests.yaml +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/.gitignore +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/.python-version +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/Cargo.lock +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/Cargo.toml +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/LICENSE +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/Makefile +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/README.md +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/extract_article/extract.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/llm_usages/llm_usage.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/make_a_rating/rating.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/make_diary/commits.json +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/make_diary/diary.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/minor/hello_fabricatio.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/propose_task/propose.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/simple_chat/chat.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/simple_rag/simple_rag.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/examples/task_handle/handle_task.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/__init__.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/_rust.pyi +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/_rust_instances.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/actions/article.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/actions/rag.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/capabilities/propose.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/capabilities/rating.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/capabilities/task.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/core.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/decorators.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/fs/__init__.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/fs/curd.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/fs/readers.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/journal.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/action.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/events.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/extra.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/generic.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/kwargs_types.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/role.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/task.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/tool.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/usages.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/models/utils.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/parser.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/py.typed +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/toolboxes/__init__.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/toolboxes/arithmetic.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/toolboxes/fs.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/toolboxes/task.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/python/fabricatio/workflows/articles.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/src/hash.rs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/binary-exploitation-ctf-solver.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/claude-xml.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/clean-up-code.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/create_json_obj.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/cryptography-ctf-solver.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/dependencies.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/document-the-code.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/find-security-vulnerabilities.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/fix-bugs.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/improve-performance.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/refactor.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/retrieved_display.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/reverse-engineering-ctf-solver.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/task_briefing.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/web-ctf-solver.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/write-git-commit.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/write-github-pull-request.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/templates/built-in/write-github-readme.hbs +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_config.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_action.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_advanced.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_generic.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_role.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_task.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_tool.py +0 -0
- {fabricatio-0.2.4.dev2 → fabricatio-0.2.4.dev3}/tests/test_models/test_usages.py +0 -0
python/fabricatio/capabilities/rag.py

@@ -13,10 +13,16 @@ from typing import Any, Callable, Dict, List, Optional, Self, Union, Unpack, ove
 from fabricatio._rust_instances import template_manager
 from fabricatio.config import configs
 from fabricatio.journal import logger
-from fabricatio.models.kwargs_types import
+from fabricatio.models.kwargs_types import (
+    ChooseKwargs,
+    CollectionSimpleConfigKwargs,
+    EmbeddingKwargs,
+    FetchKwargs,
+    LLMKwargs,
+)
 from fabricatio.models.usages import EmbeddingUsage
 from fabricatio.models.utils import MilvusData
-from more_itertools.recipes import flatten
+from more_itertools.recipes import flatten, unique
 from pydantic import Field, PrivateAttr
 
 
@@ -221,9 +227,9 @@ class RAG(EmbeddingUsage):
 
         # Step 2: Flatten the search results
         flattened_results = flatten(search_results)
-
+        unique_results = unique(flattened_results, key=itemgetter("id"))
         # Step 3: Sort by distance (descending)
-        sorted_results = sorted(
+        sorted_results = sorted(unique_results, key=itemgetter("distance"), reverse=True)
 
         logger.debug(f"Searched similarities: {[t['distance'] for t in sorted_results]}")
         # Step 4: Extract the entities
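For reference, here is what the new dedup-and-sort step computes, shown as a standalone sketch with made-up Milvus-style hits (the `id` and `distance` values are illustrative only):

```python
# Standalone illustration of the new Step 2/3 logic; values are fabricated for the example.
from operator import itemgetter

from more_itertools.recipes import flatten, unique

search_results = [
    [{"id": 1, "distance": 0.91}, {"id": 2, "distance": 0.40}],
    [{"id": 1, "distance": 0.88}, {"id": 3, "distance": 0.73}],
]

flattened_results = flatten(search_results)
# Collapse hits that several query vectors returned for the same primary key.
unique_results = unique(flattened_results, key=itemgetter("id"))
# Highest similarity first.
sorted_results = sorted(unique_results, key=itemgetter("distance"), reverse=True)
print([hit["id"] for hit in sorted_results])  # each id appears once, best match first
```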
@@ -308,3 +314,21 @@ class RAG(EmbeddingUsage):
             f"{rendered}\n\n{extra_system_message}",
             **kwargs,
         )
+
+    async def arefined_query(self, question: List[str] | str, **kwargs: Unpack[ChooseKwargs]) -> List[str]:
+        """Refines the given question using a template.
+
+        Args:
+            question (List[str] | str): The question to be refined.
+            **kwargs (Unpack[ChooseKwargs]): Additional keyword arguments for the refinement process.
+
+        Returns:
+            List[str]: A list of refined questions.
+        """
+        return await self.aliststr(
+            template_manager.render_template(
+                configs.templates.refined_query_template,
+                {"question": [question] if isinstance(question, str) else question},
+            ),
+            **kwargs,
+        )
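A minimal usage sketch of the new `arefined_query` capability. The subclass, its default construction, and the sample question are illustrative assumptions; only the method call itself comes from this diff:

```python
import asyncio

from fabricatio.capabilities.rag import RAG


class Researcher(RAG):
    """Hypothetical consumer of the RAG capability mixin."""


async def main() -> None:
    researcher = Researcher()  # assumption: LLM/embedding settings are provided via configuration
    # A plain string is wrapped into a one-element list, rendered through the
    # refined_query template, and the LLM's answer is parsed into a list of strings.
    queries = await researcher.arefined_query("How do I implement authentication?")
    print(queries)


asyncio.run(main())
```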
python/fabricatio/config.py

@@ -209,6 +209,9 @@ class TemplateConfig(BaseModel):
     liststr_template: str = Field(default="liststr")
     """The name of the liststr template which will be used to display a list of strings."""
 
+    refined_query_template: str = Field(default="refined_query")
+    """The name of the refined query template which will be used to refine a query."""
+
 
 class MagikaConfig(BaseModel):
     """Magika configuration class."""
src/hbs_helpers.rs (new file)

@@ -0,0 +1,15 @@
+use handlebars::handlebars_helper;
+use serde_json::Value;
+
+handlebars_helper!(len: |v: Value| {
+    if v.is_array(){
+        v.as_array().unwrap().len()
+    }else if v.is_object(){
+        v.as_object().unwrap().len()
+    }else if v.is_string(){
+        v.as_str().unwrap().len()
+    }else{
+        0
+    }
+
+});
src/templates.rs

@@ -1,3 +1,4 @@
+use crate::hbs_helpers::len;
 use handlebars::{no_escape, Handlebars};
 use pyo3::exceptions::PyRuntimeError;
 use pyo3::prelude::*;
@@ -34,6 +35,7 @@ impl TemplateManager {
             suffix:suffix.unwrap_or("hbs".to_string())
         };
         manager.discover_templates();
+        manager.register_builtin_helper();
         Ok(manager)
     }
 
@@ -90,6 +92,11 @@ impl TemplateManager {
             })
             .collect()
     }
+
+    fn register_builtin_helper(&mut self){
+        self.handlebars.register_helper("len",Box::new(len))
+    }
+
 }
 
 pub(crate) fn register(_: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
templates/built-in/draft_rating_criteria.hbs (new file)

@@ -0,0 +1,40 @@
+# Task
+Please draft rating criteria for the topic: "{{ topic }}".
+
+## CFER Framework
+Create criteria that are:
+- **Comprehensive**: Cover all important aspects of the topic
+- **Fair**: Measurable and objective across different instances
+- **Evidence-based**: Can be evaluated based on observable evidence
+- **Relevant**: Directly related to the quality or performance of the topic
+
+Return the criteria{{#if criteria_count}} with exactly {{ criteria_count }} items{{/if}} in JSON format as a list of strings within code block.
+
+# Example:
+Rating criteria for the topic "Nice icecream"
+
+----- Start of response example -----
+```json
+[
+"Visual appeal (color, presentation, garnish)",
+"Flavor complexity and authenticity",
+"Texture consistency and mouthfeel",
+"Ingredient quality and freshness",
+"Value for price point"
+]
+```
+----- End of response example -----
+
+## Aspect Introduction
+Each criterion should:
+- Be concise yet descriptive (5-10 words)
+- Represent a distinct aspect of evaluation
+- Be applicable across different instances of the topic
+- Avoid overlapping with other criteria
+- Use terminology relevant to the domain
+
+# Warning
+- Please ensure that the criteria are clear, concise, and independently measurable.
+- The response SHALL be returned as a JSON array within the codeblock.
+- No additional explanation is needed.
+{{#if criteria_count }}- You must return exactly {{ criteria_count }} criteria.{{/if}}
templates/built-in/draft_rating_manual.hbs

@@ -1,16 +1,22 @@
 # Task
-Please draft a rating manual for the following topic and criteria
+Please draft a rating manual for the following topic and criteria. This manual will guide evaluators in applying consistent standards.
 
 Topic: {{ topic }}
 Criteria: {{ criteria }}
 
-
+## CFER Framework
+Ensure your rating manual follows these principles:
+- **Comprehensive**: Cover all important aspects of each criterion
+- **Fair**: Describe measurable and objective evaluation standards
+- **Evidence-based**: Focus on observable evidence for each rating level
+- **Relevant**: Connect directly to quality or performance within the topic
 
+Return the rating manual as a JSON object within a codeblock where the keys are the criteria and the values are the detailed descriptions for each criterion.
 
 # Example:
 
-
-
+Topic: Nice icecream
+Criteria: ["outlook","taste","texture"]
 ----- Start of response example -----
 ```json
 {
@@ -19,9 +25,17 @@ criteria:["outlook","taste","texture"]
 "texture": "How the icecream feels. High score for smooth texture without icy bits, melts smoothly in the mouth providing a creamy experience; low score for rough texture containing many ice crystals, feeling like it has been frozen for too long lacking softness."
 }
 ```
------
+----- End of response example -----
+
+## Aspect Introduction
+For each criterion in your manual:
+- Provide a clear definition of what the criterion measures
+- Include specific examples of high-quality and low-quality manifestations
+- Define observable characteristics evaluators should look for
+- Use domain-appropriate terminology
+- Ensure the description enables consistent application across different instances
 
 # Warning
 - Please ensure that the rating manual is clear and concise, and that the criteria for each criterion are well-defined.
 - The response SHALL be returned as a JSON object within the codeblock.
-- No additional
+- No additional explanation is needed.
templates/built-in/draft_rating_weights_klee.hbs (new file)

@@ -0,0 +1,44 @@
+# Task:
+Please compare the relative importance of the following criteria for evaluating "{{topic}}":
+
+First Criterion: {{first}}
+Second Criterion: {{second}}
+
+## CFER Framework
+When determining the relative importance weight, consider these principles:
+- **Comprehensive**: Consider how each criterion contributes to the overall evaluation
+- **Fair**: Base your weighting on objective measures rather than personal preference
+- **Evidence-based**: Consider measurable impacts of each criterion on outcomes
+- **Relevant**: Prioritize criteria based on their direct influence on {{topic}} quality
+
+Provide a positive floating-point number where:
+- 1.0 means the criteria are equally important
+- > 1.0 means `{{second}}` is more important (e.g., 2.0 means twice as important)
+- < 1.0 means `{{first}}` is more important (e.g., 0.5 means half as important)
+
+Your response SHALL be a positive floating-point number within a JSON codeblock.
+
+## Aspect Introduction
+When comparing these criteria:
+- Consider the core purpose and context of "{{topic}}"
+- Evaluate which criterion has broader impact on outcomes
+- Assess which criterion is more foundational or prerequisite
+- Consider which criterion is more difficult to compensate for if lacking
+- Think about which criterion users/stakeholders would prioritize
+- Consider industry standards and expert consensus in this domain
+
+# Example:
+Topic: "Best icecream"
+First Criterion: "Taste"
+Second Criterion: "Texture"
+
+----- Start of response example -----
+```json
+1.5
+```
+----- End of response example -----
+
+# Warning:
+- Your response must be a POSITIVE floating-point number within a JSON codeblock.
+- No explanation or additional information is needed, only the positive floating-point number.
+- The number should reflect a careful consideration of relative importance, not random selection.
templates/built-in/draft_tool_usage_code.hbs (new file)

@@ -0,0 +1,76 @@
+# Task: Draft Python Code for Tool Integration
+
+You are tasked with writing Python code that effectively utilizes specific tools to accomplish a defined task. This template will guide you to produce high-quality, executable code.
+
+## CFER Framework
+Your code solution should follow these principles:
+- **Comprehensive**: Utilize all relevant tools necessary to fully accomplish the task
+- **Functional**: Ensure the code executes without errors and handles edge cases appropriately
+- **Efficient**: Optimize for performance and resource utilization where appropriate
+- **Reliable**: Include proper error handling and validation for robust execution
+
+## Available Resources
+
+**Dependencies:**
+{{deps}}
+
+**Available Data:**
+{{#each data }}
+-------- Start of `{{ @key }}` --------
+{{ this }}
+-------- End of `{{ @key }}` --------
+{{/each }}
+
+**Task Briefing:**
+{{ task }}
+
+**Available Tools:**
+{{#each tools }}
+- `{{ name }}`
+{{/each }}
+
+## Implementation Guidelines
+When developing your solution:
+- Begin with proper tool imports and initialization
+- Structure your code with logical function organization
+- Implement appropriate error handling
+- Document key sections with clear comments
+- Define output variables to capture results
+- Follow Python best practices for code style
+
+## Response Format
+Provide your solution in the following format:
+
+1. Python code implementation in a code block
+2. JSON array of output variable names that contain your solution results
+
+## Example Response:
+
+---- Start of Response Example ----
+
+```python
+{{#if data}}# Import available data
+from {{data_module_name}} import ({{#each data }}{{@key}},{{/each }}){{/if}}
+# Import available tools
+from {{tool_module_name}} import ({{#each tools }}{{name}},{{/each }})
+
+output1 = # Create your output no.1 with `{{tool_module_name}}`{{#if data}} and `{{data_module_name}}`{{/if}} here, if needed
+another_output = # Create your output no.2 with `{{tool_module_name}}`{{#if data}} and `{{data_module_name}}`{{/if}} here, if needed
+yet_another_output = # Create your output no.3 with `{{tool_module_name}}`{{#if data}} and `{{data_module_name}}`{{/if}} ,here, if needed
+
+```
+
+```json
+[
+"output1",
+"another_output"
+"yet_another_output"
+]
+```
+---- End of Response Example ----
+
+## Constraints
+- Your solution must use ONLY the provided tools and data
+- Do not import any third-party libraries not specified in the dependencies
+- Ensure all output variables contain JSON-serializable values
+- Your response must include both the Python code block and the JSON output array code block
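For orientation, a sketch of the rendering context this template appears to expect, inferred from the placeholders it references (`deps`, `data`, `task`, `tools`, `data_module_name`, `tool_module_name`). The template-name lookup by file stem and all sample values are assumptions:

```python
from fabricatio._rust_instances import template_manager

# Hypothetical context; keys mirror the placeholders used in the template above.
context = {
    "deps": "more-itertools",
    "task": "Sum the provided numbers and report the total.",
    "data": {"numbers": "[1, 2, 3]"},  # iterated via {{#each data}} with {{@key}} / {{this}}
    "tools": [{"name": "add"}, {"name": "dump_text"}],  # iterated via {{#each tools}} with {{name}}
    "data_module_name": "task_data",  # assumed module names used in the example imports
    "tool_module_name": "toolbox",
}

prompt = template_manager.render_template("draft_tool_usage_code", context)
print(prompt)
```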
templates/built-in/extract_criteria_from_reasons.hbs (new file)

@@ -0,0 +1,49 @@
+# Task: Extract Evaluation Criteria from Reasons
+
+Extract {{criteria_count}} most important criteria for evaluating "{{topic}}" from the provided list of reasons. These criteria should capture the key aspects that matter most when comparing or evaluating items in this domain.
+
+## CFER Framework
+When extracting and refining criteria, follow these principles:
+- **Comprehensive**: Identify criteria that cover different aspects of the topic
+- **Focused**: Each criterion should represent a distinct evaluable dimension
+- **Extracted**: Derive criteria directly from the provided reasons, not external knowledge
+- **Refined**: Transform raw mentions into clear, concise evaluation dimensions
+
+## Input Reasons
+{{reasons}}
+
+## Extraction Guidelines
+When identifying criteria:
+1. Identify key comparison points mentioned across multiple reasons
+2. Look for qualities, features, or characteristics used to differentiate options
+3. Consider both explicitly stated and implicitly suggested evaluation factors
+4. Consolidate similar or related concepts into single meaningful criteria
+5. Prioritize criteria based on frequency of mention and importance to the topic
+6. Express each criterion as a single clear concept (usually a noun or noun phrase)
+
+## Response Format
+Return exactly {{criteria_count}} criteria as a JSON array of strings, ranked in DESCENDING order of importance.
+
+# Example:
+Topic: "Good icecream"
+Reasons: ["Vanilla Dream has a rich and authentic vanilla flavor, while Choco Delight has an artificial chocolate flavor.", "Vanilla Dream has a smooth and creamy texture, while Choco Delight has a rough texture with ice crystals.", "Choco Delight is more affordable than Vanilla Dream.", "Choco Delight has a wider variety of flavors compared to Vanilla Dream."]
+
+----- Start of response example -----
+```json
+[
+"flavor authenticity",
+"texture quality",
+"affordability",
+"flavor variety"
+]
+```
+----- End of response example -----
+
+## Format Requirements
+- Return EXACTLY {{criteria_count}} criteria, no more and no less
+- Format as a valid JSON array of strings
+- Each criterion should be concise (1-3 words when possible)
+- Use consistent terminology (prefer noun forms)
+- Ensure criteria are distinct from each other
+- List criteria in descending order of importance
+- No explanations or additional text outside the JSON array
templates/built-in/extract_reasons_from_examples.hbs (new file)

@@ -0,0 +1,48 @@
+# Task: Generate Comparative Reasons Between Examples
+
+Within the topic of "{{topic}}", analyze why "{{first}}" is better than "{{second}}" by providing {{reasons_count}} clear, substantiated reasons.
+
+## CFER Framework
+When formulating your comparative reasons:
+- **Comparative**: Each reason should directly contrast specific aspects of both examples
+- **Factual**: Base reasons on observable qualities rather than subjective opinions
+- **Explicit**: Clearly articulate the advantage that "{{first}}" has over "{{second}}"
+- **Relevant**: Focus on aspects that matter most within the context of "{{topic}}"
+
+## Reasoning Guidelines
+When developing your reasons:
+1. Identify specific features, attributes or characteristics where "{{first}}" excels
+2. For each feature, explain how it manifests in both examples with clear contrast
+3. Articulate why the difference matters in the context of "{{topic}}"
+4. Use concrete details rather than vague generalizations
+5. Ensure each reason addresses a distinct aspect (avoid overlap)
+6. Consider both qualitative and quantitative advantages where applicable
+
+## Response Format
+Provide exactly {{reasons_count}} reasons in a JSON array of strings. Each reason should:
+- Be a complete, self-contained statement
+- Include specific information about both "{{first}}" and "{{second}}"
+- Clearly explain the comparative advantage
+
+# Example:
+Topic: "Good icecream"
+First: "Vanilla Dream"
+Second: "Choco Delight"
+Reasons count: 2
+
+----- Start of response example -----
+```json
+[
+"Vanilla Dream has a rich and authentic vanilla flavor, while Choco Delight has an artificial chocolate flavor.",
+"Vanilla Dream has a smooth and creamy texture, while Choco Delight has a rough texture with ice crystals."
+]
+```
+----- End of response example -----
+
+## Format Requirements
+- Provide EXACTLY {{reasons_count}} reasons, no more and no less
+- Format as a valid JSON array of strings
+- Each reason should be a complete sentence with proper punctuation
+- Each reason must mention both "{{first}}" and "{{second}}" by name
+- Each reason should highlight a specific advantage of "{{first}}" over "{{second}}"
+- No explanations, justifications, or additional text outside the JSON array
templates/built-in/liststr.hbs (new file)

@@ -0,0 +1,44 @@
+# Task: Generate List of Strings
+
+{{requirement}}
+
+## CFER Framework
+When generating your list of strings, follow these principles:
+- **Complete**: Include all items that match the requirement criteria
+- **Focused**: Each string should represent a distinct, relevant item
+- **Exact**: Follow the requirement precisely without adding irrelevant items
+- **Refined**: Format each string consistently and appropriately
+
+## Response Guidelines
+When creating your list:
+1. Identify all items that fully satisfy the requirement
+2. Express each item as a clear, concise string
+3. Remove duplicates and redundant entries
+4. Use consistent formatting across all items (capitalization, terminology)
+5. Order items logically (alphabetical, chronological, or by importance)
+{{#if k}}6. Provide EXACTLY {{k}} items as specified{{/if}}
+
+## Response Format
+Return your list as a JSON array of strings within a code block.
+
+# Example:
+Requirement: Extract all fruit names mentioned in the text:
+"The apple is red, the banana is yellow, the orange is orange, and the green apple is sour."
+
+----- Start of response example -----
+```json
+[
+"apple",
+"banana",
+"orange",
+"green apple"
+]
+```
+----- End of response example -----
+
+## Format Requirements
+- Response MUST be a valid JSON array within a code block
+- Every element MUST be a string type
+- No explanations or additional text outside the code block
+- No empty strings or null values
+{{#if k}}- The array MUST contain EXACTLY {{k}} strings{{/if}}
templates/built-in/make_choice.hbs (new file)

@@ -0,0 +1,40 @@
+# Task: Make Selection from Available Options
+
+{{instruction}}
+
+## CFER Framework
+When selecting the most appropriate option(s), follow these principles:
+- **Considered**: Evaluate each option thoroughly against the requirements
+- **Focused**: Select only options that directly address the core needs
+- **Evidence-based**: Base your selection on concrete merits rather than assumptions
+- **Reasoned**: Prioritize options that best satisfy all critical requirements
+
+## Available Options
+{{#each options}}
+- **{{name}}**: {{briefing}}
+{{/each}}
+
+## Selection Guidelines
+When making your selection:
+1. Identify the key requirements in the instruction
+2. Evaluate how well each option satisfies these requirements
+3. Consider both primary functionality and secondary benefits
+4. Eliminate options that fail to meet essential criteria
+5. Prioritize options that offer the best overall solution
+{{#if k}}6. Select EXACTLY {{k}} options as specified{{/if}}
+
+## Response Format
+Return your selection as a JSON array of option names within a code block.
+
+# Example:
+----- Start of response example -----
+```json
+["option1", "option2"]
+```
+----- End of response example -----
+
+## Format Requirements
+- Response MUST be a valid JSON array containing only option names
+- Each selected option MUST be from the provided list: [{{#each options}}"{{name}}", {{/each}}]
+- No explanations or additional text outside the JSON code block
+{{#if k}}- The array MUST contain EXACTLY {{k}} options{{/if}}
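Likewise, a hedged sketch of the context that would fill this template's `{{instruction}}`, `{{#each options}}`, and `{{k}}` placeholders; the option names and briefings are invented for illustration, and the by-file-stem template name is assumed:

```python
from fabricatio._rust_instances import template_manager

# Hypothetical context matching the placeholders above.
context = {
    "instruction": "Pick the storage backend best suited for vector similarity search.",
    "options": [
        {"name": "milvus", "briefing": "Purpose-built vector database with ANN indexes."},
        {"name": "sqlite", "briefing": "Lightweight relational database, no vector index."},
    ],
    "k": 1,
}

print(template_manager.render_template("make_choice", context))
```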
templates/built-in/make_judgment.hbs (new file)

@@ -0,0 +1,35 @@
+# Task: Make Boolean Judgment
+
+{{ prompt }}
+
+## CFER Framework
+When making your judgment, follow these principles:
+- **Clear**: Assess the prompt against definitive criteria
+- **Focused**: Evaluate only what's directly requested, avoiding scope creep
+- **Evidence-based**: Base your judgment solely on the information provided
+- **Responsive**: Provide precisely the boolean value that answers the prompt
+
+## Judgment Guidelines
+When determining your response:
+1. Carefully analyze the exact wording of the prompt
+2. Identify the specific criteria that determine a true/false answer
+3. Apply logical reasoning without introducing external assumptions
+4. Make a definitive decision based on the available information
+{{#if affirm_case}}5. If {{affirm_case}}, respond `true`{{/if}}
+{{#if deny_case}}6. If {{deny_case}}, respond `false`{{/if}}
+
+## Response Format
+Return your judgment as a single boolean value (true or false) in a JSON code block.
+
+# Example:
+----- Start of response example -----
+```json
+true
+```
+----- End of response example -----
+
+## Format Requirements
+- Response MUST be EXACTLY `true` or `false` within a JSON code block
+- No explanations, qualifications, or additional text outside the code block
+- No "maybe," "it depends," or other non-boolean responses
+- No quotation marks around the boolean value in the JSON
templates/built-in/rate_fine_grind.hbs (new file)

@@ -0,0 +1,51 @@
+# Task: Rate Content on Multiple Dimensions
+
+Rate the following content based on the provided rating manual and score range:
+
+----- Start of content to rate -----
+{{ to_rate }}
+----- End of content to rate -----
+
+## CFER Framework
+When rating the content, follow these principles:
+- **Consistent**: Apply the same standards across all dimensions
+- **Factual**: Base ratings on observable qualities rather than subjective preferences
+- **Explicit**: Consider each dimension independently based on its defined criteria
+- **Refined**: Provide precise scores rounded to two decimal places
+
+## Rating Manual
+{{ rating_manual }}
+
+## Score Range
+- Minimum score: {{ min_score }}
+- Maximum score: {{ max_score }}
+
+## Rating Guidelines
+When determining your ratings:
+1. Analyze the content thoroughly against each dimension in the rating manual
+2. Consider only what's present in the content being rated
+3. Evaluate each dimension independently based on its specific criteria
+4. Assign precise scores within the specified range for each dimension
+5. Round all scores to exactly two decimal places
+6. Ensure no dimension is scored outside the range [{{ min_score }}, {{ max_score }}]
+
+## Response Format
+Return your ratings as a JSON object with dimension names as keys and scores as values.
+
+# Example:
+----- Start of response example -----
+```json
+{
+"outlook": 3.56,
+"taste": 4.55,
+"texture": 3.01
+}
+```
+----- End of response example -----
+
+## Format Requirements
+- Response MUST be a valid JSON object within a code block
+- Keys MUST match ALL dimensions from the rating manual: [{{#each rating_manual}}"{{@key}}",{{/each}}]
+- Values MUST be numeric, rounded to exactly two decimal places
+- All scores MUST be within the range [{{ min_score }}, {{ max_score }}]
+- No explanations or additional text outside the JSON code block
templates/built-in/refined_query.hbs (new file)

@@ -0,0 +1,36 @@
+{{#if question}}# Context
+You are a query refinement specialist optimizing search queries for vector database retrieval. Your goal is to improve how questions are framed to maximize relevant document retrieval.
+
+# Format
+For each query:
+- Make it more specific and focused
+- Add domain-specific terminology
+- Remove ambiguous language
+- Split complex questions into multiple simpler queries when appropriate
+- Each refined query should be 1-3 sentences
+
+# Aspects to Consider
+- Specificity: Replace vague terms with precise alternatives
+- Terminology: Include technical vocabulary relevant to the domain
+- Decomposition: Break down multi-part questions into separate queries
+- Context Enrichment: Add implied but unstated contextual elements
+- Information Density: Ensure key concepts are emphasized
+
+# Examples
+Original: "How do I implement authentication?"
+Refined:
+- "What are best practices for implementing user authentication in web applications?"
+- "How to securely store authentication credentials in a database?"
+
+Original: "Explain vector databases"
+Refined: "Technical explanation of vector database architecture and similarity search mechanisms for embedding storage"
+
+# Response
+Refine the following {{len question}} {{#if question.[1]}}queries{{else}}query{{/if}}:
+
+{{#each question}}
+- "{{this}}"
+{{/each}}
+
+Output ONLY the refined queries as a numbered list without explanations.
+{{/if}}
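A small sketch tying this template to the new Rust `len` helper and to `arefined_query` above; the call mirrors the one added in `rag.py`, and only the question strings are illustrative:

```python
from fabricatio._rust_instances import template_manager
from fabricatio.config import configs

# With two questions, "{{len question}}" renders as 2 via the new `len` helper
# and "{{#if question.[1]}}" switches the wording to the plural "queries".
prompt = template_manager.render_template(
    configs.templates.refined_query_template,
    {"question": ["How do I implement authentication?", "Explain vector databases"]},
)
print(prompt)
```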
templates.tar.gz: Binary file (no text diff shown)