fabricatio 0.2.10.dev0__tar.gz → 0.2.10.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/PKG-INFO +1 -4
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/README.md +0 -2
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/extract_and_inject/.gitignore +2 -1
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/extract_and_inject/ask.py +16 -17
- fabricatio-0.2.10.dev1/examples/extract_and_inject/chunk_article.py +41 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/search_bibtex/search.py +1 -1
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/pyproject.toml +1 -2
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/actions/article.py +2 -2
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/actions/article_rag.py +33 -2
- fabricatio-0.2.10.dev1/python/fabricatio/actions/rag.py +96 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/rag.py +5 -2
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/adv_kwargs_types.py +5 -12
- fabricatio-0.2.10.dev1/python/fabricatio/models/extra/aricle_rag.py +120 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_essence.py +8 -7
- fabricatio-0.2.10.dev1/python/fabricatio/models/extra/rag.py +98 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/generic.py +12 -11
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/kwargs_types.py +8 -1
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/rust.pyi +10 -1
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/utils.py +14 -1
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/bib_tools.rs +23 -2
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/refined_query.hbs +7 -6
- fabricatio-0.2.10.dev1/templates.tar.gz +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/uv.lock +27 -52
- fabricatio-0.2.10.dev0/python/fabricatio/actions/rag.py +0 -74
- fabricatio-0.2.10.dev0/python/fabricatio/models/extra/rag.py +0 -72
- fabricatio-0.2.10.dev0/templates.tar.gz +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/.github/workflows/build-package.yaml +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/.github/workflows/ruff.yaml +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/.github/workflows/tests.yaml +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/.gitignore +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/.python-version +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/Cargo.lock +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/Cargo.toml +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/LICENSE +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/Makefile +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/correct/correct.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/correct/correct_loop.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/extract_and_inject/article_rag.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/extract_and_inject/extract_and_inject.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/extract_article/extract.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/llm_usages/llm_usage.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/make_a_rating/rating.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/make_diary/commits.json +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/make_diary/diary.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/minor/hello_fabricatio.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/minor/write_a_poem.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/propose_task/propose.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/reviewer/censor.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/reviewer/review.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/rules/.gitignore +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/rules/draft_ruleset.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/search_bibtex/.gitignore +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/simple_chat/chat.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/simple_rag/simple_rag.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/task_handle/handle_task.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_article/.gitignore +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_article/article_rag.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_article/write_article.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_outline/.gitignore +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_outline/write_outline.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/examples/write_outline/write_outline_corrected.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/actions/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/actions/output.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/actions/rules.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/advanced_judge.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/censor.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/check.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/correct.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/propose.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/rating.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/review.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/capabilities/task.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/config.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/constants.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/core.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/decorators.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/fs/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/fs/curd.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/fs/readers.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/journal.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/action.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/events.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/advanced_judge.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_base.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_main.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_outline.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_proposal.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/patches.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/problem.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/rule.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/role.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/task.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/tool.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/usages.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/parser.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/py.typed +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/rust_instances.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/toolboxes/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/toolboxes/arithmetic.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/toolboxes/fs.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/workflows/__init__.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/workflows/articles.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/workflows/rag.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/hash.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/hbs_helpers.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/language.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/lib.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/templates.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/src/word_split.rs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/as_prompt.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/binary-exploitation-ctf-solver.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/check_string.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/claude-xml.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/clean-up-code.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/co_validation.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/create_json_obj.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/cryptography-ctf-solver.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/dependencies.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/document-the-code.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/draft_rating_criteria.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/draft_rating_manual.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/draft_rating_weights_klee.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/draft_tool_usage_code.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/extract_criteria_from_reasons.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/extract_reasons_from_examples.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/find-security-vulnerabilities.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/fix-bugs.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/fix_troubled_obj.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/fix_troubled_string.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/generic_string.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/improve-performance.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/liststr.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/make_choice.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/make_judgment.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/pathstr.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/rate_fine_grind.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/refactor.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/retrieved_display.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/reverse-engineering-ctf-solver.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/review_string.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/rule_requirement.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/ruleset_requirement_breakdown.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/task_briefing.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/web-ctf-solver.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/write-git-commit.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/write-github-pull-request.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/templates/built-in/write-github-readme.hbs +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_config.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_action.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_advanced.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_generic.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_problem.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_role.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_task.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_tool.py +0 -0
- {fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/tests/test_models/test_usages.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: fabricatio
|
3
|
-
Version: 0.2.10.dev0
|
3
|
+
Version: 0.2.10.dev1
|
4
4
|
Classifier: License :: OSI Approved :: MIT License
|
5
5
|
Classifier: Programming Language :: Rust
|
6
6
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -23,7 +23,6 @@ Requires-Dist: pymitter>=1.0.0
|
|
23
23
|
Requires-Dist: questionary>=2.1.0
|
24
24
|
Requires-Dist: regex>=2024.11.6
|
25
25
|
Requires-Dist: rich>=13.9.4
|
26
|
-
Requires-Dist: rtoml>=0.12.0
|
27
26
|
Requires-Dist: pymilvus>=2.5.4 ; extra == 'rag'
|
28
27
|
Requires-Dist: fabricatio[calc,plot,rag] ; extra == 'full'
|
29
28
|
Requires-Dist: sympy>=1.13.3 ; extra == 'calc'
|
@@ -45,8 +44,6 @@ Project-URL: Issues, https://github.com/Whth/fabricatio/issues
|
|
45
44
|
# Fabricatio
|
46
45
|
|
47
46
|

|
48
|
-

|
49
|
-

|
50
47
|
|
51
48
|
## Overview
|
52
49
|
|
@@ -2,8 +2,10 @@
|
|
2
2
|
|
3
3
|
import asyncio
|
4
4
|
|
5
|
-
from fabricatio import
|
5
|
+
from fabricatio import Action, Role, Task, WorkFlow, logger
|
6
|
+
from fabricatio.capabilities.rag import RAG
|
6
7
|
from fabricatio.models.events import Event
|
8
|
+
from fabricatio.models.extra.aricle_rag import ArticleChunk
|
7
9
|
from fabricatio.utils import ok
|
8
10
|
from questionary import text
|
9
11
|
|
@@ -23,14 +25,14 @@ class Talk(Action, RAG):
|
|
23
25
|
user_say = await text("User: ").ask_async()
|
24
26
|
if user_say is None:
|
25
27
|
break
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
ref_q = ok(await self.arefined_query(user_say))
|
29
|
+
logger.info(f'refined query: \n{ref_q}')
|
30
|
+
ret = await self.aretrieve(ref_q, document_model=ArticleChunk)
|
31
|
+
|
32
|
+
sys_msg = "\n".join(r.as_prompt() for r in ret)
|
33
|
+
logger.info(f"System message: \n{sys_msg}")
|
34
|
+
gpt_say = await self.aask(user_say, sys_msg)
|
35
|
+
|
34
36
|
print(f"GPT: {gpt_say}") # noqa: T201
|
35
37
|
counter += 1
|
36
38
|
except KeyboardInterrupt:
|
@@ -40,18 +42,15 @@ class Talk(Action, RAG):
|
|
40
42
|
|
41
43
|
async def main() -> None:
|
42
44
|
"""Main function."""
|
43
|
-
|
45
|
+
Role(
|
44
46
|
name="talker",
|
45
47
|
description="talker role but with rag",
|
46
|
-
registry={
|
48
|
+
registry={
|
49
|
+
Event.quick_instantiate("talk"): WorkFlow(name="talk", steps=(Talk(target_collection="article_chunks"),))
|
50
|
+
},
|
47
51
|
)
|
48
52
|
|
49
|
-
task =
|
50
|
-
await role.propose_task(
|
51
|
-
"you have to act as a helpful assistant, answer to all user questions properly and patiently"
|
52
|
-
),
|
53
|
-
"Failed to propose task",
|
54
|
-
)
|
53
|
+
task = Task(name="answer user's questions")
|
55
54
|
_ = await task.delegate("talk")
|
56
55
|
|
57
56
|
|
@@ -0,0 +1,41 @@
|
|
1
|
+
"""Example of proposing a task to a role."""
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
|
5
|
+
from fabricatio import BibManager, Event, Role, Task, WorkFlow, logger
|
6
|
+
from fabricatio.actions.article_rag import ChunkArticle
|
7
|
+
from fabricatio.actions.rag import InjectToDB
|
8
|
+
from fabricatio.fs import gather_files
|
9
|
+
from fabricatio.utils import ok
|
10
|
+
|
11
|
+
|
12
|
+
async def main() -> None:
|
13
|
+
"""Main function."""
|
14
|
+
Role(
|
15
|
+
name="Researcher",
|
16
|
+
description="chunk the article",
|
17
|
+
llm_rpm=50,
|
18
|
+
llm_tpm=100000,
|
19
|
+
registry={
|
20
|
+
Event.quick_instantiate(e := "Chunk"): WorkFlow(
|
21
|
+
name="Chunk",
|
22
|
+
steps=(
|
23
|
+
ChunkArticle(output_key="to_inject"),
|
24
|
+
InjectToDB(collection_name="article_chunks").to_task_output(),
|
25
|
+
),
|
26
|
+
).update_init_context(
|
27
|
+
article_path=gather_files("bare_md", "md"),
|
28
|
+
bib_manager=BibManager(path="ref.bib"),
|
29
|
+
max_chunk_size=500,
|
30
|
+
max_overlapping_rate=0.3,
|
31
|
+
),
|
32
|
+
},
|
33
|
+
)
|
34
|
+
|
35
|
+
task: Task[str] = Task(name="Chunk Article")
|
36
|
+
res = ok(await task.delegate(e))
|
37
|
+
logger.success(f"Injected to {res}")
|
38
|
+
|
39
|
+
|
40
|
+
if __name__ == "__main__":
|
41
|
+
asyncio.run(main())
|
@@ -3,7 +3,7 @@ from fabricatio import BibManager, logger
|
|
3
3
|
|
4
4
|
b = BibManager("Exported Items.bib")
|
5
5
|
logger.success(
|
6
|
-
b.
|
6
|
+
b.get_cite_key_by_title("A Negative Selection Immune System Inspired Methodology for Fault Diagnosis of Wind Turbines"))
|
7
7
|
logger.success(
|
8
8
|
b.get_cite_key_fuzzy(
|
9
9
|
"System Inspired Methodology for Fault"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "fabricatio"
|
3
|
-
version = "0.2.10-dev0"
|
3
|
+
version = "0.2.10-dev1"
|
4
4
|
description = "A LLM multi-agent framework."
|
5
5
|
readme = "README.md"
|
6
6
|
license = { file = "LICENSE" }
|
@@ -42,7 +42,6 @@ dependencies = [
|
|
42
42
|
"questionary>=2.1.0",
|
43
43
|
"regex>=2024.11.6",
|
44
44
|
"rich>=13.9.4",
|
45
|
-
"rtoml>=0.12.0",
|
46
45
|
]
|
47
46
|
|
48
47
|
[project.urls]
|
@@ -4,6 +4,7 @@ from asyncio import gather
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import Callable, List, Optional
|
6
6
|
|
7
|
+
from fabricatio.rust import BibManager, detect_language
|
7
8
|
from more_itertools import filter_map
|
8
9
|
|
9
10
|
from fabricatio.capabilities.censor import Censor
|
@@ -17,7 +18,6 @@ from fabricatio.models.extra.article_outline import ArticleOutline
|
|
17
18
|
from fabricatio.models.extra.article_proposal import ArticleProposal
|
18
19
|
from fabricatio.models.extra.rule import RuleSet
|
19
20
|
from fabricatio.models.task import Task
|
20
|
-
from fabricatio.rust import BibManager, detect_language
|
21
21
|
from fabricatio.utils import ok
|
22
22
|
|
23
23
|
|
@@ -78,7 +78,7 @@ class FixArticleEssence(Action):
|
|
78
78
|
out = []
|
79
79
|
count = 0
|
80
80
|
for a in article_essence:
|
81
|
-
if key := (bib_mgr.
|
81
|
+
if key := (bib_mgr.get_cite_key_by_title(a.title) or bib_mgr.get_cite_key_fuzzy(a.title)):
|
82
82
|
a.title = bib_mgr.get_title_by_key(key) or a.title
|
83
83
|
a.authors = bib_mgr.get_author_by_key(key) or a.authors
|
84
84
|
a.publication_year = bib_mgr.get_year_by_key(key) or a.publication_year
|
@@ -1,11 +1,15 @@
|
|
1
1
|
"""A module for writing articles using RAG (Retrieval-Augmented Generation) capabilities."""
|
2
2
|
|
3
3
|
from asyncio import gather
|
4
|
-
from
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import List, Optional
|
5
6
|
|
7
|
+
from fabricatio import BibManager
|
6
8
|
from fabricatio.capabilities.censor import Censor
|
7
9
|
from fabricatio.capabilities.rag import RAG
|
8
10
|
from fabricatio.models.action import Action
|
11
|
+
from fabricatio.models.extra.aricle_rag import ArticleChunk
|
12
|
+
from fabricatio.models.extra.article_essence import ArticleEssence
|
9
13
|
from fabricatio.models.extra.article_main import Article, ArticleSubsection
|
10
14
|
from fabricatio.models.extra.rule import RuleSet
|
11
15
|
from fabricatio.utils import ok
|
@@ -97,9 +101,36 @@ class TweakArticleRAG(Action, RAG, Censor):
|
|
97
101
|
await self.censor_obj_inplace(
|
98
102
|
subsec,
|
99
103
|
ruleset=ruleset,
|
100
|
-
reference=f"{await self.
|
104
|
+
reference=f"{'\n\n'.join(d.display() for d in await self.aretrieve(refind_q, document_model=ArticleEssence, final_limit=self.ref_limit))}\n\n"
|
101
105
|
f"You can use Reference above to rewrite the `{subsec.__class__.__name__}`.\n"
|
102
106
|
f"You should Always use `{subsec.language}` as written language, "
|
103
107
|
f"which is the original language of the `{subsec.title}`. "
|
104
108
|
f"since rewrite a `{subsec.__class__.__name__}` in a different language is usually a bad choice",
|
105
109
|
)
|
110
|
+
|
111
|
+
|
112
|
+
class ChunkArticle(Action):
|
113
|
+
"""Chunk an article into smaller chunks."""
|
114
|
+
|
115
|
+
output_key:str = "article_chunks"
|
116
|
+
"""The key used to store the output of the action."""
|
117
|
+
max_chunk_size: Optional[int] = None
|
118
|
+
"""The maximum size of each chunk."""
|
119
|
+
max_overlapping_rate: Optional[float] = None
|
120
|
+
"""The maximum overlapping rate between chunks."""
|
121
|
+
|
122
|
+
async def _execute(
|
123
|
+
self,
|
124
|
+
article_path: str | Path,
|
125
|
+
bib_manager: BibManager,
|
126
|
+
max_chunk_size: Optional[int] = None,
|
127
|
+
max_overlapping_rate: Optional[float] = None,
|
128
|
+
**_,
|
129
|
+
) -> List[ArticleChunk]:
|
130
|
+
return ArticleChunk.from_file(
|
131
|
+
article_path,
|
132
|
+
bib_manager,
|
133
|
+
max_chunk_size=ok(max_chunk_size or self.max_chunk_size, "No max_chunk_size provided!"),
|
134
|
+
max_overlapping_rate=ok(max_overlapping_rate or self.max_overlapping_rate, "No max_overlapping_rate provided!"),
|
135
|
+
)
|
136
|
+
|
@@ -0,0 +1,96 @@
|
|
1
|
+
"""Inject data into the database."""
|
2
|
+
|
3
|
+
from typing import List, Optional
|
4
|
+
|
5
|
+
from questionary import text
|
6
|
+
|
7
|
+
from fabricatio.capabilities.rag import RAG
|
8
|
+
from fabricatio.config import configs
|
9
|
+
from fabricatio.journal import logger
|
10
|
+
from fabricatio.models.action import Action
|
11
|
+
from fabricatio.models.extra.rag import MilvusClassicModel, MilvusDataBase
|
12
|
+
from fabricatio.models.task import Task
|
13
|
+
from fabricatio.utils import ok
|
14
|
+
|
15
|
+
|
16
|
+
class InjectToDB(Action, RAG):
|
17
|
+
"""Inject data into the database."""
|
18
|
+
|
19
|
+
output_key: str = "collection_name"
|
20
|
+
collection_name: str = "my_collection"
|
21
|
+
"""The name of the collection to inject data into."""
|
22
|
+
|
23
|
+
async def _execute[T: MilvusDataBase](
|
24
|
+
self, to_inject: Optional[T] | List[Optional[T]], override_inject: bool = False, **_
|
25
|
+
) -> Optional[str]:
|
26
|
+
from pymilvus.milvus_client import IndexParams
|
27
|
+
|
28
|
+
if to_inject is None:
|
29
|
+
return None
|
30
|
+
if not isinstance(to_inject, list):
|
31
|
+
to_inject = [to_inject]
|
32
|
+
if not (seq := [t for t in to_inject if t is not None]): # filter out None
|
33
|
+
return None
|
34
|
+
logger.info(f"Injecting {len(seq)} items into the collection '{self.collection_name}'")
|
35
|
+
if override_inject:
|
36
|
+
self.check_client().client.drop_collection(self.collection_name)
|
37
|
+
|
38
|
+
await self.view(
|
39
|
+
self.collection_name,
|
40
|
+
create=True,
|
41
|
+
schema=seq[0].as_milvus_schema(
|
42
|
+
ok(
|
43
|
+
self.milvus_dimensions
|
44
|
+
or configs.rag.milvus_dimensions
|
45
|
+
or self.embedding_dimensions
|
46
|
+
or configs.embedding.dimensions
|
47
|
+
),
|
48
|
+
),
|
49
|
+
index_params=IndexParams(
|
50
|
+
seq[0].vector_field_name,
|
51
|
+
index_name=seq[0].vector_field_name,
|
52
|
+
index_type=seq[0].index_type,
|
53
|
+
metric_type=seq[0].metric_type,
|
54
|
+
),
|
55
|
+
).add_document(seq, flush=True)
|
56
|
+
|
57
|
+
return self.collection_name
|
58
|
+
|
59
|
+
|
60
|
+
class RAGTalk(Action, RAG):
|
61
|
+
"""RAG-enabled conversational action that processes user questions based on a given task.
|
62
|
+
|
63
|
+
This action establishes an interactive conversation loop where it retrieves context-relevant
|
64
|
+
information to answer user queries according to the assigned task briefing.
|
65
|
+
|
66
|
+
Notes:
|
67
|
+
task_input: Task briefing that guides how to respond to user questions
|
68
|
+
collection_name: Name of the vector collection to use for retrieval (default: "my_collection")
|
69
|
+
|
70
|
+
Returns:
|
71
|
+
Number of conversation turns completed before termination
|
72
|
+
"""
|
73
|
+
|
74
|
+
output_key: str = "task_output"
|
75
|
+
|
76
|
+
async def _execute(self, task_input: Task[str], **kwargs) -> int:
|
77
|
+
collection_name = kwargs.get("collection_name", "my_collection")
|
78
|
+
counter = 0
|
79
|
+
|
80
|
+
self.view(collection_name, create=True)
|
81
|
+
|
82
|
+
try:
|
83
|
+
while True:
|
84
|
+
user_say = await text("User: ").ask_async()
|
85
|
+
if user_say is None:
|
86
|
+
break
|
87
|
+
ret: List[MilvusClassicModel] = await self.aretrieve(user_say, document_model=MilvusClassicModel)
|
88
|
+
|
89
|
+
gpt_say = await self.aask(
|
90
|
+
user_say, system_message="\n".join(m.text for m in ret) + "\nYou can refer facts provided above."
|
91
|
+
)
|
92
|
+
print(f"GPT: {gpt_say}") # noqa: T201
|
93
|
+
counter += 1
|
94
|
+
except KeyboardInterrupt:
|
95
|
+
logger.info(f"executed talk action {counter} times")
|
96
|
+
return counter
|
@@ -130,7 +130,7 @@ class RAG(EmbeddingUsage):
|
|
130
130
|
if isinstance(data, MilvusDataBase):
|
131
131
|
data = [data]
|
132
132
|
|
133
|
-
data_vec = await self.vectorize([d.
|
133
|
+
data_vec = await self.vectorize([d.prepare_vectorization() for d in data])
|
134
134
|
prepared_data = [d.prepare_insertion(vec) for d, vec in zip(data, data_vec, strict=True)]
|
135
135
|
|
136
136
|
c_name = collection_name or self.safe_target_collection
|
@@ -188,13 +188,15 @@ class RAG(EmbeddingUsage):
|
|
188
188
|
async def aretrieve[D: MilvusDataBase](
|
189
189
|
self,
|
190
190
|
query: List[str] | str,
|
191
|
+
document_model: Type[D],
|
191
192
|
final_limit: int = 20,
|
192
|
-
**kwargs: Unpack[FetchKwargs
|
193
|
+
**kwargs: Unpack[FetchKwargs],
|
193
194
|
) -> List[D]:
|
194
195
|
"""Retrieve data from the collection.
|
195
196
|
|
196
197
|
Args:
|
197
198
|
query (List[str] | str): The query to be used for retrieval.
|
199
|
+
document_model (Type[D]): The model class used to convert retrieved data into document objects.
|
198
200
|
final_limit (int): The final limit on the number of results to return.
|
199
201
|
**kwargs (Unpack[FetchKwargs]): Additional keyword arguments for retrieval.
|
200
202
|
|
@@ -206,6 +208,7 @@ class RAG(EmbeddingUsage):
|
|
206
208
|
return (
|
207
209
|
await self.afetch_document(
|
208
210
|
vecs=(await self.vectorize(query)),
|
211
|
+
document_model=document_model,
|
209
212
|
**kwargs,
|
210
213
|
)
|
211
214
|
)[:final_limit]
|
{fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/adv_kwargs_types.py
RENAMED
@@ -1,10 +1,9 @@
|
|
1
1
|
"""A module containing kwargs types for content correction and checking operations."""
|
2
2
|
|
3
3
|
from importlib.util import find_spec
|
4
|
-
from typing import
|
4
|
+
from typing import NotRequired, TypedDict
|
5
5
|
|
6
6
|
from fabricatio.models.extra.problem import Improvement
|
7
|
-
from fabricatio.models.extra.rag import MilvusDataBase
|
8
7
|
from fabricatio.models.extra.rule import RuleSet
|
9
8
|
from fabricatio.models.generic import SketchedAble
|
10
9
|
from fabricatio.models.kwargs_types import ReferencedKwargs
|
@@ -49,19 +48,13 @@ if find_spec("pymilvus"):
|
|
49
48
|
schema: CollectionSchema | None
|
50
49
|
index_params: IndexParams | None
|
51
50
|
|
52
|
-
class FetchKwargs
|
51
|
+
class FetchKwargs(TypedDict):
|
53
52
|
"""Arguments for fetching data from vector collections.
|
54
53
|
|
55
54
|
Controls how data is retrieved from vector databases, including filtering
|
56
55
|
and result limiting parameters.
|
57
56
|
"""
|
58
57
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
result_per_query: int
|
63
|
-
|
64
|
-
class RetrievalKwargs(FetchKwargs, total=False):
|
65
|
-
"""Arguments for retrieval operations."""
|
66
|
-
|
67
|
-
final_limit: int
|
58
|
+
collection_name: NotRequired[str | None]
|
59
|
+
similarity_threshold: NotRequired[float]
|
60
|
+
result_per_query: NotRequired[int]
|
@@ -0,0 +1,120 @@
|
|
1
|
+
"""A Module containing the article rag models."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import ClassVar, Dict, List, Self, Unpack
|
5
|
+
|
6
|
+
from fabricatio.fs import safe_text_read
|
7
|
+
from fabricatio.journal import logger
|
8
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
9
|
+
from fabricatio.models.generic import AsPrompt
|
10
|
+
from fabricatio.models.kwargs_types import ChunkKwargs
|
11
|
+
from fabricatio.rust import BibManager, split_into_chunks
|
12
|
+
from fabricatio.utils import ok, wrapp_in_block
|
13
|
+
from more_itertools.recipes import flatten
|
14
|
+
from pydantic import Field
|
15
|
+
|
16
|
+
|
17
|
+
class ArticleChunk(MilvusDataBase, AsPrompt):
|
18
|
+
"""The chunk of an article."""
|
19
|
+
|
20
|
+
head_split: ClassVar[List[str]] = [
|
21
|
+
"引 言",
|
22
|
+
"引言",
|
23
|
+
"绪 论",
|
24
|
+
"绪论",
|
25
|
+
"前言",
|
26
|
+
"INTRODUCTION",
|
27
|
+
"Introduction",
|
28
|
+
]
|
29
|
+
tail_split: ClassVar[List[str]] = [
|
30
|
+
"参 考 文 献",
|
31
|
+
"参 考 文 献",
|
32
|
+
"参考文献",
|
33
|
+
"REFERENCES",
|
34
|
+
"References",
|
35
|
+
"Bibliography",
|
36
|
+
"Reference",
|
37
|
+
]
|
38
|
+
chunk: str
|
39
|
+
"""The segment of the article"""
|
40
|
+
year: int
|
41
|
+
"""The year of the article"""
|
42
|
+
authors: List[str] = Field(default_factory=list)
|
43
|
+
"""The authors of the article"""
|
44
|
+
article_title: str
|
45
|
+
"""The title of the article"""
|
46
|
+
bibtex_cite_key: str
|
47
|
+
"""The bibtex cite key of the article"""
|
48
|
+
|
49
|
+
def _as_prompt_inner(self) -> Dict[str, str]:
|
50
|
+
return {
|
51
|
+
self.article_title: f"{wrapp_in_block(self.chunk, 'Referring Content')}\n"
|
52
|
+
f"Authors: {';'.join(self.authors)}\n"
|
53
|
+
f"Published Year: {self.year}\n"
|
54
|
+
f"Bibtex Key: {self.bibtex_cite_key}\n",
|
55
|
+
}
|
56
|
+
|
57
|
+
def _prepare_vectorization_inner(self) -> str:
|
58
|
+
return self.chunk
|
59
|
+
|
60
|
+
@classmethod
|
61
|
+
def from_file[P: str | Path](
|
62
|
+
cls, path: P | List[P], bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]
|
63
|
+
) -> List[Self]:
|
64
|
+
"""Load the article chunks from the file."""
|
65
|
+
if isinstance(path, list):
|
66
|
+
result = list(flatten(cls._from_file_inner(p, bib_mgr, **kwargs) for p in path))
|
67
|
+
logger.debug(f"Number of chunks created from list of files: {len(result)}")
|
68
|
+
return result
|
69
|
+
|
70
|
+
return cls._from_file_inner(path, bib_mgr, **kwargs)
|
71
|
+
|
72
|
+
@classmethod
|
73
|
+
def _from_file_inner(cls, path: str | Path, bib_mgr: BibManager, **kwargs: Unpack[ChunkKwargs]) -> List[Self]:
|
74
|
+
path = Path(path)
|
75
|
+
|
76
|
+
title_seg = path.stem.split(" - ").pop()
|
77
|
+
|
78
|
+
key = (
|
79
|
+
bib_mgr.get_cite_key_by_title(title_seg)
|
80
|
+
or bib_mgr.get_cite_key_by_title_fuzzy(title_seg)
|
81
|
+
or bib_mgr.get_cite_key_fuzzy(path.stem)
|
82
|
+
)
|
83
|
+
if key is None:
|
84
|
+
logger.warning(f"no cite key found for {path.as_posix()}, skip.")
|
85
|
+
return []
|
86
|
+
authors = ok(bib_mgr.get_author_by_key(key), f"no author found for {key}")
|
87
|
+
year = ok(bib_mgr.get_year_by_key(key), f"no year found for {key}")
|
88
|
+
article_title = ok(bib_mgr.get_title_by_key(key), f"no title found for {key}")
|
89
|
+
|
90
|
+
result = [
|
91
|
+
cls(chunk=c, year=year, authors=authors, article_title=article_title, bibtex_cite_key=key)
|
92
|
+
for c in split_into_chunks(cls.strip(safe_text_read(path)), **kwargs)
|
93
|
+
]
|
94
|
+
logger.debug(f"Number of chunks created from file {path.as_posix()}: {len(result)}")
|
95
|
+
return result
|
96
|
+
|
97
|
+
@classmethod
|
98
|
+
def strip(cls, string: str) -> str:
|
99
|
+
"""Strip the head and tail of the string."""
|
100
|
+
logger.debug(f"String length before strip: {(original := len(string))}")
|
101
|
+
for split in (s for s in cls.head_split if s in string):
|
102
|
+
logger.debug(f"Strip head using {split}")
|
103
|
+
parts = string.split(split)
|
104
|
+
string = split.join(parts[1:]) if len(parts) > 1 else parts[0]
|
105
|
+
break
|
106
|
+
logger.debug(
|
107
|
+
f"String length after head strip: {(stripped_len := len(string))}, decreased by {(d := original - stripped_len)}"
|
108
|
+
)
|
109
|
+
if not d:
|
110
|
+
logger.warning("No decrease at head strip, which is might be abnormal.")
|
111
|
+
for split in (s for s in cls.tail_split if s in string):
|
112
|
+
logger.debug(f"Strip tail using {split}")
|
113
|
+
parts = string.split(split)
|
114
|
+
string = split.join(parts[:-1]) if len(parts) > 1 else parts[0]
|
115
|
+
break
|
116
|
+
logger.debug(f"String length after tail strip: {len(string)}, decreased by {(d := stripped_len - len(string))}")
|
117
|
+
if not d:
|
118
|
+
logger.warning("No decrease at tail strip, which is might be abnormal.")
|
119
|
+
|
120
|
+
return string
|
{fabricatio-0.2.10.dev0 → fabricatio-0.2.10.dev1}/python/fabricatio/models/extra/article_essence.py
RENAMED
@@ -1,8 +1,9 @@
|
|
1
1
|
"""ArticleEssence: Semantic fingerprint of academic paper for structured analysis."""
|
2
2
|
|
3
|
-
from typing import List
|
3
|
+
from typing import List
|
4
4
|
|
5
|
-
from fabricatio.models.
|
5
|
+
from fabricatio.models.extra.rag import MilvusDataBase
|
6
|
+
from fabricatio.models.generic import PersistentAble, SketchedAble
|
6
7
|
from pydantic import BaseModel
|
7
8
|
|
8
9
|
|
@@ -54,7 +55,7 @@ class Highlightings(BaseModel):
|
|
54
55
|
"""
|
55
56
|
|
56
57
|
|
57
|
-
class ArticleEssence(
|
58
|
+
class ArticleEssence(SketchedAble, PersistentAble, MilvusDataBase):
|
58
59
|
"""Structured representation of a scientific article's core elements in its original language."""
|
59
60
|
|
60
61
|
language: str
|
@@ -93,7 +94,7 @@ class ArticleEssence(ProposedAble, Display, PersistentAble, Vectorizable):
|
|
93
94
|
bibtex_cite_key: str
|
94
95
|
"""Bibtex cite key of the original article."""
|
95
96
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
97
|
+
def _prepare_vectorization_inner(self) -> str:
    """Return the result of `self.compact()` as the text to vectorize."""
    compact_text = self.compact()
    return compact_text
|
99
|
+
|
100
|
+
|
@@ -0,0 +1,98 @@
|
|
1
|
+
"""A module containing the RAG (Retrieval-Augmented Generation) models."""
|
2
|
+
|
3
|
+
from abc import ABC
|
4
|
+
from functools import partial
|
5
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Self, Sequence, Set
|
6
|
+
|
7
|
+
from fabricatio.decorators import precheck_package
|
8
|
+
from fabricatio.models.generic import Vectorizable
|
9
|
+
from fabricatio.utils import ok
|
10
|
+
from pydantic import JsonValue
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
from importlib.util import find_spec
|
14
|
+
|
15
|
+
from pydantic.fields import FieldInfo
|
16
|
+
|
17
|
+
if find_spec("pymilvus"):
|
18
|
+
from pymilvus import CollectionSchema
|
19
|
+
|
20
|
+
|
21
|
+
class MilvusDataBase(Vectorizable, ABC):
    """A base class for Milvus data."""

    primary_field_name: ClassVar[str] = "id"
    """The name of the primary field in Milvus."""
    vector_field_name: ClassVar[str] = "vector"
    """The name of the vector field in Milvus."""

    index_type: ClassVar[str] = "FLAT"
    """The type of index to be used in Milvus."""
    metric_type: ClassVar[str] = "COSINE"
    """The type of metric to be used in Milvus."""

    def prepare_insertion(self, vector: List[float]) -> Dict[str, Any]:
        """Build the row to insert into Milvus.

        Args:
            vector: The embedding stored under `vector_field_name`.

        Returns:
            dict: The model dump (``None`` values excluded, aliases used) with
            the vector added under `vector_field_name`.
        """
        row = self.model_dump(exclude_none=True, by_alias=True)
        row[self.vector_field_name] = vector
        return row

    @classmethod
    @precheck_package(
        "pymilvus", "pymilvus is not installed. Have you installed `fabricatio[rag]` instead of `fabricatio`?"
    )
    def as_milvus_schema(cls, dimension: int = 1024) -> "CollectionSchema":
        """Derive a Milvus collection schema from the model's fields.

        The schema starts with an auto-id INT64 primary field and a float
        vector field of the given dimension, then gets one field per model
        field according to its annotation.

        Args:
            dimension: Dimension of the vector field.

        Returns:
            The collection schema built from all fields.

        Raises:
            NotImplementedError: If a model field has an annotation that is not
                one of the supported scalar, list/set or JSON types.
        """
        from pymilvus import CollectionSchema, DataType, FieldSchema

        # Ordered rules: (accepted annotations, FieldSchema keyword arguments).
        rules = [
            ((int,), {"dtype": DataType.INT64}),
            ((str,), {"dtype": DataType.VARCHAR, "max_length": 65535}),
            ((float,), {"dtype": DataType.DOUBLE}),
            (
                (list[str], List[str], set[str], Set[str]),
                {"dtype": DataType.ARRAY, "element_type": DataType.VARCHAR, "max_length": 65535, "max_capacity": 4096},
            ),
            (
                (list[int], List[int], set[int], Set[int]),
                {"dtype": DataType.ARRAY, "element_type": DataType.INT64, "max_capacity": 4096},
            ),
            (
                (list[float], List[float], set[float], Set[float]),
                {"dtype": DataType.ARRAY, "element_type": DataType.DOUBLE, "max_capacity": 4096},
            ),
            ((JsonValue,), {"dtype": DataType.JSON}),
        ]

        fields = [
            FieldSchema(cls.primary_field_name, dtype=DataType.INT64, is_primary=True, auto_id=True),
            FieldSchema(cls.vector_field_name, dtype=DataType.FLOAT_VECTOR, dim=dimension),
        ]

        for name, info in cls.model_fields.items():
            name: str
            info: FieldInfo
            build = partial(FieldSchema, name, description=info.description or "")
            anno = ok(info.annotation)

            # First rule whose accepted annotations contain an equal one wins.
            options = next(
                (opts for accepted, opts in rules if any(anno == candidate for candidate in accepted)),
                None,
            )
            if options is None:
                raise NotImplementedError(f"{name}:{anno} is not supported")
            fields.append(build(**options))

        return CollectionSchema(fields)

    @classmethod
    def from_sequence(cls, data: Sequence[Dict[str, Any]]) -> List[Self]:
        """Instantiate the class once per dictionary, passing its items as keyword arguments."""
        return [cls(**row) for row in data]
|
87
|
+
|
88
|
+
|
89
|
+
class MilvusClassicModel(MilvusDataBase):
    """A class representing a classic model stored in Milvus."""

    text: str
    """The text to be stored in Milvus."""
    subject: str = ""
    """The subject of the text."""

    def _prepare_vectorization_inner(self) -> str:
        """Return the stored text as the content to vectorize."""
        content = self.text
        return content
|