django-dynamic-query 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_dynamic_query/__init__.py +0 -0
- django_dynamic_query/apps.py +33 -0
- django_dynamic_query/core/__init__.py +0 -0
- django_dynamic_query/core/builder.py +153 -0
- django_dynamic_query/core/exceptions.py +18 -0
- django_dynamic_query/core/validators.py +65 -0
- django_dynamic_query/management/commands/index_schema.py +12 -0
- django_dynamic_query/nlp/__init__.py +0 -0
- django_dynamic_query/nlp/ai_handler.py +149 -0
- django_dynamic_query/nlp/schema_rag.py +230 -0
- django_dynamic_query/templates/dynamic_query/dashboard.html +353 -0
- django_dynamic_query/urls.py +8 -0
- django_dynamic_query/utils.py +22 -0
- django_dynamic_query/views.py +178 -0
- django_dynamic_query-0.1.0.dist-info/METADATA +301 -0
- django_dynamic_query-0.1.0.dist-info/RECORD +18 -0
- django_dynamic_query-0.1.0.dist-info/WHEEL +4 -0
- django_dynamic_query-0.1.0.dist-info/licenses/LICENSE +201 -0
|
File without changes
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from django.apps import AppConfig
|
|
2
|
+
from django.conf import settings
|
|
3
|
+
from django.core.exceptions import ImproperlyConfigured
|
|
4
|
+
|
|
5
|
+
class DjangoDynamicQueryConfig(AppConfig):
    """App configuration that sanity-checks the ``DJANGO_DYNAMIC_QUERY`` setting at startup."""

    default_auto_field = 'django.db.models.BigAutoField'
    name = 'django_dynamic_query'
    verbose_name = "Django Dynamic Query Builder"

    def ready(self):
        """
        Validate the NLP-related keys of ``settings.DJANGO_DYNAMIC_QUERY``.

        Nothing is checked unless ``ENABLE_NLP`` is truthy. With NLP enabled:

        * ``MODEL_TYPE == "local"`` (the default) needs ``MODEL_NAME`` or ``MODEL_PATH``;
        * ``MODEL_TYPE == "api"`` needs ``AI_API_KEY``.

        Any other ``MODEL_TYPE`` value is left unchecked.

        :raises ImproperlyConfigured: when a required key for the selected mode is missing.
        """
        # ``or {}`` also covers a setting explicitly defined as None.
        config = getattr(settings, 'DJANGO_DYNAMIC_QUERY', None) or {}
        if not config.get('ENABLE_NLP', False):
            return

        # Compare case-insensitively so startup validation covers spellings such as "API".
        model_type = str(config.get("MODEL_TYPE") or "local").lower()

        if model_type == "local":
            model_name = config.get("MODEL_NAME")
            model_path = config.get("MODEL_PATH")

            if not model_name and not model_path:
                raise ImproperlyConfigured(
                    "MODEL_NAME or MODEL_PATH must be provided when MODEL_TYPE is set to 'local'."
                )
        elif model_type == "api":
            if not config.get("AI_API_KEY"):
                # Name the key that is actually read, so the message is actionable.
                raise ImproperlyConfigured(
                    "AI_API_KEY must be provided when MODEL_TYPE is set to 'api'."
                )
            print("[Django Dynamic Query] API NLP mode verified.")
|
|
File without changes
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from django.db.models import Q, Count, Sum, Avg, Min, Max
|
|
2
|
+
from django.core.exceptions import FieldError, ValidationError
|
|
3
|
+
from .validators import validate_query_structure
|
|
4
|
+
|
|
5
|
+
# Aggregate classes that query JSON may reference, keyed by their class name
# ("Count", "Sum", "Avg", "Min", "Max").
AGGREGATE_FUNCTIONS = {
    aggregate.__name__: aggregate
    for aggregate in (Count, Sum, Avg, Min, Max)
}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DynamicQueryBuilder:
    """Build Django ``Q`` expressions and ORM source text from a JSON-like query description."""

    def __init__(self, model_label: str, query_data: dict):
        """
        Resolve the target model and validate the query structure.

        :param model_label: The label of the model in the format 'app_label.ModelName'.
        :param query_data: A dict describing the query. Keys read by this class are
            "logic", "filters", "annotations", "aggregates" and "order_by". A filter
            leaf is a dict with "field", optional "operator" (default "exact"),
            "value" and optional "negate"; a group is a dict with "filters" and
            optional "logic" ("AND"/"OR").
        :raises: whatever ``validate_query_structure`` raises for a bad label or structure.
        """
        self.query_data = query_data or {}

        # Validate the same normalized mapping that the other methods will read.
        self.model = validate_query_structure(model_label, self.query_data)

    @staticmethod
    def _group_logic(node: dict) -> str:
        """Return the upper-cased logic of a group node, treating a missing/None value as "AND"."""
        return str(node.get("logic") or "AND").upper()

    def _supported_specs(self, section: str):
        """Yield ``(alias, function_name, field)`` for entries of ``section`` whose function is whitelisted."""
        for alias, info in (self.query_data.get(section) or {}).items():
            func = info.get("function")
            if func in AGGREGATE_FUNCTIONS:
                yield alias, func, info.get("field")

    def _aggregate_kwargs(self, section: str) -> dict:
        """Build ``alias -> aggregate expression`` kwargs for ``annotate()`` / ``aggregate()``."""
        return {
            alias: AGGREGATE_FUNCTIONS[func](field)
            for alias, func, field in self._supported_specs(section)
        }

    def _aggregate_source(self, section: str) -> str:
        """Render the entries of ``section`` as ``alias=Func('field')`` source text."""
        return ", ".join(
            f"{alias}={func}({field!r})"
            for alias, func, field in self._supported_specs(section)
        )

    def _order_by_fields(self) -> list:
        """Return "order_by" as a list; values that are neither list nor str are ignored."""
        order_by = self.query_data.get("order_by")
        if isinstance(order_by, list):
            return order_by
        if isinstance(order_by, str) and order_by:
            return [order_by]
        return []

    def _parse_node(self, node: dict) -> "Q":
        """
        Recursively convert a filter node into a ``Q`` object.

        :param node: a group (has "filters") or a leaf (has "field").
        :return: the combined ``Q``; an empty group yields an empty ``Q()``.
        """
        if "filters" in node:
            use_or = self._group_logic(node) == "OR"
            group_q = Q()
            for sub_node in node["filters"]:
                sub_q = self._parse_node(sub_node)
                if use_or:
                    # An empty Q is falsy: seed the group with the first real child.
                    group_q = group_q | sub_q if group_q else sub_q
                else:
                    group_q &= sub_q
            return group_q

        field = node.get("field")
        operator = node.get("operator", "exact")
        value = node.get("value")
        negate = node.get("negate", False)

        lookup = field if operator == "exact" else f"{field}__{operator}"
        q_obj = Q(**{lookup: value})
        return ~q_obj if negate else q_obj

    def validate_query_syntax(self):
        """
        Validate the syntax of the query data before generating ORM code.

        The query is applied to ``self.model.objects.none()`` in the same order
        ``build_orm_code`` emits it: filter, annotate, order_by, aggregate.

        :return: ``None`` when every step is accepted, otherwise the error message as a string.
        """
        print('validating query syntax...')
        try:
            queryset = self.model.objects.none()

            if self.query_data.get("filters"):
                queryset = queryset.filter(self._parse_node(self.query_data))

            anno_kwargs = self._aggregate_kwargs("annotations")
            if anno_kwargs:
                queryset = queryset.annotate(**anno_kwargs)

            # build_orm_code() emits order_by, so it has to be checked here as well.
            order_by = self._order_by_fields()
            if order_by:
                queryset = queryset.order_by(*order_by)

            aggr_kwargs = self._aggregate_kwargs("aggregates")
            if aggr_kwargs:
                queryset.aggregate(**aggr_kwargs)

            return None

        except (FieldError, ValidationError, TypeError, ValueError) as e:
            return str(e)
        except Exception as e:
            return f"Unexpected error during query validation: {str(e)}"

    def _generate_orm_string_recursive(self, node: dict) -> str:
        """
        Recursively generate an ORM string representation of a filter node.

        :param node: a group (has "filters") or a leaf (has "field").
        :return: source text such as ``Q(age__gt=3)`` or ``(Q(a=1) | Q(b=2))``.
        """
        if "filters" in node:
            join_str = " | " if self._group_logic(node) == "OR" else " & "

            parts = [self._generate_orm_string_recursive(sub_node) for sub_node in node["filters"]]
            if not parts:
                # "()" is not a valid operand; an empty group is the neutral Q().
                return "Q()"
            if len(parts) == 1:
                return parts[0]
            return f"({join_str.join(parts)})"

        field = node.get("field")
        operator = node.get("operator", "exact")
        value = node.get("value")
        negate = node.get("negate", False)

        lookup = field if operator == "exact" else f"{field}__{operator}"
        orm_str = f"Q({lookup}={repr(value)})"

        return f"~{orm_str}" if negate else orm_str

    def build_orm_code(self) -> str:
        """
        Generate a string representation of the ORM code for the query.

        :return: text of the form ``Model.objects.filter(...)`` (or ``.all()`` when there
            are no filters), followed by optional ``.annotate``, ``.order_by`` and
            ``.aggregate`` calls.
        """
        code = f"{self.model.__name__}.objects"

        if self.query_data.get("filters"):
            q_expression = self._generate_orm_string_recursive(self.query_data)
            code += f".filter({q_expression})"
        else:
            code += ".all()"

        annotations = self._aggregate_source("annotations")
        if annotations:
            code += f".annotate({annotations})"

        order_by = self._order_by_fields()
        if order_by:
            order_by_str = ", ".join(repr(field) for field in order_by)
            code += f".order_by({order_by_str})"

        aggregates = self._aggregate_source("aggregates")
        if aggregates:
            code += f".aggregate({aggregates})"

        return code
|
|
153
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class DynamicQueryError(Exception):
    """Common base class for the errors raised while validating a dynamic query."""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InvalidModelError(DynamicQueryError):
    """Signals that the requested model could not be used for a dynamic query."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InvalidFieldError(DynamicQueryError):
    """Signals that a field referenced by a dynamic query is not acceptable."""
|
|
14
|
+
|
|
15
|
+
class InvalidOperatorError(DynamicQueryError):
    """Signals that a filter uses an operator outside the supported set."""
|
|
18
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from django.apps import apps
|
|
2
|
+
from django.core.exceptions import FieldDoesNotExist
|
|
3
|
+
from .exceptions import DynamicQueryError, InvalidModelError, InvalidFieldError, InvalidOperatorError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Lookup suffixes a filter may use as its "operator".
VALID_OPERATORS = {
    # equality, membership and null checks
    'exact', 'iexact', 'in', 'isnull',
    # ordering comparisons
    'gt', 'gte', 'lt', 'lte', 'range',
    # text matching
    'contains', 'icontains',
    'startswith', 'istartswith',
    'endswith', 'iendswith',
    'search', 'regex', 'iregex',
    # date parts
    'year', 'month', 'day', 'week_day',
}

# Field names that must never appear anywhere in a filter field path.
SENSITIVE_FIELDS = {
    'password',
    'is_staff',
    'is_superuser',
    'last_login',
    'date_joined',
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _validate_field_path(model, field_path: str):
    """
    Check that a ``__``-separated field path resolves against ``model``.

    Each segment is rejected if it is listed in ``SENSITIVE_FIELDS`` and must be
    a field of the model reached so far. Every segment except the last must be a
    relation, because the next segment is looked up on its related model.

    :param model: the model class the path starts from.
    :param field_path: e.g. ``"author__name"``.
    :raises InvalidFieldError: for a sensitive segment, an unknown field, or an
        attempt to traverse through something that is not a relation.
    """
    parts = field_path.split("__")
    current_model = model
    last_index = len(parts) - 1

    for i, part in enumerate(parts):
        if part in SENSITIVE_FIELDS:
            raise InvalidFieldError(f"Access to sensitive field '{part}' is not allowed.")

        try:
            field = current_model._meta.get_field(part)
        except FieldDoesNotExist as exc:
            raise InvalidFieldError(
                f"Invalid field '{part}' for model '{current_model._meta.label}'."
            ) from exc

        if i == last_index:
            break

        # Without this check the next segment would be looked up on the *same*
        # model, so "name__title" passed whenever both were fields of one model.
        related_model = field.related_model if field.is_relation else None
        if related_model is None:
            raise InvalidFieldError(
                f"Field '{part}' on model '{current_model._meta.label}' is not a relation "
                f"and cannot be followed by '{parts[i + 1]}'."
            )
        current_model = related_model
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def validate_query_structure(model_label: str, query_data: dict) -> type:
    """
    Validate the structure of the query data and resolve the target model.

    Filters are checked recursively: every leaf needs a string "field" that
    resolves on the model and an "operator" from ``VALID_OPERATORS`` (default
    "exact"). Field references in "annotations", "aggregates" and "order_by" are
    checked against ``SENSITIVE_FIELDS`` so the denylist cannot be bypassed
    through those sections.

    :param model_label: model label in the form 'app_label.ModelName'.
    :param query_data: the query description (a dict).
    :return: the resolved model class.
    :raises InvalidModelError: if the label does not resolve to a model.
    :raises InvalidFieldError: for a missing, non-string, unknown or sensitive field.
    :raises InvalidOperatorError: for an operator outside ``VALID_OPERATORS``.
    :raises DynamicQueryError: if a node is not a dict or "filters" is not a list.
    """
    try:
        model = apps.get_model(model_label)
    except (LookupError, ValueError, AttributeError, TypeError) as exc:
        # AttributeError/TypeError cover labels that are not strings (e.g. None).
        raise InvalidModelError(f"Invalid model label: {model_label}") from exc

    def _validate_node(node):
        """Validate one filter node (group or leaf), recursing into groups."""
        if not isinstance(node, dict):
            raise DynamicQueryError(
                f"AI Generated an invalid nested filter structure. "
                f"Expected a JSON object (dict), but received a string/primitive: '{node}'"
            )
        if "filters" in node:
            sub_nodes = node["filters"]
            if not isinstance(sub_nodes, (list, tuple)):
                raise DynamicQueryError(
                    f"'filters' must be a list of filter objects, got {type(sub_nodes).__name__}."
                )
            for sub_node in sub_nodes:
                _validate_node(sub_node)
            return

        field_name = node.get("field")
        operator = node.get("operator", "exact")

        if not field_name:
            raise InvalidFieldError("Field name is required in filter.")
        if not isinstance(field_name, str):
            raise InvalidFieldError(
                f"Field name must be a string, got {type(field_name).__name__}."
            )
        # The isinstance guard keeps unhashable values (lists, dicts) from
        # raising TypeError in the set membership test.
        if not isinstance(operator, str) or operator not in VALID_OPERATORS:
            raise InvalidOperatorError(f"Invalid operator '{operator}' for field '{field_name}'.")

        _validate_field_path(model, field_name)

    def _reject_sensitive(field_path):
        """Raise if any segment of ``field_path`` is a sensitive field name."""
        if not isinstance(field_path, str):
            return
        # A leading "-" marks descending order in order_by entries.
        for part in field_path.lstrip("-").split("__"):
            if part in SENSITIVE_FIELDS:
                raise InvalidFieldError(f"Access to sensitive field '{part}' is not allowed.")

    _validate_node(query_data)

    # query_data is known to be a dict here. Only the denylist is enforced for
    # these sections: they may legitimately reference annotation aliases, so
    # existence of the field is left to the ORM.
    for section in ("annotations", "aggregates"):
        specs = query_data.get(section)
        if isinstance(specs, dict):
            for info in specs.values():
                if isinstance(info, dict):
                    _reject_sensitive(info.get("field"))

    order_by = query_data.get("order_by")
    if isinstance(order_by, str):
        order_by = [order_by]
    if isinstance(order_by, list):
        for entry in order_by:
            _reject_sensitive(entry)

    return model
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from django.core.management.base import BaseCommand
|
|
2
|
+
from django_dynamic_query.nlp.schema_rag import DjangoSchemaRAG
|
|
3
|
+
|
|
4
|
+
class Command(BaseCommand):
    """Management command that rebuilds the schema index through ``DjangoSchemaRAG``."""

    help = "Indexes the host Django project models and fields into the Vector DB for RAG."

    def handle(self, *args, **options):
        """Announce the scan, run the indexer, then print its summary in the success style."""
        out = self.stdout
        out.write("Scanning project schema...")
        summary = DjangoSchemaRAG().index_project_models()
        out.write(self.style.SUCCESS(summary))
|
|
12
|
+
|
|
File without changes
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from django.apps import apps
|
|
6
|
+
from django.conf import settings
|
|
7
|
+
|
|
8
|
+
from openai import OpenAI
|
|
9
|
+
|
|
10
|
+
class DjangoAIQueryTransformer:
    """Turn natural-language text into the JSON query structure using a cloud API or a local Ollama server."""

    # Fallbacks used when the corresponding setting is missing or empty.
    DEFAULT_MODEL_NAME = "gemma3:1b"
    DEFAULT_LOCAL_URL = "http://127.0.0.1:11434/api/chat"

    def __init__(self, model_label: str, rag_fields: list = None):
        """
        :param model_label: label of the target model, interpolated into the system prompt.
        :param rag_fields: list of dicts with "name" and "type" keys describing the
            model's fields; defaults to an empty list.
        """
        self.model_label = model_label
        self.rag_fields = rag_fields or []

        # The default mapping only applies when the setting is absent altogether.
        self.config = getattr(settings, "DJANGO_DYNAMIC_QUERY", {
            "ENABLE_NLP": False,
            "MODEL_TYPE": "local",
            "MODEL_PATH": "",
            "MODEL_NAME": self.DEFAULT_MODEL_NAME,
            "AI_API_KEY": ""
        })

    def generate_query_json(self, user_text: str) -> dict:
        """
        Build the system prompt and ask the configured provider for a query dict.

        :param user_text: the user's natural-language request.
        :return: the parsed JSON object produced by the model, or ``{"error": ...}``
            when NLP is disabled or the provider call fails.
        """
        if not self.config.get("ENABLE_NLP", False):
            return {"error": "NLP feature is disabled in DJANGO_DYNAMIC_QUERY settings."}

        fields_context = "\n".join(
            f"- Name: '{f['name']}', Type: {f['type']}" for f in self.rag_fields
        )

        # The prompt text is kept flush-left so the string sent to the model has
        # no leading indentation on its lines.
        system_prompt = f"""
You are a backend assistant that converts natural language into structured JSON filters for Django ORM.
Target Django Model: '{self.model_label}'
Available fields for this model:
{fields_context}

CRITICAL JSON STRUCTURE RULES:
1. Every item inside the "filters" array MUST be a JSON Object (dictionary), NEVER a string.
2. A valid filter object format: {{"field": "field_name", "operator": "exact/contains/gt/lt", "value": "extracted_value"}}

Return ONLY a raw valid JSON object with keys: "logic", "filters", "annotations", "aggregates". Do not include markdown code blocks.
"""

        # ``or`` (rather than a .get default) also covers keys present but set to None/"".
        model_type = str(self.config.get('MODEL_TYPE') or 'local').lower()
        model_name = self.config.get('MODEL_NAME') or self.DEFAULT_MODEL_NAME

        if model_type == "api":
            api_key = self.config.get('AI_API_KEY')
            return self._call_api_provider(model_name, api_key, system_prompt, user_text)
        return self._call_local_provider(model_name, system_prompt, user_text)

    @staticmethod
    def _as_query_dict(parsed) -> dict:
        """Return ``parsed`` if it is a JSON object, otherwise an error dict (callers expect a dict)."""
        if isinstance(parsed, dict):
            return parsed
        return {"error": "AI response was valid JSON but not a JSON object. Try a stronger model or re-write your prompt."}

    def _call_api_provider(self, model_name: str, api_key, system_prompt, user_text) -> dict:
        """
        Send the request through the ``OpenAI`` client (optionally with ``API_BASE_URL``).

        :return: the parsed JSON object, or ``{"error": ...}`` on a missing key or any failure.
        """
        if not api_key:
            return {"error": "AI_API_KEY is required for 'api' model type."}

        base_url = self.config.get("API_BASE_URL", None) or None

        try:
            client = OpenAI(api_key=api_key, base_url=base_url)

            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_text}
                ],
                response_format={"type": "json_object"},
                temperature=0.1
            )
            return self._as_query_dict(json.loads(response.choices[0].message.content))
        except Exception as e:
            return {"error": f"Cloud AI Provider Error: {str(e)}"}

    def _call_local_provider(self, model_name: str, system_prompt: str, user_text: str) -> dict:
        """
        POST the chat request to the URL in ``MODEL_PATH`` (default: local Ollama chat endpoint).

        :return: the parsed JSON object, or ``{"error": ...}`` describing what went wrong.
        """
        invalid_json = {"error": "AI response was not a valid JSON. Try a stronger model or re-write your prompt."}

        base_url = str(self.config.get("MODEL_PATH") or "").strip() or self.DEFAULT_LOCAL_URL

        payload = {
            "model": model_name,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text}
            ],
            "stream": False,
            "format": "json",
            "options": {
                "temperature": 0.0,
                "top_k": 1,
                "top_p": 0.1
            }
        }
        try:
            response = requests.post(base_url, json=payload, timeout=120.0)
            if response.status_code != 200:
                # Best effort: surface the server's own error text when the body is JSON.
                try:
                    err_json = response.json()
                except ValueError:
                    err_json = None
                if isinstance(err_json, dict) and "error" in err_json:
                    return {"error": f"Ollama Error: {err_json['error']}"}
                return {"error": f"Ollama returned HTTP {response.status_code}"}

            # Handled here because requests' JSON error can also be a
            # RequestException, which the handler below would report as a
            # connection failure.
            try:
                response_data = response.json()
            except ValueError:
                return invalid_json

            if not isinstance(response_data, dict):
                return {"error": "Ollama returned an empty response or invalid JSON structure."}
            if "error" in response_data:
                return {"error": f"Ollama Error: {response_data['error']}"}

            ai_content = ""
            message = response_data.get("message")
            if isinstance(message, dict):
                ai_content = message.get("content", "")
            elif "response" in response_data:
                ai_content = response_data.get("response", "")

            if not ai_content:
                return {"error": "Ollama returned an empty response or invalid JSON structure."}

            ai_content_cleaned = self._clean_markdown_json(ai_content)

            return self._as_query_dict(json.loads(ai_content_cleaned))

        except requests.exceptions.Timeout:
            return {"error": f"Timeout occurred! Model '{model_name}' took too long to respond. Ensure your system hardware can run this model efficiently."}
        except requests.exceptions.RequestException as req_err:
            return {"error": f"Could not connect to Ollama. Ensure Ollama is running on {base_url}. Details: {str(req_err)}"}
        except json.JSONDecodeError:
            return invalid_json
        except Exception as e:
            return {"error": f"Local AI Provider Error: {str(e)}"}

    def _clean_markdown_json(self, raw_response: str) -> str:
        """
        Strip a Markdown code fence from the model output, if there is one.

        Both fences tagged ``json`` and untagged fences are unwrapped; text
        without a fence is returned stripped of surrounding whitespace.
        """
        match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', raw_response, re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return raw_response.strip()
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import uuid
|
|
4
|
+
import re
|
|
5
|
+
from collections import Counter
|
|
6
|
+
|
|
7
|
+
from django.apps import apps
|
|
8
|
+
from django.conf import settings
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DjangoSchemaRAG:
|
|
12
|
+
def __init__(self):
|
|
13
|
+
"""
|
|
14
|
+
Creating Local dir for project db schema
|
|
15
|
+
"""
|
|
16
|
+
self.db_dir = os.path.abspath(os.path.join(settings.BASE_DIR, ".ddq_schema_rag"))
|
|
17
|
+
|
|
18
|
+
import chromadb
|
|
19
|
+
from chromadb.utils import embedding_functions
|
|
20
|
+
|
|
21
|
+
self.client = chromadb.PersistentClient(path=self.db_dir)
|
|
22
|
+
self.emb_fn = embedding_functions.DefaultEmbeddingFunction()
|
|
23
|
+
|
|
24
|
+
self.collection_name = "django_rag_v2_collection"
|
|
25
|
+
self.collection = self.client.get_or_create_collection(
|
|
26
|
+
name=self.collection_name,
|
|
27
|
+
embedding_function=self.emb_fn
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
def index_project_models(self):
|
|
31
|
+
"""
|
|
32
|
+
scanning all the project models and their
|
|
33
|
+
fields and types of their data and verbose name
|
|
34
|
+
"""
|
|
35
|
+
try:
|
|
36
|
+
self.client.delete_collection(name=self.collection_name)
|
|
37
|
+
except Exception as e:
|
|
38
|
+
print(f"Error deleting collection: {e}")
|
|
39
|
+
|
|
40
|
+
self.collection = self.client.get_or_create_collection(
|
|
41
|
+
name=self.collection_name,
|
|
42
|
+
embedding_function=self.emb_fn
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
documents = []
|
|
46
|
+
metadatas = []
|
|
47
|
+
ids = []
|
|
48
|
+
|
|
49
|
+
seen_models = set()
|
|
50
|
+
|
|
51
|
+
for model in apps.get_models():
|
|
52
|
+
if model._meta.app_label in ['admin', 'auth', 'contenttypes', 'sessions']:
|
|
53
|
+
continue # Skip built-in Django apps
|
|
54
|
+
|
|
55
|
+
model_label = f"{model._meta.app_label}.{model.__name__}".lower()
|
|
56
|
+
|
|
57
|
+
if model_label in seen_models:
|
|
58
|
+
continue
|
|
59
|
+
seen_models.add(model_label)
|
|
60
|
+
|
|
61
|
+
verbose_name = str(getattr(model._meta, 'verbose_name', model.__name__))
|
|
62
|
+
verbose_name_plural = str(getattr(model._meta, 'verbose_name_plural', verbose_name))
|
|
63
|
+
|
|
64
|
+
fields_description = []
|
|
65
|
+
fields_payload = []
|
|
66
|
+
|
|
67
|
+
for field in model._meta.get_fields():
|
|
68
|
+
if field.concrete and not field.many_to_many:
|
|
69
|
+
field_name = field.name
|
|
70
|
+
field_type = field.get_internal_type()
|
|
71
|
+
field_verbose_name = getattr(field, 'verbose_name', field_name)
|
|
72
|
+
help_text = getattr(field, 'help_text', '')
|
|
73
|
+
|
|
74
|
+
fields_description.append(
|
|
75
|
+
f"Field Name: '{field_name}' (Type: {field_type}, Label/Verbose: '{field_verbose_name}')"
|
|
76
|
+
)
|
|
77
|
+
fields_payload.append({"name": field_name, "type": field_type})
|
|
78
|
+
|
|
79
|
+
document_content = (
|
|
80
|
+
f"Django Model Class: {model.__name__} | App Label: {model._meta.app_label} | "
|
|
81
|
+
f"Alternative Translations/Names: {verbose_name}, {verbose_name_plural} | "
|
|
82
|
+
f"Table Columns and Fields Schema: {' - '.join(fields_description)}"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
documents.append(document_content)
|
|
86
|
+
metadatas.append({
|
|
87
|
+
"model_label": model_label,
|
|
88
|
+
"field_json": json.dumps(fields_payload)
|
|
89
|
+
})
|
|
90
|
+
ids.append(str(uuid.uuid4()))
|
|
91
|
+
|
|
92
|
+
if documents:
|
|
93
|
+
self.collection.add(documents=documents, metadatas=metadatas, ids=ids)
|
|
94
|
+
return f"Successfully indexed {len(documents)} fields across the project."
|
|
95
|
+
return "No correct models found to index."
|
|
96
|
+
|
|
97
|
+
def search_semantic_schema(self, user_query: str) -> dict | None:
|
|
98
|
+
"""Pure RAG Flow: Semantic search with safe dict parsing and hybrid fallback"""
|
|
99
|
+
try:
|
|
100
|
+
results = self.collection.query(
|
|
101
|
+
query_texts=[user_query],
|
|
102
|
+
n_results=1
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
if results and results.get('metadatas') and results['metadatas'][0]:
|
|
106
|
+
best_match_meta = results['metadatas'][0][0]
|
|
107
|
+
|
|
108
|
+
if not best_match_meta:
|
|
109
|
+
return None
|
|
110
|
+
|
|
111
|
+
model_label = best_match_meta.get("model_label")
|
|
112
|
+
fields_json = best_match_meta.get("fields_json")
|
|
113
|
+
|
|
114
|
+
if model_label:
|
|
115
|
+
# لایه ایمنی: اگر متادیتا به هر دلیلی در کروما None بود، داینامیک از جنگو بازسازیاش کن تا کراش نکند
|
|
116
|
+
if not fields_json or not isinstance(fields_json, str):
|
|
117
|
+
try:
|
|
118
|
+
model = apps.get_model(model_label)
|
|
119
|
+
fields_payload = []
|
|
120
|
+
for field in model._meta.get_fields():
|
|
121
|
+
if field.concrete and not field.many_to_many:
|
|
122
|
+
fields_payload.append({
|
|
123
|
+
"name": field.name,
|
|
124
|
+
"type": field.get_internal_type()
|
|
125
|
+
})
|
|
126
|
+
return {
|
|
127
|
+
"model_label": model_label,
|
|
128
|
+
"fields": fields_payload
|
|
129
|
+
}
|
|
130
|
+
except Exception:
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
return {
|
|
134
|
+
"model_label": model_label,
|
|
135
|
+
"fields": json.loads(fields_json)
|
|
136
|
+
}
|
|
137
|
+
except Exception as e:
|
|
138
|
+
print(f"RAG Vector Search Error: {e}")
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
def retrieve_all_model_fields(self, model_label: str) -> list:
|
|
142
|
+
"""
|
|
143
|
+
ChromaDB filtering bug fix: Instead of a guesstimate vector query,
|
|
144
|
+
fetches all fields of this specific model directly, with a 100% guarantee.
|
|
145
|
+
"""
|
|
146
|
+
try:
|
|
147
|
+
target_label = model_label.lower()
|
|
148
|
+
all_data = self.collection.get()
|
|
149
|
+
|
|
150
|
+
extracted_fields = []
|
|
151
|
+
|
|
152
|
+
if all_data and 'metadatas' in all_data and all_data['metadatas']:
|
|
153
|
+
for meta in all_data['metadatas']:
|
|
154
|
+
if meta.get("model_label") == target_label:
|
|
155
|
+
extracted_fields.append({
|
|
156
|
+
"name": meta["field_name"],
|
|
157
|
+
"type": meta["field_type"]
|
|
158
|
+
})
|
|
159
|
+
return extracted_fields
|
|
160
|
+
except Exception as e:
|
|
161
|
+
print(f"ChromaDB Fetch Error: {e}")
|
|
162
|
+
return []
|
|
163
|
+
|
|
164
|
+
def detect_model_from_query(self, user_query) -> str | None:
|
|
165
|
+
"""
|
|
166
|
+
detect model_name like: app_label.ModelName based on user query.
|
|
167
|
+
"""
|
|
168
|
+
try:
|
|
169
|
+
results = self.collection.query(
|
|
170
|
+
query_texts=[user_query],
|
|
171
|
+
n_results=5
|
|
172
|
+
)
|
|
173
|
+
if results and results["metadatas"] and results['metadatas'][0]:
|
|
174
|
+
models = [meta['model_label'] for meta in results['metadatas'][0]]
|
|
175
|
+
if models:
|
|
176
|
+
return Counter(models).most_common(1)[0][0]
|
|
177
|
+
except Exception as e:
|
|
178
|
+
return str(e)
|
|
179
|
+
return None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class DjangoSchemaRegistry:
    """
    Keyword-based model lookup that works directly on Django's app registry.

    Nothing is read from ChromaDB here: candidates come from ``apps.get_models()``.
    """

    def detect_model_from_query(self, user_query: str) -> str | None:
        """
        Pick the model whose names and fields best overlap with the query words.

        The query is lower-cased and split with ``\\w+``. Each word that is a
        substring of the model name or one of its verbose names adds 15 points;
        each word that is a substring of a concrete field's name or verbose name
        adds 3 points per field. The first model with the strictly highest
        positive score wins.

        :return: ``app_label.ModelName`` of the winner, or ``None`` if every score is 0.
        """
        words = re.findall(r'\w+', user_query.lower())

        winner = None
        top_score = 0

        for candidate in apps.get_models():
            meta = candidate._meta
            if meta.app_label in ['admin', 'auth', 'contenttypes', 'sessions']:
                continue

            name_lc = candidate.__name__.lower()
            singular = str(meta.verbose_name).lower() if hasattr(meta, 'verbose_name') else ""
            plural = str(meta.verbose_name_plural).lower() if hasattr(meta, 'verbose_name_plural') else ""

            # High weight for a direct match with the model's own names.
            points = 15 * sum(
                1
                for word in words
                if word in name_lc
                or (singular and word in singular)
                or (plural and word in plural)
            )

            # Secondary weight for matches with the model's fields
            # (in case the user mentioned a field name).
            for column in meta.get_fields():
                if not column.concrete:
                    continue
                column_name = column.name.lower()
                column_label = str(getattr(column, 'verbose_name', '')).lower()

                points += 3 * sum(
                    1
                    for word in words
                    if word in column_name or (column_label and word in column_label)
                )

            # top_score starts at 0, so only a positive score can win.
            if points > top_score:
                top_score = points
                winner = f"{meta.app_label}.{candidate.__name__}"

        return winner
|