satisfactoscript 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satisfactoscript/__init__.py +5 -0
- satisfactoscript/agentic/__init__.py +0 -0
- satisfactoscript/agentic/agent.py +127 -0
- satisfactoscript/core/__init__.py +5 -0
- satisfactoscript/core/config.py +144 -0
- satisfactoscript/core/core.py +696 -0
- satisfactoscript/core/loaders.py +137 -0
- satisfactoscript/core/registry.py +94 -0
- satisfactoscript/semantic/__init__.py +3 -0
- satisfactoscript/semantic/semantic.py +186 -0
- satisfactoscript-0.1.0.dist-info/METADATA +145 -0
- satisfactoscript-0.1.0.dist-info/RECORD +14 -0
- satisfactoscript-0.1.0.dist-info/WHEEL +5 -0
- satisfactoscript-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from ..semantic import SemanticEngine
|
|
4
|
+
|
|
5
|
+
class GenBIAgent:
    """
    An agentic layer designed to interface natural language queries with the Semantic Engine.
    It utilizes LLMs to translate human questions into structured semantic queries (JSON).
    """

    def __init__(self, semantic_engine: "SemanticEngine", default_model: str = "kpi_export_sales"):
        """
        Initializes the GenBIAgent with the semantic engine and a default target model.

        Args:
            semantic_engine (SemanticEngine): The configured Semantic Engine instance.
                (Annotation is a forward-reference string so the class can be defined
                even if the semantic package is imported lazily.)
            default_model (str, optional): The default semantic model to use for queries.
                Defaults to "kpi_export_sales".
        """
        self.semantic = semantic_engine
        self.default_model = default_model
        # Pre-load the clean context (the business dictionary) for the LLM.
        self.context = self.semantic.get_llm_context(self.default_model)

    def ask(self, question: str):
        """
        Takes a natural language question, queries the LLM API to translate it,
        and executes the corresponding semantic query to return a PySpark DataFrame.

        Args:
            question (str): The natural language question from the user.

        Returns:
            DataFrame: The resulting PySpark DataFrame containing the answer,
                       or None if an error occurred during translation or execution.
        """
        print(f"👤 [User] Question : '{question}'")

        # 1. System prompt: lock the model down to avoid hallucinations.
        # Fixed here: "Anlytics" typo and a truncated leftover rule line
        # ("- If the question has no mention") that duplicated the rule below it.
        system_prompt = f"""You are an expert in Data Analytics.
Your role here is just to translate questions coming from users in JSON request.
You have not access to the database. The unique and only thing you need to use is the following semantic dictionary :
{self.context}

Rules to strictly apply :
- You MUST ANSWER with a JSON object. no speech before or after the json object
- the JSON object MUST HAVE the EXACT following structure :
{{
    "metrics": ["metric_name_1"],
    "group_by": ["dimension_name_1"]
}}
- Deduce metrics and dimensions regarding the question meaning
- If the question does not mention a grouping dimension, return an empty list for "group_by".
"""

        # 2. Call the LLM.
        print(f"🤖 [Agent] Réflexion en cours...")
        llm_response_text = self._call_llm(system_prompt, question)

        # 3. Clean up and parse the JSON answer.
        if not llm_response_text:
            return None

        try:
            # LLMs tend to wrap answers in ```json fences; strip them before parsing.
            clean_json = llm_response_text.replace('```json', '').replace('```', '').strip()
            query_params = json.loads(clean_json)
            print(f"🧠 [Agent] Traduction réussie : {query_params}")
        except Exception:
            # The model disobeyed the JSON-only instruction; surface the raw text.
            print(f"❌ [Agent] Erreur de parsing JSON. L'IA a désobéi. Réponse brute : \n{llm_response_text}")
            return None

        # 4. Execute on the Spark cluster through the SemanticEngine.
        print(f"🚀 [Agent] Exécution de la requête sur Databricks...")
        try:
            df_result = self.semantic.query(
                model_name=self.default_model,
                metrics=query_params.get("metrics", []),
                group_by=query_params.get("group_by", [])
            )
            return df_result
        except Exception as e:
            print(f"❌ [Agent] Erreur lors de l'exécution PySpark : {e}")
            return None

    def _call_llm(self, system_prompt: str, user_question: str) -> "str | None":
        """
        Internal method to make the network call to the LLM API (OpenAI).

        Args:
            system_prompt (str): The strictly formatted system context and instructions.
            user_question (str): The natural language user query.

        Returns:
            str | None: The raw text response from the LLM (expected to be a JSON
                        string), or None when the library is missing or the call fails.
        """
        try:
            import openai
        except ImportError:
            print("❌ [Agent] La librairie 'openai' n'est pas installée. Faites un `pip install openai`.")
            return None

        # Read the key from environment variables (empty by default for now).
        api_key = os.environ.get("OPENAI_API_KEY", "")

        if not api_key:
            print("⚠️ [Agent] OPENAI_API_KEY manquante. Simulation de réponse JSON pour le test...")
            # Simulated answer until a real key is configured.
            return '{"metrics": ["gross_revenue"], "group_by": ["platform"]}'

        try:
            client = openai.OpenAI(api_key=api_key)

            response = client.chat.completions.create(
                model="gpt-4o",  # current standard model
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_question}
                ],
                # TEMPERATURE 0: we want a deterministic logical translation, not creativity.
                temperature=0.0
            )
            return response.choices[0].message.content

        except Exception as e:
            print(f"❌ [Agent] Erreur de communication avec l'API OpenAI : {e}")
            return None
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration module. Handles YAML loading and Environment detection.
|
|
3
|
+
Includes auto-discovery mechanism to find config.yaml at project root.
|
|
4
|
+
"""
|
|
5
|
+
import yaml
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
class ConfigurationManager:
    """
    Manages the loading and retrieval of configuration from a YAML file.
    Automatically detects the current environment by testing catalog access.
    """

    def __init__(self, spark, config_path=None):
        """
        Initializes the ConfigurationManager.

        Args:
            spark (SparkSession): The active SparkSession used to verify catalog access.
            config_path (str, optional): Absolute path to the config.yaml file.
                                         If not provided, the framework will attempt
                                         to auto-discover it by searching parent directories.

        Raises:
            FileNotFoundError: If the configuration file cannot be found.
        """
        self.spark = spark
        self.config = {}
        self.current_env_name = None
        self.db_prefix = None

        # 1. Path resolution logic.
        final_path = config_path

        if not final_path:
            # Try to auto-discover at the current location or in any parent.
            print("[Config] No path provided. Searching for 'config.yaml' in parent directories...")
            final_path = self._find_config_upwards("config.yaml")

        if final_path:
            self._load_config_file(final_path)
            self.current_env_name = self._detect_environment()
            self.db_prefix = self.get_value("catalog")
        else:
            # Critical error: the framework cannot run without its config.
            raise FileNotFoundError(
                "[Config] CRITICAL: 'config.yaml' not found in current directory or any parent directory. "
                "Please verify your project structure."
            )

    def _find_config_upwards(self, filename):
        """
        Searches for a file starting from current working directory and moving up.

        Args:
            filename (str): The name of the file to search for.

        Returns:
            str: Absolute path to the file if found, else None.
        """
        current_dir = os.getcwd()

        # Loop until we hit the root of the filesystem.
        while True:
            check_path = os.path.join(current_dir, filename)
            if os.path.exists(check_path):
                print(f" [Config] Auto-discovered config file at: {check_path}")
                return check_path

            parent_dir = os.path.dirname(current_dir)
            if parent_dir == current_dir:
                # We hit the filesystem root (e.g. /) without finding the file.
                return None
            current_dir = parent_dir

    def _load_config_file(self, path):
        """
        Loads the YAML configuration file into the object's state.

        Args:
            path (str): The absolute path to the YAML file.

        Raises:
            ValueError: If the YAML file contains syntax errors.
        """
        print(f" [Config] Loading configuration from: {path}")
        try:
            with open(path, 'r') as f:
                # An empty YAML file parses to None; normalize to an empty dict
                # so downstream .get() calls keep working.
                self.config = yaml.safe_load(f) or {}
        except yaml.YAMLError as exc:
            # Chain the original parser error for easier debugging.
            raise ValueError(f"Error parsing YAML file: {exc}") from exc

    def _detect_environment(self):
        """
        Auto-detects the active environment by iterating through the 'priority_check'
        list in the config and attempting to connect to the associated catalog.

        Returns:
            str: The name of the successfully detected environment.

        Raises:
            EnvironmentError: If no catalogs defined in the configuration are accessible.
        """
        envs = self.config.get("environments", {})
        # Fall back to declaration order when no explicit priority list is given.
        priority = self.config.get("priority_check", envs.keys())

        print(" [Config] Auto-detecting environment...")

        for env_name in priority:
            if env_name not in envs:
                continue

            catalog = envs[env_name].get("catalog")
            try:
                # Use a read-only command which is more stable than USE CATALOG.
                # It checks for accessibility without changing the session's state.
                self.spark.sql(f"SHOW SCHEMAS IN `{catalog}`").limit(1).collect()
                print(f" [Config] Success: Connected to catalog '{catalog}'. Environment is '{env_name.upper()}'.")
                return env_name
            except Exception:
                continue

        raise EnvironmentError("Could not connect to any catalog defined in the configuration file.")

    def get_value(self, key):
        """
        Retrieves a configuration value specific to the currently active environment.

        Args:
            key (str): The configuration key to retrieve.

        Returns:
            Any: The value associated with the key, or None if not found or no env is active.
        """
        if not self.current_env_name:
            return None
        # Defensive chained lookup: returns None instead of raising if the
        # config is malformed (e.g. 'environments' section missing).
        return self.config.get("environments", {}).get(self.current_env_name, {}).get(key)

    def get_db(self):
        """
        Retrieves the database (catalog) prefix for the current environment.

        Returns:
            str: The database/catalog prefix.
        """
        return self.db_prefix