naas-abi 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- naas_abi/__init__.py +35 -0
- naas_abi/agents/AbiAgent.py +442 -0
- naas_abi/agents/AbiAgent_test.py +157 -0
- naas_abi/agents/EntitytoSPARQLAgent.py +952 -0
- naas_abi/agents/EntitytoSPARQLAgent_test.py +66 -0
- naas_abi/agents/KnowledgeGraphBuilderAgent.py +321 -0
- naas_abi/agents/KnowledgeGraphBuilderAgent_test.py +86 -0
- naas_abi/agents/OntologyEngineerAgent.py +115 -0
- naas_abi/agents/OntologyEngineerAgent_test.py +42 -0
- naas_abi/apps/oxigraph_admin/main.py +392 -0
- naas_abi/apps/oxigraph_admin/terminal_style.py +151 -0
- naas_abi/apps/sparql_terminal/main.py +68 -0
- naas_abi/apps/sparql_terminal/terminal_style.py +236 -0
- naas_abi/apps/terminal_agent/main.py +553 -0
- naas_abi/apps/terminal_agent/terminal_style.py +175 -0
- naas_abi/cli.py +714 -0
- naas_abi/mappings.py +83 -0
- naas_abi/models/airgap_gemma.py +220 -0
- naas_abi/models/airgap_qwen.py +24 -0
- naas_abi/models/default.py +23 -0
- naas_abi/models/gpt_4_1.py +25 -0
- naas_abi/pipelines/AIAgentOntologyGenerationPipeline.py +635 -0
- naas_abi/pipelines/AIAgentOntologyGenerationPipeline_test.py +133 -0
- naas_abi/pipelines/AddIndividualPipeline.py +215 -0
- naas_abi/pipelines/AddIndividualPipeline_test.py +66 -0
- naas_abi/pipelines/InsertDataSPARQLPipeline.py +197 -0
- naas_abi/pipelines/InsertDataSPARQLPipeline_test.py +96 -0
- naas_abi/pipelines/MergeIndividualsPipeline.py +245 -0
- naas_abi/pipelines/MergeIndividualsPipeline_test.py +98 -0
- naas_abi/pipelines/RemoveIndividualPipeline.py +166 -0
- naas_abi/pipelines/RemoveIndividualPipeline_test.py +58 -0
- naas_abi/pipelines/UpdateCommercialOrganizationPipeline.py +198 -0
- naas_abi/pipelines/UpdateDataPropertyPipeline.py +175 -0
- naas_abi/pipelines/UpdateLegalNamePipeline.py +107 -0
- naas_abi/pipelines/UpdateLinkedInPagePipeline.py +179 -0
- naas_abi/pipelines/UpdatePersonPipeline.py +184 -0
- naas_abi/pipelines/UpdateSkillPipeline.py +118 -0
- naas_abi/pipelines/UpdateTickerPipeline.py +104 -0
- naas_abi/pipelines/UpdateWebsitePipeline.py +106 -0
- naas_abi/triggers.py +131 -0
- naas_abi/workflows/AgentRecommendationWorkflow.py +321 -0
- naas_abi/workflows/AgentRecommendationWorkflow_test.py +160 -0
- naas_abi/workflows/ArtificialAnalysisWorkflow.py +337 -0
- naas_abi/workflows/ArtificialAnalysisWorkflow_test.py +57 -0
- naas_abi/workflows/ConvertOntologyGraphToYamlWorkflow.py +210 -0
- naas_abi/workflows/ConvertOntologyGraphToYamlWorkflow_test.py +78 -0
- naas_abi/workflows/CreateClassOntologyYamlWorkflow.py +208 -0
- naas_abi/workflows/CreateClassOntologyYamlWorkflow_test.py +65 -0
- naas_abi/workflows/CreateIndividualOntologyYamlWorkflow.py +183 -0
- naas_abi/workflows/CreateIndividualOntologyYamlWorkflow_test.py +86 -0
- naas_abi/workflows/ExportGraphInstancesToExcelWorkflow.py +450 -0
- naas_abi/workflows/ExportGraphInstancesToExcelWorkflow_test.py +33 -0
- naas_abi/workflows/GetObjectPropertiesFromClassWorkflow.py +385 -0
- naas_abi/workflows/GetObjectPropertiesFromClassWorkflow_test.py +57 -0
- naas_abi/workflows/GetSubjectGraphWorkflow.py +84 -0
- naas_abi/workflows/GetSubjectGraphWorkflow_test.py +71 -0
- naas_abi/workflows/SearchIndividualWorkflow.py +190 -0
- naas_abi/workflows/SearchIndividualWorkflow_test.py +98 -0
- naas_abi-1.0.0.dist-info/METADATA +9 -0
- naas_abi-1.0.0.dist-info/RECORD +62 -0
- naas_abi-1.0.0.dist-info/WHEEL +5 -0
- naas_abi-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter
|
|
10
|
+
from langchain_core.tools import BaseTool, StructuredTool
|
|
11
|
+
from naas_abi_core.pipeline import Pipeline, PipelineConfiguration, PipelineParameters
|
|
12
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import ITripleStoreService
|
|
13
|
+
from rdflib import Graph, Literal, Namespace
|
|
14
|
+
|
|
15
|
+
# Shared RDF namespace for every ABI term this pipeline emits into the graph.
ABI = Namespace("http://ontology.naas.ai/abi/")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class AIAgentOntologyGenerationConfiguration(PipelineConfiguration):
    """Configuration for AI Agent Ontology Generation Pipeline.

    Attributes:
        triple_store (ITripleStoreService): The ontology store service to use
        datastore_path (str): Path to store generated ontology files
        source_datastore_path (str): Path to source Artificial Analysis data
        max_models_per_agent (int): Maximum models per agent for performance
    """

    # Required: receives the run-summary graph produced by run().
    triple_store: ITripleStoreService
    # Root folder under which each run creates a timestamped sub-folder
    # holding current + audit copies of every generated ontology.
    datastore_path: str = (
        "storage/datastore/core/modules/abi/AIAgentOntologyGenerationPipeline"
    )
    # Folder where the upstream ArtificialAnalysisWorkflow drops its
    # *_llms_data.json exports (the pipeline loads the newest one).
    source_datastore_path: str = (
        "storage/datastore/core/modules/abi/ArtificialAnalysisWorkflow"
    )
    # Hard cap applied per agent before generation, to keep files small.
    max_models_per_agent: int = 50
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AIAgentOntologyGenerationParameters(PipelineParameters):
    """Parameters for AI Agent Ontology Generation Pipeline execution.

    Attributes:
        force_regenerate (bool): Force regeneration even if files exist
        agent_filter (List[str]): Filter specific agents to generate
    """

    # NOTE(review): force_regenerate is accepted but never read inside this
    # pipeline's visible code — confirm whether it is consumed elsewhere.
    force_regenerate: bool = False
    # None means "generate for every detected agent"; otherwise only agent
    # module names present in this list are processed.
    agent_filter: Optional[List[str]] = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AIAgentOntologyGenerationPipeline(Pipeline):
    """Pipeline that turns Artificial Analysis model data into per-agent,
    BFO-structured ontology files: timestamped copies in the datastore for
    auditing, plus current copies deployed into each agent's module folder.
    """

    # Double-underscore name triggers Python name mangling, keeping the
    # configuration private to this class.
    __configuration: AIAgentOntologyGenerationConfiguration

    def __init__(self, configuration: AIAgentOntologyGenerationConfiguration):
        """Store the configuration used by all subsequent runs.

        Args:
            configuration: Services and filesystem paths for the pipeline.
        """
        self.__configuration = configuration
|
|
56
|
+
|
|
57
|
+
def run(self, parameters: PipelineParameters) -> Graph:
    """Execute the full ontology-generation pipeline.

    Steps:
        1. Load the latest Artificial Analysis export.
        2. Group models by AI agent family.
        3. Generate ontologies in a timestamped datastore folder.
        4. Deploy current versions to the module folders.
        5. Write an audit summary.

    Args:
        parameters: Must be an AIAgentOntologyGenerationParameters instance.

    Returns:
        Graph: Run metadata (file count, timestamp); also inserted into the
        configured triple store.

    Raises:
        ValueError: On a wrong parameter type or missing source data.
    """
    # Guard clause: reject any other PipelineParameters subtype up front.
    if not isinstance(parameters, AIAgentOntologyGenerationParameters):
        raise ValueError(
            "Parameters must be of type AIAgentOntologyGenerationParameters"
        )

    # STEP 1: source data is mandatory — fail fast when it is absent.
    aa_data = self._load_latest_aa_data()
    if not aa_data:
        raise ValueError("No Artificial Analysis data found")

    # STEPS 2-5: grouping, generation, deployment and audit trail.
    generated_files = self._execute_pipeline_steps(aa_data, parameters)

    # STEP 6: record run metadata as triples.
    result_graph = Graph()
    run_uri = ABI[f"AIAgentOntologyGeneration_{uuid.uuid4()}"]
    result_graph.add(
        (run_uri, ABI.hasGeneratedFiles, Literal(len(generated_files)))
    )
    result_graph.add(
        (run_uri, ABI.hasTimestamp, Literal(datetime.now(timezone.utc).isoformat()))
    )

    # Persist the run record before handing the graph back to the caller.
    self.__configuration.triple_store.insert(result_graph)
    return result_graph
|
|
97
|
+
|
|
98
|
+
def _load_latest_aa_data(self) -> Optional[Dict[str, Any]]:
    """Return the most recent Artificial Analysis export, or None.

    Scans the configured source datastore for ``*_llms_data.json`` files
    and parses the one with the newest modification time. Returns None
    when the directory or any matching file is missing.
    """
    source_dir = Path(self.__configuration.source_datastore_path)
    if not source_dir.exists():
        return None

    candidates = list(source_dir.glob("*_llms_data.json"))
    if not candidates:
        return None

    # Most recently modified export wins.
    newest = max(candidates, key=lambda path: path.stat().st_mtime)
    with newest.open("r", encoding="utf-8") as handle:
        return json.load(handle)
|
|
114
|
+
|
|
115
|
+
def _execute_pipeline_steps(
    self, aa_data: Dict[str, Any], parameters: AIAgentOntologyGenerationParameters
) -> List[Path]:
    """
    Execute the main pipeline steps:
    STEP 2: Extract and group models by AI agent
    STEP 3: Generate ontologies in timestamped datastore folders
    STEP 4: Deploy current versions to module folders
    STEP 5: Create audit trail and summary

    Args:
        aa_data: Parsed Artificial Analysis export.
        parameters: Run parameters (may carry an agent filter).

    Returns:
        List[Path]: Every file written by this run.
    """
    # STEP 2: each run writes into its own timestamped folder so the
    # datastore doubles as an audit trail.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
    output_dir = Path(self.__configuration.datastore_path) / timestamp
    output_dir.mkdir(parents=True, exist_ok=True)

    # "llms" is the expected key; "data" is a fallback for older exports.
    models = aa_data.get("llms", []) or aa_data.get("data", [])
    agent_models = self._group_models_by_agent(models)

    # Apply agent filter if specified.
    if parameters.agent_filter:
        agent_models = {
            agent: agent_list
            for agent, agent_list in agent_models.items()
            if agent in parameters.agent_filter
        }

    # STEP 3-4: generate and deploy ontologies for each agent.
    # BUGFIX: the loop variable must NOT be named "models" — it previously
    # shadowed the full model list, so the summary below reported only the
    # last agent's model count as "total_models_processed".
    generated_files = []
    for agent_module, agent_model_list in agent_models.items():
        agent_files = self._process_single_agent(
            agent_module, agent_model_list, timestamp, output_dir
        )
        generated_files.extend(agent_files)

    # STEP 5: create summary and audit trail from the full model list.
    self._create_execution_summary(
        timestamp, models, agent_models, generated_files, output_dir
    )

    return generated_files
|
|
155
|
+
|
|
156
|
+
def _process_single_agent(
    self,
    agent_module: str,
    models: List[Dict[str, Any]],
    timestamp: str,
    output_dir: Path,
) -> List[Path]:
    """Generate one agent's ontology and write it to every target location.

    Args:
        agent_module: Agent module name (e.g. "claude", "chatgpt").
        models: Models assigned to this agent.
        timestamp: Run timestamp used for the audit filename.
        output_dir: Timestamped datastore folder for this run.

    Returns:
        List[Path]: [datastore current, datastore audit, module deployed].
    """
    # Cap the number of models to keep generated files manageable.
    if len(models) > self.__configuration.max_models_per_agent:
        models = models[: self.__configuration.max_models_per_agent]

    # Generate the ontology content once; it is identical in all copies.
    ontology_content = self._generate_agent_ontology_file(agent_module, models)
    agent_title = agent_module.replace("_", "").title()

    current_filename = f"{agent_title}Ontology.ttl"
    audit_filename = f"{timestamp}_{agent_title}Ontology.ttl"

    # STEP 4 target: the agent's module folder. From this file's pipelines/
    # directory, three .parent hops reach the modules/ root.
    # NOTE(review): relies on the <modules>/<agent>/... package layout
    # staying as-is — confirm if this file is ever relocated.
    modules_dir = Path(__file__).parent.parent.parent
    module_dir = modules_dir / agent_module / "ontologies"
    module_dir.mkdir(parents=True, exist_ok=True)

    # One write per destination, in a fixed order:
    #   1. STEP 3A: datastore current version (used for deployment)
    #   2. STEP 3B: datastore audit version (timestamped history)
    #   3. STEP 4:  module folder current version (immediately usable)
    targets = [
        output_dir / current_filename,
        output_dir / audit_filename,
        module_dir / current_filename,
    ]
    generated_files: List[Path] = []
    for target in targets:
        with open(target, "w", encoding="utf-8") as f:
            f.write(ontology_content)
        generated_files.append(target)

    return generated_files
|
|
207
|
+
|
|
208
|
+
def _create_execution_summary(
    self,
    timestamp: str,
    models: List[Dict[str, Any]],
    agent_models: Dict[str, List[Dict[str, Any]]],
    generated_files: List[Path],
    output_dir: Path,
) -> None:
    """Create execution summary for audit trail.

    Writes a JSON report into ``output_dir`` recording how many models and
    agents were processed and where each generated file was written.
    """
    summary_data = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "timestamp": timestamp,
        "total_models_processed": len(models),
        "agents_generated": len(agent_models),
        "total_files_generated": len(generated_files),
        "agent_breakdown": {
            agent: len(models) for agent, models in agent_models.items()
        },
        # Audit copies are prefixed with the run timestamp ("2YYYMMDD..."),
        # so name.startswith("2") separates them from current copies.
        # NOTE(review): this heuristic misfiles any agent whose ontology
        # filename itself begins with "2" — confirm none exist.
        "file_locations": {
            "datastore_current": [
                str(f)
                for f in generated_files
                if "datastore" in str(f) and not f.name.startswith("2")
            ],
            "datastore_audit": [
                str(f)
                for f in generated_files
                if "datastore" in str(f) and f.name.startswith("2")
            ],
            "module_deployed": [
                str(f) for f in generated_files if "src/core/modules" in str(f)
            ],
        },
    }

    summary_file = output_dir / f"generation_summary_{timestamp}.json"
    with open(summary_file, "w", encoding="utf-8") as f:
        json.dump(summary_data, f, indent=2)
|
|
246
|
+
|
|
247
|
+
def _group_models_by_agent(
    self, models: List[Dict[str, Any]]
) -> Dict[str, List[Dict[str, Any]]]:
    """Group models by AI agent module based on model family.

    Args:
        models: Raw model records from the Artificial Analysis export.

    Returns:
        Mapping of agent module name -> models assigned to that agent.
        Models with no recognizable family or provider are skipped.
    """
    agent_models: Dict[str, List[Dict[str, Any]]] = {}

    for model in models:
        agent_module = self._determine_ai_agent_module(model)
        if agent_module:
            # setdefault replaces the manual "key in dict" dance.
            agent_models.setdefault(agent_module, []).append(model)

    return agent_models
|
|
262
|
+
|
|
263
|
+
def _determine_ai_agent_module(self, model_data: Dict[str, Any]) -> Optional[str]:
    """Map a model record to an AI agent module name.

    Matching is two-tiered: model name/slug substrings are checked first;
    the model creator (provider) is only a fallback.

    Args:
        model_data: One model entry from the Artificial Analysis export.

    Returns:
        Agent module name (e.g. "chatgpt", "claude"), or None when no
        pattern matches.
    """
    # "or ''" also guards against explicit JSON nulls, which a .get()
    # default does not cover and which would crash .lower().
    model_name = (model_data.get("name") or "").lower()
    model_slug = (model_data.get("slug") or "").lower()
    creator = model_data.get("model_creator") or {}
    creator_name = (creator.get("name") or "").lower()
    creator_slug = (creator.get("slug") or "").lower()

    # Model family to module mapping (prioritizing model name over provider).
    # Substring matching walks this dict in insertion order, so more
    # specific patterns must precede any pattern they contain.
    model_mapping = {
        # OpenAI Open Source models: BUGFIX — these must come before the
        # generic "gpt" entry, otherwise "gpt-oss" names match "gpt" first
        # and are misclassified as chatgpt.
        "gpt-oss": "gpt_oss",
        "gpt_oss": "gpt_oss",
        # OpenAI models
        "gpt": "chatgpt",
        "chatgpt": "chatgpt",
        "o1": "chatgpt",
        "o3": "chatgpt",
        "o4": "chatgpt",
        "gpt-4": "chatgpt",
        "gpt-5": "chatgpt",
        "davinci": "chatgpt",
        # Anthropic
        "claude": "claude",
        # Google models - separate by family
        "gemini": "gemini",
        "gemma": "gemma",
        "palm": "gemini",  # Palm is part of Gemini family
        # Meta/Facebook
        "llama": "llama",
        "meta": "llama",
        # xAI
        "grok": "grok",
        # Mistral
        "mistral": "mistral",
        "mixtral": "mistral",
        "codestral": "mistral",
        # DeepSeek
        "deepseek": "deepseek",
        # Alibaba
        "qwen": "qwen",
        "qwq": "qwen",
        # Perplexity
        "sonar": "perplexity",
        "perplexity": "perplexity",
        # Other model families
        "phi": "phi",
        "titan": "titan",
        "yi": "yi",
        "solar": "solar",
        "exaone": "exaone",
        "glm": "glm",
        "minimax": "minimax",
        "kimi": "kimi",
        "arctic": "arctic",
        "dbrx": "dbrx",
        "lfm": "lfm",
        "cohere": "cohere",
        "command": "cohere",
        "jamba": "jamba",
        "reka": "reka",
        "openchat": "openchat",
        "tulu": "tulu",
        "nous": "nous_research",
        "hermes": "nous_research",
    }

    # Check model name/slug first for family identification.
    for pattern, module in model_mapping.items():
        if pattern in model_name or pattern in model_slug:
            return module

    # Fallback to provider-based mapping for unmapped models.
    provider_mapping = {
        "openai": "chatgpt",
        "anthropic": "claude",
        "google": "gemini",  # Default Google to Gemini if no specific family found
        "x.ai": "grok",
        "xai": "grok",
        "mistral ai": "mistral",
        "mistral": "mistral",
        "meta": "llama",
        "deepseek": "deepseek",
        "perplexity": "perplexity",
        "alibaba": "qwen",
    }

    for provider, module in provider_mapping.items():
        if provider in creator_name or provider in creator_slug:
            return module

    return None
|
|
356
|
+
|
|
357
|
+
def _generate_agent_ontology_file(
    self, agent_module: str, models: List[Dict[str, Any]]
) -> str:
    """Generate BFO-structured ontology from Artificial Analysis JSON.

    JSON MAPPING TO BFO 7 BUCKETS:

    Bucket 1 (Material Entities):
    - JSON 'name' → abi:AIModelInstance
    - JSON 'model_creator.name' → abi:provider

    Bucket 2 (Qualities):
    - JSON 'pricing.*' → abi:*TokenCost properties
    - JSON 'median_*' → abi:outputSpeed, timeToFirstToken
    - JSON 'evaluations.*' → abi:intelligenceIndex, codingIndex, mathIndex

    Bucket 3 (Realizable Entities):
    - Imported from CapabilityOntology → capability:TextGenerationCapability, etc.

    Bucket 4 (Processes):
    - Generated process instances → abi:BusinessProposalCreationProcess, etc.

    Bucket 5 (Temporal Regions):
    - Generated session instances → abi:InferenceSession

    Bucket 6 (Spatial Regions):
    - Inherited from AIAgentOntology → abi:DataCenterLocation

    Bucket 7 (Information Content):
    - JSON 'sourceAPI' → abi:sourceAPI property

    Returns:
        str: Complete Turtle (.ttl) document for this agent.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
    # e.g. "gpt_oss" -> "Gptoss": title used in URIs and filenames.
    agent_title = agent_module.replace("_", "").title()

    # Header: prefix declarations plus the ontology resource itself.
    # NOTE(review): dc:created carries a timestamp typed as @en text rather
    # than xsd:dateTime — confirm downstream tooling expects that.
    ontology_content = f"""@prefix abi: <http://naas.ai/ontology/abi#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix bfo: <http://purl.obolibrary.org/obo/> .
@prefix capability: <http://ontology.naas.ai/abi/capability/> .
@prefix dc: <http://purl.org/dc/terms/> .

<http://naas.ai/ontology/abi/{agent_title}Ontology> a owl:Ontology ;
    owl:imports <http://ontology.naas.ai/abi/AIAgentOntology> ;
    dc:title "{agent_title} AI Agent Ontology"@en ;
    dc:description "BFO-grounded ontology for {agent_title} AI models and processes"@en ;
    dc:created "{timestamp}"@en .

"""

    # BFO Bucket 1: Material Entity (WHAT/WHO) - the AI Agent individual.
    agent_uri = f"abi:{agent_title}Agent"
    ontology_content += f"""
#################################################################
# BFO Bucket 1: Material Entities (WHAT/WHO)
#################################################################

{agent_uri} a abi:AIAgent ;
    rdfs:label "{agent_title} AI Agent"@en ;
    rdfs:comment "AI Agent capable of utilizing {agent_title} models"@en ;
    abi:hasSpecializedRole "Multi-purpose AI processing"@en .

"""

    # Buckets 4 and 5: process and session instances for this agent.
    process_mappings = self._generate_process_mappings(agent_title, agent_uri)
    ontology_content += process_mappings

    # One model-instance section (buckets 1, 2 and relationships) per model.
    for i, model in enumerate(models):
        model_content = self._generate_model_instance(model, agent_uri, i)
        ontology_content += model_content + "\n"

    return ontology_content
|
|
432
|
+
|
|
433
|
+
def _generate_process_mappings(self, agent_title: str, agent_uri: str) -> str:
    """Generate BFO process instances following the 7 buckets framework.

    Args:
        agent_title: Title-cased agent name used in instance identifiers.
        agent_uri: Prefixed name of the agent (e.g. "abi:ClaudeAgent"),
            interpolated verbatim into the Turtle output.

    Returns:
        str: Turtle snippet with three process instances (bucket 4) and
        their matching inference-session instances (bucket 5).
    """
    process_content = f"""
#################################################################
# BFO Bucket 4: Processes (HOW-IT-HAPPENS)
#################################################################

abi:{agent_title}BusinessProposalProcess a abi:BusinessProposalCreationProcess ;
    rdfs:label "{agent_title} Business Proposal Process"@en ;
    abi:hasParticipant {agent_uri} ;
    abi:realizesCapability capability:TextGenerationCapability ;
    abi:hasTemporalRegion abi:{agent_title}BusinessProposalSession ;
    abi:hasQuality abi:{agent_title}BusinessProposalQuality .

abi:{agent_title}CreativeWritingProcess a abi:CreativeWritingProcess ;
    rdfs:label "{agent_title} Creative Writing Process"@en ;
    abi:hasParticipant {agent_uri} ;
    abi:realizesCapability capability:TextGenerationCapability ;
    abi:hasTemporalRegion abi:{agent_title}CreativeWritingSession ;
    abi:hasQuality abi:{agent_title}CreativeWritingQuality .

abi:{agent_title}CodeGenerationProcess a abi:CodeGenerationProcess ;
    rdfs:label "{agent_title} Code Generation Process"@en ;
    abi:hasParticipant {agent_uri} ;
    abi:realizesCapability capability:CodeGenerationCapability ;
    abi:hasTemporalRegion abi:{agent_title}CodeGenerationSession ;
    abi:hasQuality abi:{agent_title}CodeGenerationQuality .

#################################################################
# BFO Bucket 5: Temporal Regions (WHEN)
#################################################################

abi:{agent_title}BusinessProposalSession a abi:InferenceSession ;
    rdfs:label "{agent_title} Business Proposal Session"@en .

abi:{agent_title}CreativeWritingSession a abi:InferenceSession ;
    rdfs:label "{agent_title} Creative Writing Session"@en .

abi:{agent_title}CodeGenerationSession a abi:InferenceSession ;
    rdfs:label "{agent_title} Code Generation Session"@en .

"""
    return process_content
|
|
476
|
+
|
|
477
|
+
def _generate_model_instance(
    self, model: Dict[str, Any], agent_uri: str, index: int
) -> str:
    """Generate BFO-structured model instance from Artificial Analysis JSON.

    Args:
        model: One model record from the export.
        agent_uri: Prefixed agent name (e.g. "abi:ClaudeAgent").
        index: Position of the model in the agent's list; currently unused
            by this implementation.

    Returns:
        str: Turtle snippet for the model (buckets 1 and 2) plus
        agent/process relationship triples.
    """

    # JSON → BFO Bucket 1: Material Entity extraction
    model_name = model.get("name", "Unknown Model")
    model_slug = model.get("slug", "unknown")
    model_id = self._generate_uri_safe_id(model_name)
    creator = model.get("model_creator", {})
    creator_name = creator.get("name", "Unknown")

    # JSON → BFO Bucket 2: Qualities extraction.
    # "or 0" coalesces both missing keys and explicit JSON nulls to 0.
    pricing = model.get("pricing", {})
    input_cost = pricing.get("price_1m_input_tokens") or 0
    output_cost = pricing.get("price_1m_output_tokens") or 0
    blended_cost = pricing.get("price_1m_blended_3_to_1") or 0

    output_speed = model.get("median_output_tokens_per_second") or 0
    ttft = model.get("median_time_to_first_token_seconds") or 0
    ttft_answer = model.get("median_time_to_first_answer_token") or 0

    evaluations = model.get("evaluations", {})
    intelligence_index = (
        evaluations.get("artificial_analysis_intelligence_index") or 0
    )
    coding_index = evaluations.get("artificial_analysis_coding_index") or 0
    math_index = evaluations.get("artificial_analysis_math_index") or 0

    # Recover the agent title from the URI to name the process instances,
    # mirroring _generate_process_mappings.
    agent_title = agent_uri.replace("abi:", "").replace("Agent", "")

    # NOTE(review): model_name/creator_name are interpolated into quoted
    # Turtle literals without escaping — a name containing '"' would break
    # the document. Confirm whether source names can contain quotes.
    return f"""
#################################################################
# BFO Bucket 1: Material Entity - {model_name}
#################################################################

abi:{model_id} a abi:AIModelInstance ;
    rdfs:label "{model_name}"@en ;
    abi:modelSlug "{model_slug}"@en ;
    abi:provider "{creator_name}"@en ;
    abi:sourceAPI "artificial_analysis"@en .

#################################################################
# BFO Bucket 2: Qualities - Performance & Cost Metrics
#################################################################

abi:{model_id} abi:inputTokenCost {input_cost} ;
    abi:inputTokenCostCurrency "USD"@en ;
    abi:outputTokenCost {output_cost} ;
    abi:outputTokenCostCurrency "USD"@en ;
    abi:blendedCost {blended_cost} ;
    abi:blendedCostCurrency "USD"@en ;
    abi:outputSpeed {output_speed} ;
    abi:outputSpeedUnit "tokens_per_second"@en ;
    abi:timeToFirstToken {ttft} ;
    abi:timeToFirstTokenUnit "seconds"@en ;
    abi:timeToFirstAnswerToken {ttft_answer} ;
    abi:timeToFirstAnswerTokenUnit "seconds"@en ;
    abi:intelligenceIndex {intelligence_index} ;
    abi:codingIndex {coding_index} ;
    abi:mathIndex {math_index} .

#################################################################
# Relationships - Agent/Process/Model Network
#################################################################

{agent_uri} abi:canUtilizeModel abi:{model_id} .
abi:{agent_title}BusinessProposalProcess abi:utilizesModel abi:{model_id} .
abi:{agent_title}CreativeWritingProcess abi:utilizesModel abi:{model_id} .
abi:{agent_title}CodeGenerationProcess abi:utilizesModel abi:{model_id} ."""
|
|
547
|
+
|
|
548
|
+
def _generate_uri_safe_id(self, text: str) -> str:
    """Return *text* converted into an identifier safe for a URI local name.

    Spaces, hyphens, dots and slashes become underscores; parentheses and
    apostrophes are dropped; runs of underscores are collapsed and the
    ends trimmed.
    """
    # Single C-level pass instead of chained .replace() calls: separators
    # map to "_", brackets and apostrophes map to nothing.
    translation = str.maketrans(
        {
            " ": "_",
            "-": "_",
            ".": "_",
            "/": "_",
            "\\": "_",
            "(": None,
            ")": None,
            "'": None,
        }
    )
    translated = text.translate(translation)
    # Splitting on "_" turns duplicate/leading/trailing underscores into
    # empty fields; filtering them and re-joining collapses and trims in
    # one step.
    return "_".join(piece for piece in translated.split("_") if piece)
|
|
560
|
+
|
|
561
|
+
def as_tools(self) -> list[BaseTool]:
    """Returns a list of LangChain tools for this pipeline.

    Returns:
        list[BaseTool]: List containing the pipeline tool
    """
    return [
        StructuredTool(
            name="ai_agent_ontology_generation",
            # BUGFIX: the previous description claimed "datastore only, no
            # module deployment", but _process_single_agent also deploys
            # current versions into each agent's module folder.
            description=(
                "Generates AI agent ontologies from Artificial Analysis "
                "data, storing timestamped versions in the datastore and "
                "deploying current versions to the agent module folders"
            ),
            func=lambda **kwargs: self.run(
                AIAgentOntologyGenerationParameters(**kwargs)
            ),
            args_schema=AIAgentOntologyGenerationParameters,
        )
    ]
|
|
577
|
+
|
|
578
|
+
def as_api(
    self,
    router: APIRouter,
    route_name: str = "",
    name: str = "",
    description: str = "",
    description_stream: str = "",
    tags: list[str | Enum] | None = None,
) -> None:
    """API registration hook required by the Pipeline interface.

    Intentionally a no-op: this pipeline registers no HTTP routes on the
    given router. The signature exists only to satisfy the interface.
    """
    # Normalize the mutable-default workaround; the value is never used.
    if tags is None:
        tags = []
    return None
|
|
590
|
+
|
|
591
|
+
def get_configuration(self) -> AIAgentOntologyGenerationConfiguration:
    """Return the configuration object this pipeline was constructed with."""
    return self.__configuration
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
# =============================================================================
|
|
597
|
+
# PIPELINE EXECUTION SUMMARY
|
|
598
|
+
# =============================================================================
|
|
599
|
+
"""
|
|
600
|
+
AI Agent Ontology Generation Pipeline
|
|
601
|
+
|
|
602
|
+
OVERVIEW:
|
|
603
|
+
This pipeline generates AI agent ontologies from Artificial Analysis data,
|
|
604
|
+
creates proper audit trails, and deploys current versions to module folders.
|
|
605
|
+
|
|
606
|
+
EXECUTION STEPS:
|
|
607
|
+
1. Load Artificial Analysis data from datastore
|
|
608
|
+
2. Extract and group models by AI agent family
|
|
609
|
+
3. Generate ontology content for each agent
|
|
610
|
+
4. Deploy to structured locations:
|
|
611
|
+
- Datastore: timestamped folders with current + audit versions
|
|
612
|
+
- Modules: current versions only for immediate use
|
|
613
|
+
|
|
614
|
+
FILE STRUCTURE CREATED:
|
|
615
|
+
📁 storage/datastore/core/modules/abi/AIAgentOntologyGenerationPipeline/
|
|
616
|
+
├── 📁 YYYYMMDDTHHMMSS/
|
|
617
|
+
│ ├── 📄 ClaudeOntology.ttl (current - for deployment)
|
|
618
|
+
│ ├── 📄 YYYYMMDDTHHMMSS_ClaudeOntology.ttl (audit - for history)
|
|
619
|
+
│ ├── 📄 ChatgptOntology.ttl (current - for deployment)
|
|
620
|
+
│ ├── 📄 YYYYMMDDTHHMMSS_ChatgptOntology.ttl (audit - for history)
|
|
621
|
+
│ └── 📄 generation_summary_YYYYMMDDTHHMMSS.json
|
|
622
|
+
└── ...
|
|
623
|
+
|
|
624
|
+
📁 src/core/modules/
|
|
625
|
+
├── 📁 claude/ontologies/ClaudeOntology.ttl (deployed current version)
|
|
626
|
+
├── 📁 chatgpt/ontologies/ChatgptOntology.ttl (deployed current version)
|
|
627
|
+
└── ...
|
|
628
|
+
|
|
629
|
+
BENEFITS:
|
|
630
|
+
✅ Single pipeline handles everything (generation + deployment)
|
|
631
|
+
✅ Complete audit trail with timestamped versions
|
|
632
|
+
✅ Current versions always available in module folders
|
|
633
|
+
✅ Clean separation of concerns with organized methods
|
|
634
|
+
✅ Comprehensive execution summaries for monitoring
|
|
635
|
+
"""
|