exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Gremlin Query Strategy
|
4
|
+
|
5
|
+
This module implements the Gremlin query strategy for Apache TinkerPop graph queries.
|
6
|
+
|
7
|
+
Company: eXonware.com
|
8
|
+
Author: Eng. Muhammad AlShehri
|
9
|
+
Email: connect@exonware.com
|
10
|
+
Version: 0.0.1.12
|
11
|
+
Generation Date: January 2, 2025
|
12
|
+
"""
|
13
|
+
|
14
|
+
import re
|
15
|
+
from typing import Any, Dict, List, Optional, Union
|
16
|
+
from .base import AGraphQueryStrategy
|
17
|
+
from ...errors import XWNodeTypeError, XWNodeValueError
|
18
|
+
from ...contracts import QueryMode, QueryTrait
|
19
|
+
|
20
|
+
|
21
|
+
class GremlinStrategy(AGraphQueryStrategy):
    """
    Gremlin query strategy for Apache TinkerPop graph queries.

    Supports:
    - Gremlin traversal language
    - Graph traversal operations
    - Vertex and edge operations
    - Property and label operations
    - Path and cycle detection

    The ``_execute_*`` handlers in this class do not run the query against a
    graph; they return a dict that echoes the query back.
    """

    # A query is accepted by validate_query() when it contains any one of
    # these substrings. The test is intentionally a plain substring check.
    _VALIDATION_KEYWORDS = (
        "g.", "V", "E", "addV", "addE", "drop", "has", "hasLabel", "hasId",
        "out", "in", "both", "outE", "inE", "bothE", "outV", "inV", "bothV",
        "values", "key", "label", "id", "count", "limit", "range", "order",
        "by", "select", "where", "and", "or", "not", "is", "within",
        "without", "between", "inside", "outside",
    )

    # Step names reported by _extract_steps(), in reporting order.
    _KNOWN_STEPS = (
        "V", "E", "addV", "addE", "drop", "has", "hasLabel", "hasId", "out",
        "in", "both", "outE", "inE", "bothE", "outV", "inV", "bothV",
        "values", "key", "label", "id", "count", "limit", "range", "order",
        "by", "select", "where", "and", "or", "not", "is", "within",
        "without", "between", "inside", "outside", "repeat", "until", "emit",
        "times", "path", "dedup", "cyclicPath",
    )

    # Single- or double-quoted string literal, honouring backslash escapes.
    _STRING_LITERAL = re.compile(
        r"'(?:\\.|[^'\\])*'"
        r'|"(?:\\.|[^"\\])*"'
    )

    # An identifier immediately followed by "(", i.e. a step invocation.
    _STEP_CALL = re.compile(r"\b([A-Za-z_]\w*)\s*\(")

    def __init__(self, **options):
        """Initialise the strategy and tag it with the Gremlin mode and traits."""
        super().__init__(**options)
        self._mode = QueryMode.GREMLIN
        self._traits = QueryTrait.GRAPH | QueryTrait.STRUCTURED | QueryTrait.ANALYTICAL

    def execute(self, query: str, **kwargs) -> Any:
        """
        Execute Gremlin query.

        Args:
            query: Gremlin query text.
            **kwargs: Forwarded unchanged to the selected handler.

        Returns:
            The result of the handler chosen from the query type.

        Raises:
            XWNodeValueError: If the query fails validate_query() or its
                type is not one of "traversal", "vertex" or "edge".
        """
        if not self.validate_query(query):
            raise XWNodeValueError(f"Invalid Gremlin query: {query}")

        query_type = self._get_query_type(query)
        handlers = {
            "traversal": self._execute_traversal,
            "vertex": self._execute_vertex,
            "edge": self._execute_edge,
        }
        handler = handlers.get(query_type)
        if handler is None:
            raise XWNodeValueError(f"Unsupported query type: {query_type}")
        return handler(query, **kwargs)

    def validate_query(self, query: str) -> bool:
        """
        Validate Gremlin query syntax.

        This is a lenient heuristic: a non-empty string is accepted as soon
        as it contains any entry of ``_VALIDATION_KEYWORDS`` as a substring.

        Returns:
            True if the query looks like Gremlin, False otherwise (including
            for empty or non-string input).
        """
        if not query or not isinstance(query, str):
            return False

        stripped = query.strip()
        return any(keyword in stripped for keyword in self._VALIDATION_KEYWORDS)

    def get_query_plan(self, query: str) -> Dict[str, Any]:
        """
        Get Gremlin query execution plan.

        Returns:
            A dict with the keys "query_type", "operation", "complexity",
            "estimated_cost", "steps" and "optimization_hints".
        """
        query_type = self._get_query_type(query)

        return {
            "query_type": query_type,
            "operation": query_type,
            "complexity": self._estimate_complexity(query),
            "estimated_cost": self._estimate_cost(query),
            "steps": self._extract_steps(query),
            "optimization_hints": self._get_optimization_hints(query),
        }

    def path_query(self, start: Any, end: Any) -> List[Any]:
        """Execute path query from vertex id ``start`` to vertex id ``end``."""
        query = (
            f"g.V({self._quote(start)}).repeat(out())"
            f".until(hasId({self._quote(end)})).path()"
        )
        return self.execute(query)

    def neighbor_query(self, node: Any) -> List[Any]:
        """Execute neighbor query for the vertex with id ``node``."""
        query = f"g.V({self._quote(node)}).both()"
        return self.execute(query)

    def shortest_path_query(self, start: Any, end: Any) -> List[Any]:
        """Execute shortest path query (the path query limited to one result)."""
        query = (
            f"g.V({self._quote(start)}).repeat(out())"
            f".until(hasId({self._quote(end)})).path().limit(1)"
        )
        return self.execute(query)

    def connected_components_query(self) -> List[List[Any]]:
        """Execute connected components query."""
        query = "g.V().repeat(both()).until(cyclicPath()).dedup()"
        return self.execute(query)

    def cycle_detection_query(self) -> List[List[Any]]:
        """Execute cycle detection query."""
        query = "g.V().repeat(out()).until(cyclicPath()).path()"
        return self.execute(query)

    @staticmethod
    def _quote(value: Any) -> str:
        """Render ``value`` as a single-quoted literal, escaping ``\\`` and ``'``."""
        text = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{text}'"

    def _strip_literals(self, query: str) -> str:
        """Replace every quoted string literal in ``query`` with ``''``."""
        return self._STRING_LITERAL.sub("''", query)

    def _get_query_type(self, query: str) -> str:
        """
        Extract query type from Gremlin query.

        Mutating steps are tested before the generic "V(" / "E(" check:
        "addV(" contains "V(" and "addE(" contains "E(", so testing the
        generic form first would classify every mutation as a traversal.
        Text inside quoted string literals is ignored.

        Returns:
            "vertex", "edge", "traversal" or "unknown".
        """
        bare = self._strip_literals(query.strip())

        if "addV" in bare:
            return "vertex"
        if "addE" in bare:
            return "edge"
        if "V(" in bare or "E(" in bare:
            return "traversal"
        return "unknown"

    def _execute_traversal(self, query: str, **kwargs) -> Any:
        """Execute traversal query (placeholder: echoes the query)."""
        return {"result": "Gremlin traversal executed", "query": query}

    def _execute_vertex(self, query: str, **kwargs) -> Any:
        """Execute vertex query (placeholder: echoes the query)."""
        return {"result": "Gremlin vertex executed", "query": query}

    def _execute_edge(self, query: str, **kwargs) -> Any:
        """Execute edge query (placeholder: echoes the query)."""
        return {"result": "Gremlin edge executed", "query": query}

    def _estimate_complexity(self, query: str) -> str:
        """
        Estimate query complexity from the number of distinct steps used.

        Returns:
            "HIGH" for more than 10 steps, "MEDIUM" for more than 5,
            otherwise "LOW".
        """
        step_count = len(self._extract_steps(query))

        if step_count > 10:
            return "HIGH"
        if step_count > 5:
            return "MEDIUM"
        return "LOW"

    def _estimate_cost(self, query: str) -> int:
        """Estimate query cost: 180, 90 or 45 for HIGH, MEDIUM or LOW complexity."""
        complexity = self._estimate_complexity(query)
        if complexity == "HIGH":
            return 180
        if complexity == "MEDIUM":
            return 90
        return 45

    def _extract_steps(self, query: str) -> List[str]:
        """
        Extract Gremlin steps from query.

        A step counts only when its exact name is followed by "(" outside a
        quoted string literal. Matching whole names keeps "hasLabel(" from
        also being reported as "has", or "outE(" as "out" and "E".

        Returns:
            The known steps invoked by the query, each at most once, in the
            order of ``_KNOWN_STEPS``.
        """
        invoked = set(self._STEP_CALL.findall(self._strip_literals(query)))
        return [step for step in self._KNOWN_STEPS if step in invoked]

    def _get_optimization_hints(self, query: str) -> List[str]:
        """Get query optimization hints based on substrings found in the query."""
        hints = []

        if "repeat" in query:
            hints.append("Consider using limit() with repeat() to prevent infinite loops")

        if "path" in query:
            hints.append("Consider using dedup() with path() to avoid duplicate paths")

        if "count" in query:
            hints.append("Consider using count() early in the traversal for better performance")

        return hints
|
@@ -0,0 +1,214 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
HiveQL Query Strategy
|
4
|
+
|
5
|
+
This module implements the HiveQL query strategy for Apache Hive SQL operations.
|
6
|
+
|
7
|
+
Company: eXonware.com
|
8
|
+
Author: Eng. Muhammad AlShehri
|
9
|
+
Email: connect@exonware.com
|
10
|
+
Version: 0.0.1.12
|
11
|
+
Generation Date: January 2, 2025
|
12
|
+
"""
|
13
|
+
|
14
|
+
import re
|
15
|
+
from typing import Any, Dict, List, Optional, Union
|
16
|
+
from .base import AStructuredQueryStrategy
|
17
|
+
from ...errors import XWNodeTypeError, XWNodeValueError
|
18
|
+
from ...contracts import QueryMode, QueryTrait
|
19
|
+
|
20
|
+
|
21
|
+
class HiveQLStrategy(AStructuredQueryStrategy):
    """
    HiveQL query strategy for Apache Hive SQL operations.

    Supports:
    - Hive-specific SQL extensions
    - Partitioned tables
    - Bucketed tables
    - UDFs and UDAFs
    - MapReduce operations

    The ``_execute_*`` handlers in this class do not contact Hive; they
    return a dict that echoes the query back.
    """

    # Statement keywords recognised by validate_query() / _get_query_type().
    _OPERATIONS = (
        "SELECT", "INSERT", "CREATE", "DROP", "ALTER", "LOAD", "EXPORT", "IMPORT",
    )

    # First identifier-like token of a statement, after optional whitespace.
    _LEADING_KEYWORD = re.compile(r"\s*([A-Za-z_][A-Za-z0-9_]*)")

    def __init__(self, **options):
        """Initialise the strategy and tag it with the HiveQL mode and traits."""
        super().__init__(**options)
        self._mode = QueryMode.HIVEQL
        self._traits = QueryTrait.STRUCTURED | QueryTrait.ANALYTICAL | QueryTrait.BATCH

    def execute(self, query: str, **kwargs) -> Any:
        """
        Execute HiveQL query.

        Only SELECT, INSERT, CREATE and LOAD statements have handlers. DROP,
        ALTER, EXPORT and IMPORT pass validation but are rejected here.

        Args:
            query: HiveQL statement text.
            **kwargs: Forwarded unchanged to the selected handler.

        Raises:
            XWNodeValueError: If the query is invalid or has no handler.
        """
        if not self.validate_query(query):
            raise XWNodeValueError(f"Invalid HiveQL query: {query}")

        query_type = self._get_query_type(query)
        handlers = {
            "SELECT": self._execute_select,
            "INSERT": self._execute_insert,
            "CREATE": self._execute_create,
            "LOAD": self._execute_load,
        }
        handler = handlers.get(query_type)
        if handler is None:
            raise XWNodeValueError(f"Unsupported query type: {query_type}")
        return handler(query, **kwargs)

    def validate_query(self, query: str) -> bool:
        """
        Validate HiveQL query syntax.

        Returns:
            True when the statement's first keyword (case-insensitive) is one
            of ``_OPERATIONS``; False otherwise, including for empty or
            non-string input.
        """
        if not query or not isinstance(query, str):
            return False

        return self._get_query_type(query) != "UNKNOWN"

    def get_query_plan(self, query: str) -> Dict[str, Any]:
        """
        Get HiveQL query execution plan.

        Returns:
            A dict with the keys "query_type", "operation", "complexity",
            "estimated_cost", "mapreduce_jobs" and "optimization_hints".
        """
        query_type = self._get_query_type(query)

        return {
            "query_type": query_type,
            "operation": query_type,
            "complexity": self._estimate_complexity(query),
            "estimated_cost": self._estimate_cost(query),
            "mapreduce_jobs": self._estimate_mapreduce_jobs(query),
            "optimization_hints": self._get_optimization_hints(query),
        }

    def select_query(self, table: str, columns: List[str], where_clause: str = None) -> Any:
        """Execute SELECT query over ``columns`` of ``table``, optionally filtered."""
        query = f"SELECT {', '.join(columns)} FROM {table}"
        if where_clause:
            query += f" WHERE {where_clause}"

        return self.execute(query)

    def insert_query(self, table: str, data: Dict[str, Any]) -> Any:
        """
        Execute INSERT query.

        The statement uses one ``?`` placeholder per column; the values are
        passed to execute() as the ``values`` keyword argument.
        """
        columns = list(data.keys())
        values = list(data.values())
        placeholders = ", ".join("?" for _ in values)

        query = f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})"
        return self.execute(query, values=values)

    def update_query(self, table: str, data: Dict[str, Any], where_clause: str = None) -> Any:
        """
        Execute UPDATE query, emulated with INSERT OVERWRITE.

        INSERT OVERWRITE replaces the table content with the SELECT result,
        so the predicate must not sit in a WHERE clause (that would discard
        every non-matching row). Instead each column is projected through
        ``CASE WHEN <predicate> THEN ? ELSE <column> END``, which keeps
        non-matching rows unchanged.

        Args:
            table: Target table.
            data: Column name to new value. Because whole rows are
                rewritten, this should list every column of the table in
                table order.
            where_clause: Optional predicate selecting rows to change; when
                omitted every row receives the new values.

        Returns:
            The result of execute(); the new values are passed as ``values``.
        """
        if where_clause:
            projections = [
                f"CASE WHEN ({where_clause}) THEN ? ELSE {column} END AS {column}"
                for column in data
            ]
        else:
            projections = [f"? AS {column}" for column in data]

        query = f"INSERT OVERWRITE TABLE {table} SELECT {', '.join(projections)} FROM {table}"
        return self.execute(query, values=list(data.values()))

    def delete_query(self, table: str, where_clause: str = None) -> Any:
        """
        Execute DELETE query, emulated with INSERT OVERWRITE.

        The table is rewritten with only the rows that must survive:
        - with a predicate, rows where it is false or NULL are kept (a
          DELETE removes only rows where the predicate is true);
        - without a predicate, no row is kept, so the table is emptied.
        """
        if where_clause:
            keep = f"NOT ({where_clause}) OR ({where_clause}) IS NULL"
        else:
            # DELETE without WHERE removes everything: keep no rows.
            keep = "1 = 0"

        query = f"INSERT OVERWRITE TABLE {table} SELECT * FROM {table} WHERE {keep}"
        return self.execute(query)

    def join_query(self, tables: List[str], join_conditions: List[str]) -> Any:
        """
        Execute JOIN query.

        ``join_conditions[i]`` is the ON condition for ``tables[i + 1]``;
        tables without a matching condition are attached with CROSS JOIN.

        Raises:
            XWNodeValueError: If fewer than two tables are given.
        """
        if len(tables) < 2:
            raise XWNodeValueError("JOIN requires at least 2 tables")

        query = f"SELECT * FROM {tables[0]}"
        for position, table in enumerate(tables[1:]):
            if position < len(join_conditions):
                query += f" JOIN {table} ON {join_conditions[position]}"
            else:
                query += f" CROSS JOIN {table}"

        return self.execute(query)

    def aggregate_query(self, table: str, functions: List[str], group_by: List[str] = None) -> Any:
        """Execute aggregate query selecting ``functions``, optionally grouped."""
        query = f"SELECT {', '.join(functions)} FROM {table}"
        if group_by:
            query += f" GROUP BY {', '.join(group_by)}"

        return self.execute(query)

    def _get_query_type(self, query: str) -> str:
        """
        Extract query type from HiveQL query.

        The first whole token is compared, so "SELECTED ..." is not mistaken
        for a SELECT statement.

        Returns:
            The upper-cased leading keyword if it is in ``_OPERATIONS``,
            otherwise "UNKNOWN".
        """
        match = self._LEADING_KEYWORD.match(query)
        if match:
            keyword = match.group(1).upper()
            if keyword in self._OPERATIONS:
                return keyword
        return "UNKNOWN"

    def _execute_select(self, query: str, **kwargs) -> Any:
        """Execute SELECT query (placeholder: echoes the query)."""
        return {"result": "HiveQL SELECT executed", "query": query}

    def _execute_insert(self, query: str, **kwargs) -> Any:
        """Execute INSERT query (placeholder: echoes the query)."""
        return {"result": "HiveQL INSERT executed", "query": query}

    def _execute_create(self, query: str, **kwargs) -> Any:
        """Execute CREATE query (placeholder: echoes the query)."""
        return {"result": "HiveQL CREATE executed", "query": query}

    def _execute_load(self, query: str, **kwargs) -> Any:
        """Execute LOAD query (placeholder: echoes the query)."""
        return {"result": "HiveQL LOAD executed", "query": query}

    def _estimate_complexity(self, query: str) -> str:
        """
        Estimate query complexity.

        Returns:
            "HIGH" if the query contains JOIN or UNION, "MEDIUM" if it
            contains GROUP BY or ORDER BY, otherwise "LOW".
        """
        text = query.upper()
        if "JOIN" in text or "UNION" in text:
            return "HIGH"
        if "GROUP BY" in text or "ORDER BY" in text:
            return "MEDIUM"
        return "LOW"

    def _estimate_cost(self, query: str) -> int:
        """Estimate query cost: 200, 100 or 50 for HIGH, MEDIUM or LOW complexity."""
        complexity = self._estimate_complexity(query)
        if complexity == "HIGH":
            return 200  # Higher cost due to MapReduce
        if complexity == "MEDIUM":
            return 100
        return 50

    def _estimate_mapreduce_jobs(self, query: str) -> int:
        """Estimate number of MapReduce jobs: one base job plus one per JOIN / GROUP BY / ORDER BY."""
        text = query.upper()
        jobs = 1  # Base job

        for clause in ("JOIN", "GROUP BY", "ORDER BY"):
            if clause in text:
                jobs += 1

        return jobs

    def _get_optimization_hints(self, query: str) -> List[str]:
        """Get query optimization hints based on clauses found in the query."""
        hints = []
        text = query.upper()

        if "SELECT *" in text:
            hints.append("Consider specifying columns instead of using *")
        if "WHERE" not in text and "SELECT" in text:
            hints.append("Consider adding WHERE clause to limit results")
        if "JOIN" in text:
            hints.append("Consider using partitioned tables for JOINs")
        if "GROUP BY" in text:
            hints.append("Consider using bucketed tables for GROUP BY")

        return hints
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
HQL Query Strategy
|
4
|
+
|
5
|
+
This module implements the HQL query strategy for Hibernate Query Language operations.
|
6
|
+
|
7
|
+
Company: eXonware.com
|
8
|
+
Author: Eng. Muhammad AlShehri
|
9
|
+
Email: connect@exonware.com
|
10
|
+
Version: 0.0.1.12
|
11
|
+
Generation Date: January 2, 2025
|
12
|
+
"""
|
13
|
+
|
14
|
+
from typing import Any, Dict, List, Optional
|
15
|
+
from .base import AStructuredQueryStrategy
|
16
|
+
from ...errors import XWNodeValueError
|
17
|
+
from ...contracts import QueryMode, QueryTrait
|
18
|
+
|
19
|
+
|
20
|
+
class HQLStrategy(AStructuredQueryStrategy):
    """
    HQL query strategy for Hibernate Query Language operations.

    execute() does not contact a database; it returns a dict that echoes
    the query back. The ``*_query`` helpers build the statement text and
    delegate to execute().
    """

    # validate_query() accepts a query containing any of these substrings.
    _KEYWORDS = ("FROM", "SELECT", "WHERE", "UPDATE", "DELETE", "INSERT")

    def __init__(self, **options):
        """Initialise the strategy and tag it with the HQL mode and traits."""
        super().__init__(**options)
        self._mode = QueryMode.HQL
        self._traits = QueryTrait.STRUCTURED | QueryTrait.ANALYTICAL | QueryTrait.BATCH

    def execute(self, query: str, **kwargs) -> Any:
        """
        Execute HQL query.

        Raises:
            XWNodeValueError: If the query fails validate_query().
        """
        if not self.validate_query(query):
            raise XWNodeValueError(f"Invalid HQL query: {query}")
        return {"result": "HQL query executed", "query": query}

    def validate_query(self, query: str) -> bool:
        """
        Validate HQL query syntax.

        Returns:
            True if the upper-cased query contains any entry of
            ``_KEYWORDS``; False otherwise, including for empty or
            non-string input.
        """
        if not query or not isinstance(query, str):
            return False
        upper = query.upper()
        return any(keyword in upper for keyword in self._KEYWORDS)

    def get_query_plan(self, query: str) -> Dict[str, Any]:
        """Get HQL query execution plan (a fixed plan, independent of the query)."""
        return {
            "query_type": "HQL",
            "complexity": "MEDIUM",
            "estimated_cost": 100
        }

    def select_query(self, table: str, columns: List[str], where_clause: str = None) -> Any:
        """Execute SELECT query over ``columns`` of ``table``, optionally filtered."""
        query = f"SELECT {', '.join(columns)} FROM {table}"
        if where_clause:
            query += f" WHERE {where_clause}"
        return self.execute(query)

    def insert_query(self, table: str, data: Dict[str, Any]) -> Any:
        """
        Execute INSERT query.

        The statement lists the columns and uses one named parameter
        (``:column``) per value instead of embedding a dict repr; the values
        are passed to execute() as the ``parameters`` keyword argument.
        """
        columns = list(data)
        column_list = ", ".join(columns)
        parameter_list = ", ".join(f":{column}" for column in columns)
        query = f"INSERT INTO {table} ({column_list}) VALUES ({parameter_list})"
        return self.execute(query, parameters=dict(data))

    def update_query(self, table: str, data: Dict[str, Any], where_clause: str = None) -> Any:
        """
        Execute UPDATE query.

        Each column is assigned from a named parameter (``column = :column``)
        and ``where_clause``, when given, restricts the affected rows. The
        values are passed to execute() as the ``parameters`` keyword argument.
        """
        assignments = ", ".join(f"{column} = :{column}" for column in data)
        query = f"UPDATE {table} SET {assignments}"
        if where_clause:
            query += f" WHERE {where_clause}"
        return self.execute(query, parameters=dict(data))

    def delete_query(self, table: str, where_clause: str = None) -> Any:
        """Execute DELETE query; without ``where_clause`` every row is targeted."""
        query = f"DELETE FROM {table}"
        if where_clause:
            query += f" WHERE {where_clause}"
        return self.execute(query)

    def join_query(self, tables: List[str], join_conditions: List[str]) -> Any:
        """
        Execute JOIN query.

        ``join_conditions[i]`` is the ON condition for ``tables[i + 1]``;
        tables without a matching condition are joined without one.

        Raises:
            XWNodeValueError: If fewer than two tables are given.
        """
        if len(tables) < 2:
            raise XWNodeValueError("JOIN requires at least 2 tables")

        query = f"FROM {tables[0]}"
        for position, table in enumerate(tables[1:]):
            query += f" JOIN {table}"
            if position < len(join_conditions):
                query += f" ON {join_conditions[position]}"
        return self.execute(query)

    def aggregate_query(self, table: str, functions: List[str], group_by: List[str] = None) -> Any:
        """Execute aggregate query selecting ``functions``, optionally grouped."""
        query = f"SELECT {', '.join(functions)} FROM {table}"
        if group_by:
            query += f" GROUP BY {', '.join(group_by)}"
        return self.execute(query)
|
@@ -0,0 +1,219 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
JMESPath Query Strategy
|
4
|
+
|
5
|
+
This module implements the JMESPath query strategy for JSON data queries.
|
6
|
+
|
7
|
+
Company: eXonware.com
|
8
|
+
Author: Eng. Muhammad AlShehri
|
9
|
+
Email: connect@exonware.com
|
10
|
+
Version: 0.0.1.12
|
11
|
+
Generation Date: January 2, 2025
|
12
|
+
"""
|
13
|
+
|
14
|
+
import re
|
15
|
+
from typing import Any, Dict, List, Optional, Union
|
16
|
+
from .base import ADocumentQueryStrategy
|
17
|
+
from ...errors import XWNodeTypeError, XWNodeValueError
|
18
|
+
from ...contracts import QueryMode, QueryTrait
|
19
|
+
|
20
|
+
|
21
|
+
class JMESPathStrategy(ADocumentQueryStrategy):
    """
    JMESPath query strategy for JSON data queries.

    Supports:
    - JMESPath expressions
    - Projections and filters
    - Functions and operators
    - Multi-select and pipe expressions
    - Flatten and sort operations

    The ``_execute_*`` handlers in this class do not evaluate the
    expression; they return a dict that echoes the query back.
    """

    # Function names recognised by validate_query() / _extract_expressions().
    _FUNCTIONS = (
        "length", "keys", "values", "sort", "reverse", "flatten", "unique",
        "join", "split", "to_string", "to_number", "type", "starts_with",
        "ends_with", "contains", "abs", "ceil", "floor", "max", "min", "sum",
        "avg", "sort_by", "group_by", "map", "filter", "merge", "merge_left",
        "merge_right",
    )

    # One path segment: a bare or double-quoted identifier, optionally
    # followed by bracket suffixes such as [0] or [1:3].
    _PATH_SEGMENT = (
        r'(?:[A-Za-z_][A-Za-z0-9_]*|"(?:\\.|[^"\\])+")'
        r"(?:\[[^\]]*\])*"
    )

    # A dotted chain of path segments, e.g. people[0].name
    _PATH = re.compile(_PATH_SEGMENT + r"(?:\." + _PATH_SEGMENT + r")*")

    # A "|" that is not part of "||".
    _SINGLE_PIPE = re.compile(r"(?<!\|)\|(?!\|)")

    # An identifier immediately followed by "(", i.e. a function call.
    _CALL = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*\(")

    def __init__(self, **options):
        """Initialise the strategy and tag it with the JMESPath mode and traits."""
        super().__init__(**options)
        self._mode = QueryMode.JMESPATH
        self._traits = QueryTrait.DOCUMENT | QueryTrait.STRUCTURED | QueryTrait.ANALYTICAL

    def execute(self, query: str, **kwargs) -> Any:
        """
        Execute JMESPath query.

        Args:
            query: JMESPath expression text.
            **kwargs: Forwarded unchanged to the selected handler.

        Raises:
            XWNodeValueError: If the query fails validate_query() or its
                type has no handler.
        """
        if not self.validate_query(query):
            raise XWNodeValueError(f"Invalid JMESPath query: {query}")

        query_type = self._get_query_type(query)
        handlers = {
            "projection": self._execute_projection,
            "filter": self._execute_filter,
            "function": self._execute_function,
            # Plain paths are what path_query() produces; without a handler
            # every path query would be rejected as unsupported.
            "path": self._execute_path,
        }
        handler = handlers.get(query_type)
        if handler is None:
            raise XWNodeValueError(f"Unsupported query type: {query_type}")
        return handler(query, **kwargs)

    def validate_query(self, query: str) -> bool:
        """
        Validate JMESPath query syntax.

        A stripped, non-empty string is accepted when any of these holds:
        - it starts with ".", "[", "@" or "{";
        - it is a dotted identifier path such as ``people[0].name``;
        - it contains one of ``_FUNCTIONS`` as a substring;
        - it contains a logical or comparison operator.

        Returns:
            True if accepted, False otherwise (including for empty or
            non-string input).
        """
        if not query or not isinstance(query, str):
            return False

        text = query.strip()

        if text.startswith((".", "[", "@", "{")):
            return True

        if self._PATH.fullmatch(text):
            return True

        if any(func in text for func in self._FUNCTIONS):
            return True

        return any(op in text for op in ("||", "&&", "==", "!=", ">", "<"))

    def get_query_plan(self, query: str) -> Dict[str, Any]:
        """
        Get JMESPath query execution plan.

        Returns:
            A dict with the keys "query_type", "operation", "complexity",
            "estimated_cost", "expressions" and "optimization_hints".
        """
        query_type = self._get_query_type(query)

        return {
            "query_type": query_type,
            "operation": query_type,
            "complexity": self._estimate_complexity(query),
            "estimated_cost": self._estimate_cost(query),
            "expressions": self._extract_expressions(query),
            "optimization_hints": self._get_optimization_hints(query),
        }

    def path_query(self, path: str) -> Any:
        """
        Execute path-based query.

        JMESPath paths are bare (``a.b``), not ``$``-rooted, so a leading
        "$" and leading dots on ``path`` are dropped. An empty result is
        replaced by "@", the current node.
        """
        return self.execute(self._to_expression(path))

    def filter_query(self, filter_expression: str) -> Any:
        """Execute filter query, wrapping the expression as ``[?<expression>]``."""
        query = f"[?{filter_expression}]"
        return self.execute(query)

    def projection_query(self, fields: List[str]) -> Any:
        """
        Execute projection query.

        A single field is queried as a plain path. Several fields become a
        multi-select hash whose keys are double-quoted identifiers, e.g.
        ``{"a": @.a, "b": @.b}``.

        Raises:
            XWNodeValueError: If ``fields`` is empty.
        """
        if not fields:
            raise XWNodeValueError("Projection requires at least one field")

        if len(fields) == 1:
            query = self._to_expression(fields[0])
        else:
            pairs = ", ".join(
                f"{self._quote_key(field)}: @.{field}" for field in fields
            )
            query = f"{{{pairs}}}"

        return self.execute(query)

    def sort_query(self, sort_fields: List[str], order: str = "asc") -> Any:
        """
        Execute sort query.

        Only the first entry of ``sort_fields`` is used as the sort key.
        For ``order == "desc"`` (case-insensitive) the sorted result is
        piped through reverse().

        Raises:
            XWNodeValueError: If ``sort_fields`` is empty.
        """
        if not sort_fields:
            raise XWNodeValueError("Sort requires at least one field")

        query = f"sort_by(@, &{sort_fields[0]})"
        if order.lower() == "desc":
            query += " | reverse(@)"

        return self.execute(query)

    def limit_query(self, limit: int, offset: int = 0) -> Any:
        """Execute limit query as a slice: ``[offset:offset+limit]`` or ``[:limit]``."""
        if offset > 0:
            query = f"[{offset}:{offset + limit}]"
        else:
            query = f"[:{limit}]"

        return self.execute(query)

    @staticmethod
    def _to_expression(path: str) -> str:
        """Turn a possibly ``$``-rooted or dot-prefixed path into a bare expression."""
        expression = str(path).strip()
        if expression.startswith("$"):
            expression = expression[1:]
        expression = expression.lstrip(".")
        return expression or "@"

    @staticmethod
    def _quote_key(field: str) -> str:
        """Render ``field`` as a double-quoted identifier, escaping ``\\`` and ``"``."""
        escaped = str(field).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _get_query_type(self, query: str) -> str:
        """
        Extract query type from JMESPath query.

        Returns:
            "filter" if the query contains both "[" and "]", else
            "projection" for "{" and "}", else "function" for "(" and ")",
            otherwise "path".
        """
        text = query.strip()

        if "[" in text and "]" in text:
            return "filter"
        if "{" in text and "}" in text:
            return "projection"
        if "(" in text and ")" in text:
            return "function"
        return "path"

    def _execute_projection(self, query: str, **kwargs) -> Any:
        """Execute projection query (placeholder: echoes the query)."""
        return {"result": "JMESPath projection executed", "query": query}

    def _execute_filter(self, query: str, **kwargs) -> Any:
        """Execute filter query (placeholder: echoes the query)."""
        return {"result": "JMESPath filter executed", "query": query}

    def _execute_function(self, query: str, **kwargs) -> Any:
        """Execute function query (placeholder: echoes the query)."""
        return {"result": "JMESPath function executed", "query": query}

    def _execute_path(self, query: str, **kwargs) -> Any:
        """Execute plain path query (placeholder: echoes the query)."""
        return {"result": "JMESPath path executed", "query": query}

    def _estimate_complexity(self, query: str) -> str:
        """
        Estimate query complexity from the number of expression kinds found.

        Returns:
            "HIGH" for more than 5, "MEDIUM" for more than 2, otherwise "LOW".
        """
        expression_count = len(self._extract_expressions(query))

        if expression_count > 5:
            return "HIGH"
        if expression_count > 2:
            return "MEDIUM"
        return "LOW"

    def _estimate_cost(self, query: str) -> int:
        """Estimate query cost: 80, 40 or 20 for HIGH, MEDIUM or LOW complexity."""
        complexity = self._estimate_complexity(query)
        if complexity == "HIGH":
            return 80
        if complexity == "MEDIUM":
            return 40
        return 20

    def _extract_expressions(self, query: str) -> List[str]:
        """
        Extract JMESPath expressions from query.

        Structural markers come first ("path", "filter", "projection",
        "pipe", "logical", "comparison"), followed by the names of the known
        functions the query calls. "pipe" is reported only for a single "|",
        not for the "||" operator. A function counts only when its exact
        name is followed by "(", so "sort_by(" is not also reported as "sort".
        """
        expressions = []

        if "." in query:
            expressions.append("path")
        if "[" in query and "]" in query:
            expressions.append("filter")
        if "{" in query and "}" in query:
            expressions.append("projection")
        if self._SINGLE_PIPE.search(query):
            expressions.append("pipe")
        if "||" in query or "&&" in query:
            expressions.append("logical")
        if "==" in query or "!=" in query or ">" in query or "<" in query:
            expressions.append("comparison")

        called = set(self._CALL.findall(query))
        expressions.extend(func for func in self._FUNCTIONS if func in called)

        return expressions

    def _get_optimization_hints(self, query: str) -> List[str]:
        """
        Get query optimization hints.

        The pipe hint ignores "||", and the sort_by hint is given only when
        the query calls sort() itself, not when it already uses sort_by().
        """
        hints = []

        if self._SINGLE_PIPE.search(query):
            hints.append("Consider combining operations to reduce pipe operations")

        if "[" in query and "]" in query:
            hints.append("Consider using specific paths instead of array operations when possible")

        if "{" in query and "}" in query:
            hints.append("Consider using multi-select for better performance")

        if "sort" in self._CALL.findall(query):
            hints.append("Consider using sort_by for complex sorting operations")

        return hints
|