micromegas-0.7.0-py3-none-any.whl → micromegas-0.9.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- micromegas/__init__.py +1 -2
- micromegas/flightsql/__init__.py +0 -6
- micromegas/flightsql/client.py +93 -205
- {micromegas-0.7.0.dist-info → micromegas-0.9.0.dist-info}/METADATA +5 -5
- {micromegas-0.7.0.dist-info → micromegas-0.9.0.dist-info}/RECORD +6 -11
- {micromegas-0.7.0.dist-info → micromegas-0.9.0.dist-info}/WHEEL +1 -1
- micromegas/flightsql/FlightSql_pb2_grpc.py +0 -24
- micromegas/flightsql/Flight_pb2.py +0 -116
- micromegas/flightsql/Flight_pb2_grpc.py +0 -579
- micromegas/flightsql/arrow_flatbuffers.py +0 -2925
- micromegas/flightsql/arrow_ipc_reader.py +0 -99
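
Most of what 0.9.0 removes is generated protobuf/gRPC stubs (Flight_pb2.py, Flight_pb2_grpc.py, FlightSql_pb2_grpc.py), the vendored arrow_flatbuffers.py, and the hand-written IPC reader arrow_ipc_reader.py whose deletion is shown below. The diff alone does not say what replaces them, but pyarrow ships its own IPC reader covering the same ground; a minimal sketch (an assumption, not code from this package) of round-tripping a record batch through pyarrow's stream reader:

```python
# Sketch: decode Arrow IPC data with pyarrow's built-in reader instead of a
# hand-written one. Uses only the public pyarrow API, no micromegas code.
import pyarrow
import pyarrow.ipc

batch = pyarrow.record_batch([pyarrow.array([1, 2, 3])], names=["x"])

# Write the batch to an in-memory IPC stream...
sink = pyarrow.BufferOutputStream()
with pyarrow.ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)

# ...and read it back; open_stream parses the schema and record batches.
reader = pyarrow.ipc.open_stream(sink.getvalue())
table = reader.read_all()
print(table.to_pydict())  # {'x': [1, 2, 3]}
```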
micromegas/flightsql/arrow_ipc_reader.py (deleted)

```diff
@@ -1,99 +0,0 @@
-import pyarrow
-
-# based on https://github.com/apache/arrow-rs/blob/main/arrow-ipc/src/reader.rs
-
-class ArrayReader:
-    def __init__(self, schema, nodes, buffers):
-        self.schema = schema
-        self.nodes = nodes
-        self.current_node = 0
-        self.buffers = buffers
-        self.current_buffer = 0
-
-    def next_node(self):
-        assert self.current_node < len(self.nodes)
-        node = self.nodes[self.current_node]
-        self.current_node += 1
-        return node
-
-    def next_buffer(self):
-        assert self.current_buffer < len(self.buffers)
-        buffer = self.buffers[self.current_buffer]
-        self.current_buffer += 1
-        return buffer
-
-
-def create_primitive_array(node, data_type, null_buffer, data_buffer):
-    return pyarrow.NumericArray.from_buffers(
-        data_type, node.Length(), [null_buffer, data_buffer], node.NullCount()
-    )
-
-
-def create_string_array(node, data_type, null_buffer, offset_buffer, data_buffer):
-    return pyarrow.NumericArray.from_buffers(
-        data_type,
-        node.Length(),
-        [null_buffer, offset_buffer, data_buffer],
-        node.NullCount(),
-    )
-
-
-def read_column(reader, arrow_field):
-    if arrow_field.type in [
-        pyarrow.string(),
-        pyarrow.binary(),
-        pyarrow.large_binary(),
-        pyarrow.large_string(),
-    ]:
-        return create_string_array(
-            reader.next_node(),
-            arrow_field.type,
-            reader.next_buffer(),
-            reader.next_buffer(),
-            reader.next_buffer(),
-        )
-    elif pyarrow.types.is_primitive(arrow_field.type):
-        return create_primitive_array(
-            reader.next_node(),
-            arrow_field.type,
-            reader.next_buffer(),
-            reader.next_buffer(),
-        )
-    elif pyarrow.types.is_list(arrow_field.type):
-        list_node = reader.next_node()
-        list_buffers = [reader.next_buffer(), reader.next_buffer()]
-        values = read_column(reader, arrow_field.type.value_field)
-        return pyarrow.ListArray.from_buffers(
-            arrow_field.type,
-            list_node.Length(),
-            list_buffers,
-            list_node.NullCount(),
-            0,
-            [values],
-        )
-    elif pyarrow.types.is_struct(arrow_field.type):
-        struct_node = reader.next_node()
-        null_buffer = reader.next_buffer()
-        children = []
-        for child_field in arrow_field.type.fields:
-            child_column = read_column(reader, child_field)
-            children.append(child_column)
-        return pyarrow.StructArray.from_buffers(
-            arrow_field.type,
-            struct_node.Length(),
-            [null_buffer],
-            struct_node.NullCount(),
-            0,
-            children,
-        )
-    else:
-        raise RuntimeError("unsupported arrow field type {}".format(arrow_field.type))
-
-
-def read_record_batch(arrow_schema, nodes, buffers):
-    reader = ArrayReader(arrow_schema, nodes, buffers)
-    columns = []
-    for arrow_field in arrow_schema:
-        column = read_column(reader, arrow_field)
-        columns.append(column)
-    return pyarrow.RecordBatch.from_arrays(columns, schema=arrow_schema)
```
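
For reference, the removed read_record_batch was driven with the field nodes and body buffers that the (also removed) arrow_flatbuffers module parsed out of an IPC message. A hypothetical sketch of that contract, with FakeNode standing in for the flatbuffers node type (only its Length()/NullCount() accessors matter here):

```python
import pyarrow

class FakeNode:
    """Hypothetical stand-in for the flatbuffers FieldNode the reader expects."""
    def __init__(self, length, null_count):
        self._length = length
        self._null_count = null_count
    def Length(self):
        return self._length
    def NullCount(self):
        return self._null_count

schema = pyarrow.schema([pyarrow.field("x", pyarrow.int32())])
# Buffers in IPC order for one int32 column: validity bitmap (None when there
# are no nulls), then the data buffer.
null_buf, data_buf = pyarrow.array([1, 2, 3], type=pyarrow.int32()).buffers()
batch = read_record_batch(schema, [FakeNode(3, 0)], [null_buf, data_buf])
print(batch.to_pydict())  # {'x': [1, 2, 3]}
```

Note that create_primitive_array and create_string_array both call pyarrow.NumericArray.from_buffers even for string columns; this works only because from_buffers is inherited unchanged from pyarrow.Array and dispatches on the data_type argument, so pyarrow.Array.from_buffers would express the same thing more directly.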