micromegas 0.1.3__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- micromegas/__init__.py +6 -0
- micromegas/client.py +107 -17
- micromegas/perfetto.py +164 -23
- micromegas/request.py +16 -0
- micromegas/time.py +30 -0
- {micromegas-0.1.3.dist-info → micromegas-0.1.6.dist-info}/METADATA +1 -1
- {micromegas-0.1.3.dist-info → micromegas-0.1.6.dist-info}/RECORD +8 -7
- {micromegas-0.1.3.dist-info → micromegas-0.1.6.dist-info}/WHEEL +0 -0
micromegas/__init__.py
CHANGED
micromegas/client.py
CHANGED
```diff
@@ -1,4 +1,6 @@
 from . import request
+from . import time
+import cbor2
 
 
 class Client:
@@ -6,17 +8,28 @@ class Client:
         self.analytics_base_url = base_url + "analytics/"
         self.headers = headers
 
+    def find_process(self, process_id):
+        return request.request(
+            self.analytics_base_url + "find_process",
+            {"process_id": process_id},
+            headers=self.headers,
+        )
+
     def query_processes(self, begin, end, limit):
         return request.request(
             self.analytics_base_url + "query_processes",
-            {
+            {
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
+                "limit": limit,
+            },
             headers=self.headers,
         )
 
     def query_streams(self, begin, end, limit, process_id=None, tag_filter=None):
         args = {
-            "begin":
-            "end":
+            "begin": time.format_datetime(begin),
+            "end": time.format_datetime(end),
             "limit": limit,
             "process_id": process_id,
             "tag_filter": tag_filter,
@@ -30,8 +43,8 @@ class Client:
 
     def query_blocks(self, begin, end, limit, stream_id):
         args = {
-            "begin":
-            "end":
+            "begin": time.format_datetime(begin),
+            "end": time.format_datetime(end),
             "limit": limit,
             "stream_id": stream_id,
         }
@@ -46,8 +59,8 @@ class Client:
         return request.request(
             self.analytics_base_url + "query_spans",
             {
-                "begin":
-                "end":
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
                 "limit": limit,
                 "stream_id": stream_id,
             },
@@ -58,35 +71,112 @@ class Client:
         return request.request(
             self.analytics_base_url + "query_thread_events",
             {
-                "begin":
-                "end":
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
                 "limit": limit,
                 "stream_id": stream_id,
             },
             headers=self.headers,
         )
-
-    def query_log_entries(
+
+    def query_log_entries(
+        self,
+        begin,
+        end,
+        limit=None,  # Necessary if stream_id is specified, ignored otherwise
+        stream_id=None,  # If none, query is run on cached lakehouse using query engine
+        sql=None,  # Necessary if stream_id is None, ignored otherwise
+    ):
         return request.request(
             self.analytics_base_url + "query_log_entries",
             {
-                "begin":
-                "end":
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
                 "limit": limit,
                 "stream_id": stream_id,
+                "sql": sql,
             },
             headers=self.headers,
         )
 
-    def query_metrics(self, begin, end, limit, stream_id):
+    def query_metrics(self, begin, end, limit=None, stream_id=None, sql=None):
         return request.request(
             self.analytics_base_url + "query_metrics",
             {
-                "begin":
-                "end":
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
                 "limit": limit,
                 "stream_id": stream_id,
+                "sql": sql,
             },
             headers=self.headers,
         )
-
+
+    def query_view(self, view_set_name, view_instance_id, begin, end, sql):
+        return request.request(
+            self.analytics_base_url + "query_view",
+            {
+                "view_set_name": view_set_name,
+                "view_instance_id": view_instance_id,
+                "begin": time.format_datetime(begin),
+                "end": time.format_datetime(end),
+                "sql": sql,
+            },
+            headers=self.headers,
+        )
+
+    def query_partitions(self):
+        args = {}
+        return request.request(
+            self.analytics_base_url + "query_partitions",
+            args,
+            headers=self.headers,
+        )
+
+    def __stream_request(self, endpoint, args):
+        response = request.streamed_request(
+            self.analytics_base_url + endpoint,
+            args,
+            headers=self.headers,
+        )
+        while response.raw.readable():
+            try:
+                print(cbor2.load(response.raw))
+            except cbor2.CBORDecodeEOF:
+                break
+
+    def create_or_update_partitions(
+        self, view_set_name, view_instance_id, begin, end, partition_delta_seconds
+    ):
+        args = {
+            "view_set_name": view_set_name,
+            "view_instance_id": view_instance_id,
+            "begin": time.format_datetime(begin),
+            "end": time.format_datetime(end),
+            "partition_delta_seconds": partition_delta_seconds,
+        }
+        self.__stream_request("create_or_update_partitions", args)
+
+    def merge_partitions(
+        self, view_set_name, view_instance_id, begin, end, partition_delta_seconds
+    ):
+        args = {
+            "view_set_name": view_set_name,
+            "view_instance_id": view_instance_id,
+            "begin": time.format_datetime(begin),
+            "end": time.format_datetime(end),
+            "partition_delta_seconds": partition_delta_seconds,
+        }
+        self.__stream_request("merge_partitions", args)
+
+    def retire_partitions(
+        self, view_set_name, view_instance_id, begin, end, partition_delta_seconds
+    ):
+        args = {
+            "view_set_name": view_set_name,
+            "view_instance_id": view_instance_id,
+            "begin": time.format_datetime(begin),
+            "end": time.format_datetime(end),
+            "partition_delta_seconds": partition_delta_seconds,
+        }
+        self.__stream_request("retire_partitions", args)
```
micromegas/perfetto.py
CHANGED
```diff
@@ -1,21 +1,52 @@
 import crc
+from tqdm import tqdm
+
 
 # hack to allow perfetto proto imports
 # you can then import the protos like this: from protos.perfetto.trace import trace_pb2
 def load_perfetto_protos():
     import sys
     import pathlib
-
+
+    perfetto_folder = pathlib.Path(__file__).parent.absolute() / "thirdparty/perfetto"
     sys.path.append(str(perfetto_folder))
 
+
 def crc64_str(s):
     calculator = crc.Calculator(crc.Crc64.CRC64)
     return calculator.checksum(str.encode(s))
 
+
+def generate_batches(df_blocks):
+    nb_events_threshold = 1024 * 1024
+    begin = df_blocks.iloc[0]["begin_time"]
+    end = df_blocks.iloc[0]["end_time"]
+    nb_events = 0
+    for index, block in df_blocks.iterrows():
+        nb_events += block["nb_objects"]
+        end = block["end_time"]
+        if nb_events > nb_events_threshold:
+            yield (begin, end, nb_events)
+            begin = block["end_time"]
+            nb_events = 0
+    if nb_events > 0:
+        yield (begin, end, nb_events)
+
+
 class Writer:
-
+    """
+    Fetches thread events from the analytics server and formats them in the perfetto format.
+    Traces can be viewed using https://ui.perfetto.dev/
+    """
+
+    def __init__(self, client, process_id, exe):
         load_perfetto_protos()
         from protos.perfetto.trace import trace_pb2, trace_packet_pb2
+
+        self.names = {}
+        self.categories = {}
+        self.source_locations = {}
+        self.first = True
         self.client = client
         self.trace = trace_pb2.Trace()
         self.packets = self.trace.packet
@@ -27,8 +58,42 @@ class Writer:
         packet.track_descriptor.process.process_name = exe
         self.packets.append(packet)
 
-    def
+    def get_name_iid(self, name):
+        iid = self.names.get(name)
+        is_new = False
+        if iid is None:
+            is_new = True
+            iid = len(self.names) + 1
+            self.names[name] = iid
+        return iid, is_new
+
+    def get_category_iid(self, cat):
+        iid = self.categories.get(cat)
+        is_new = False
+        if iid is None:
+            is_new = True
+            iid = len(self.categories) + 1
+            self.categories[cat] = iid
+        return iid, is_new
+
+    def get_location_iid(self, loc):
+        iid = self.source_locations.get(loc)
+        is_new = False
+        if iid is None:
+            is_new = True
+            iid = len(self.source_locations) + 1
+            self.source_locations[loc] = iid
+        return iid, is_new
+
+    def append_thread(self, stream_id, thread_name, thread_id):
         from protos.perfetto.trace import trace_pb2, trace_packet_pb2, track_event
+
+        df_blocks = self.client.query_blocks(
+            begin=None, end=None, limit=100_000, stream_id=stream_id
+        )
+        if df_blocks.empty:
+            return
+
         packet = trace_packet_pb2.TracePacket()
         thread_uuid = crc64_str(stream_id)
         packet.track_descriptor.uuid = thread_uuid
@@ -39,26 +104,102 @@ class Writer:
         self.packets.append(packet)
         trusted_packet_sequence_id = 1
 
-
-
-
-
-
-
-
-
-        packet
-
-
-
-
-
-
-
+        batches = list(generate_batches(df_blocks))
+        for begin, end, limit in tqdm(batches, unit="event batches"):
+            df_spans = self.client.query_spans(
+                begin, end, limit=limit, stream_id=stream_id
+            )
+            begin_ns = df_spans["begin"].astype("int64")
+            end_ns = df_spans["end"].astype("int64")
+            for index, span in df_spans.iterrows():
+                packet = trace_packet_pb2.TracePacket()
+                packet.timestamp = begin_ns[index]
+                packet.track_event.type = (
+                    track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_BEGIN
+                )
+                packet.track_event.track_uuid = thread_uuid
+                span_name = span["name"]
+                name_iid, new_name = self.get_name_iid(span_name)
+                packet.track_event.name_iid = name_iid
+                category_iid, new_category = self.get_category_iid(span["target"])
+                packet.track_event.category_iids.append(category_iid)
+
+                source_location = (span["filename"], span["line"])
+                source_location_iid, new_source_location = self.get_location_iid(source_location)
+                packet.track_event.source_location_iid = source_location_iid
+                if self.first:
+                    # this is necessary for interning to work
+                    self.first = False
+                    packet.first_packet_on_sequence = True
+                    packet.sequence_flags = 3
+                else:
+                    packet.sequence_flags = 2
+
+                if new_name:
+                    event_name = packet.interned_data.event_names.add()
+                    event_name.iid = name_iid
+                    event_name.name = span_name
+                if new_category:
+                    cat_name = packet.interned_data.event_categories.add()
+                    cat_name.iid = category_iid
+                    cat_name.name = span["target"]
+                if new_source_location:
+                    loc = packet.interned_data.source_locations.add()
+                    loc.iid = source_location_iid
+                    loc.file_name = source_location[0]
+                    loc.line_number = source_location[1]
+
+                packet.trusted_packet_sequence_id = trusted_packet_sequence_id
+                self.packets.append(packet)
+
+                packet = trace_packet_pb2.TracePacket()
+                packet.timestamp = end_ns[index]
+                packet.track_event.type = (
+                    track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_END
+                )
+                packet.track_event.track_uuid = thread_uuid
+                packet.track_event.name_iid = name_iid
+                packet.track_event.category_iids.append(category_iid)
+                packet.track_event.source_location_iid = source_location_iid
+                packet.sequence_flags = 2
+                packet.trusted_packet_sequence_id = trusted_packet_sequence_id
+
+                self.packets.append(packet)
+
+    def write_file(self, filename):
         with open(filename, "wb") as f:
             f.write(self.trace.SerializeToString())
-
-
 
-
+
+def get_process_cpu_streams(client, process_id):
+    def prop_to_dict(props):
+        prop_dict = {}
+        for p in props:
+            prop_dict[p["key"]] = p["value"]
+        return prop_dict
+
+    def get_thread_name(prop_dict):
+        return prop_dict["thread-name"]
+
+    def get_thread_id(prop_dict):
+        return int(prop_dict["thread-id"])
+
+    df_streams = client.query_streams(
+        begin=None, end=None, limit=1024, tag_filter="cpu", process_id=process_id
+    )
+    df_streams["properties"] = df_streams["properties"].apply(prop_to_dict)
+    df_streams["thread_name"] = df_streams["properties"].apply(get_thread_name)
+    df_streams["thread_id"] = df_streams["properties"].apply(get_thread_id)
+    return df_streams
+
+
+def write_process_trace(client, process_id, trace_filepath):
+    process_df = client.find_process(process_id)
+    assert process_df.shape[0] == 1
+    process = process_df.iloc[0]
+    streams = get_process_cpu_streams(client, process_id)
+    writer = Writer(client, process_id, process["exe"])
+    for index, stream in tqdm(list(streams.iterrows()), unit="threads"):
+        stream_id = stream["thread_id"]
+        writer.append_thread(stream["stream_id"], stream["thread_name"], stream_id)
+    writer.write_file(trace_filepath)
```
micromegas/request.py
CHANGED
```diff
@@ -18,3 +18,19 @@ def request(url, args, headers={}):
     )
     table = pq.read_table(io.BytesIO(response.content))
     return table.to_pandas()
+
+def streamed_request(url, args, headers={}):
+    response = requests.post(
+        url,
+        headers=headers,
+        data=cbor2.dumps(args),
+        stream=True,
+        timeout=300,
+    )
+    if response.status_code != 200:
+        raise Exception(
+            "http request url={2} failed with code={0} text={1}".format(
+                response.status_code, response.text, url
+            )
+        )
+    return response
```
micromegas/time.py
ADDED
```diff
@@ -0,0 +1,30 @@
+import datetime
+import pandas
+import re
+
+def format_datetime(value):
+    nonetype = type(None)
+    match type(value):
+        case datetime.datetime:
+            if value.tzinfo is None:
+                raise RuntimeError("datetime needs a valid time zone")
+            return value.isoformat()
+        case pandas.Timestamp:
+            return value.isoformat()
+        case nonetype:
+            return None
+    raise RuntimeError("value of unknown type in format_datetime")
+
+def parse_time_delta(user_string):
+    parser = re.compile("(\\d+)([mhd])")
+    m = parser.match(user_string)
+    nbr = int(m.group(1))
+    unit = m.group(2)
+    if unit == "m":
+        return datetime.timedelta(minutes=nbr)
+    elif unit == "h":
+        return datetime.timedelta(hours=nbr)
+    elif unit == "d":
+        return datetime.timedelta(days=nbr)
+    else:
+        raise RuntimeError("invalid time delta: " + user_string)
```
{micromegas-0.1.3.dist-info → micromegas-0.1.6.dist-info}/RECORD
CHANGED
```diff
@@ -1,7 +1,7 @@
-micromegas/__init__.py,sha256=
-micromegas/client.py,sha256=
-micromegas/perfetto.py,sha256=
-micromegas/request.py,sha256=
+micromegas/__init__.py,sha256=E_j3LFxMk9rSMJunwDCi_90NsRHm1fKwjj_6KGMYCjQ,246
+micromegas/client.py,sha256=M-LrQu9wHaJiYz-1TOM06O7Smd-R4EuE_KeZDS94eoA,5973
+micromegas/perfetto.py,sha256=yuIe5iKvca61aWMBQNziSGM-DHcOEsiobtKx2SsNQ3E,7829
+micromegas/request.py,sha256=NV0urom5P3_P2q94gX51hxW_Fnrp_DDRorsP3mUb5NM,941
 micromegas/thirdparty/perfetto/protos/perfetto/common/android_energy_consumer_descriptor_pb2.py,sha256=l8QNXqnB-mJIkuFr2s1YoLQXHm3G-ZcOGp_OW_hQ0TE,1887
 micromegas/thirdparty/perfetto/protos/perfetto/common/android_log_constants_pb2.py,sha256=O5zDZkV8Nji0O2ryJRP4FTWdgdOBlDymWNcpNNDOFxk,2017
 micromegas/thirdparty/perfetto/protos/perfetto/common/builtin_clock_pb2.py,sha256=7qLL_BENTxRFQH8DfHDvyWAkgwy0VHrOaE8XhL8iZgk,1822
@@ -207,6 +207,7 @@ micromegas/thirdparty/perfetto/protos/perfetto/trace/track_event/track_event_pb2
 micromegas/thirdparty/perfetto/protos/perfetto/trace/translation/translation_table_pb2.py,sha256=-hkUdv07TsSDHH1mier2KyAhmivK4GSzEfAzAEYv20U,6630
 micromegas/thirdparty/perfetto/protos/perfetto/trace/trigger_pb2.py,sha256=We7Yi8o3cEcrSNxY1zLUUO6tEWnD36C2f3O_s8_qv0I,1435
 micromegas/thirdparty/perfetto/protos/perfetto/trace/ui_state_pb2.py,sha256=Af-SXwhroNhRXMrtw6e2eU1liCImMRxSdmkt_AuSHf8,1752
-micromegas
-micromegas-0.1.
-micromegas-0.1.
+micromegas/time.py,sha256=teMWk_hniW2jI7MWJ2w0HuckdTiebdr35-snUz_3cfU,911
+micromegas-0.1.6.dist-info/METADATA,sha256=yZywCrAstjiI57lhWaTOTaCHE3MfqlCuaG0bASmgcCM,839
+micromegas-0.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+micromegas-0.1.6.dist-info/RECORD,,
```
{micromegas-0.1.3.dist-info → micromegas-0.1.6.dist-info}/WHEEL
File without changes