micromegas 0.1.3__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
micromegas/__init__.py CHANGED
@@ -1,4 +1,10 @@
  import os
+ from . import time
  import request
  from . import request
  from . import client
  from . import perfetto
+
+ def connect():
+     "connect to the analytics service using default values"
+     BASE_URL = "http://localhost:8082/"
+     return client.Client(BASE_URL)
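The new top-level connect() helper builds a client.Client pointed at the default local endpoint. A minimal usage sketch, assuming a Micromegas analytics service is listening on localhost:8082 and that results come back as pandas DataFrames (as request.py does):

import datetime
import micromegas

# connect() uses the default BASE_URL http://localhost:8082/ shown above
client = micromegas.connect()

# begin/end must be timezone-aware (see micromegas/time.py later in this diff)
end = datetime.datetime.now(datetime.timezone.utc)
begin = end - datetime.timedelta(hours=1)
print(client.query_processes(begin, end, limit=10))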
micromegas/client.py CHANGED
@@ -1,4 +1,6 @@
  from . import request
+ from . import time
+ import cbor2


  class Client:
@@ -6,17 +8,28 @@ class Client:
          self.analytics_base_url = base_url + "analytics/"
          self.headers = headers

+     def find_process(self, process_id):
+         return request.request(
+             self.analytics_base_url + "find_process",
+             {"process_id": process_id},
+             headers=self.headers,
+         )
+
      def query_processes(self, begin, end, limit):
          return request.request(
              self.analytics_base_url + "query_processes",
-             {"begin": begin.isoformat(), "end": end.isoformat(), "limit": limit},
+             {
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
+                 "limit": limit,
+             },
              headers=self.headers,
          )

      def query_streams(self, begin, end, limit, process_id=None, tag_filter=None):
          args = {
-             "begin": begin.isoformat(),
-             "end": end.isoformat(),
+             "begin": time.format_datetime(begin),
+             "end": time.format_datetime(end),
              "limit": limit,
              "process_id": process_id,
              "tag_filter": tag_filter,
@@ -30,8 +43,8 @@ class Client:

      def query_blocks(self, begin, end, limit, stream_id):
          args = {
-             "begin": begin.isoformat(),
-             "end": end.isoformat(),
+             "begin": time.format_datetime(begin),
+             "end": time.format_datetime(end),
              "limit": limit,
              "stream_id": stream_id,
          }
@@ -46,8 +59,8 @@ class Client:
          return request.request(
              self.analytics_base_url + "query_spans",
              {
-                 "begin": begin.isoformat(),
-                 "end": end.isoformat(),
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
                  "limit": limit,
                  "stream_id": stream_id,
              },
@@ -58,35 +71,100 @@ class Client:
          return request.request(
              self.analytics_base_url + "query_thread_events",
              {
-                 "begin": begin.isoformat(),
-                 "end": end.isoformat(),
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
                  "limit": limit,
                  "stream_id": stream_id,
              },
              headers=self.headers,
          )
-
-     def query_log_entries(self, begin, end, limit, stream_id):
+
+     def query_log_entries(
+         self,
+         begin,
+         end,
+         limit=None,  # Necessary if stream_id is specified, ignored otherwise
+         stream_id=None,  # If none, query is run on cached lakehouse using query engine
+         sql=None,  # Necessary if stream_id is None, ignored otherwise
+     ):
          return request.request(
              self.analytics_base_url + "query_log_entries",
              {
-                 "begin": begin.isoformat(),
-                 "end": end.isoformat(),
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
                  "limit": limit,
                  "stream_id": stream_id,
+                 "sql": sql,
              },
              headers=self.headers,
          )

-     def query_metrics(self, begin, end, limit, stream_id):
+     def query_metrics(self, begin, end, limit=None, stream_id=None, sql=None):
          return request.request(
              self.analytics_base_url + "query_metrics",
              {
-                 "begin": begin.isoformat(),
-                 "end": end.isoformat(),
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
                  "limit": limit,
                  "stream_id": stream_id,
+                 "sql": sql,
              },
              headers=self.headers,
          )
-
+
+     def query_view(self, view_set_name, view_instance_id, begin, end, sql):
+         return request.request(
+             self.analytics_base_url + "query_view",
+             {
+                 "view_set_name": view_set_name,
+                 "view_instance_id": view_instance_id,
+                 "begin": time.format_datetime(begin),
+                 "end": time.format_datetime(end),
+                 "sql": sql,
+             },
+             headers=self.headers,
+         )
+
+     def query_partitions(self):
+         args = {}
+         return request.request(
+             self.analytics_base_url + "query_partitions",
+             args,
+             headers=self.headers,
+         )
+
+     def __stream_request(self, endpoint, args):
+         response = request.streamed_request(
+             self.analytics_base_url + endpoint,
+             args,
+             headers=self.headers,
+         )
+         while response.raw.readable():
+             try:
+                 print(cbor2.load(response.raw))
+             except cbor2.CBORDecodeEOF:
+                 break
+
+     def materialize_partitions(
+         self, view_set_name, view_instance_id, begin, end, partition_delta_seconds
+     ):
+         args = {
+             "view_set_name": view_set_name,
+             "view_instance_id": view_instance_id,
+             "begin": time.format_datetime(begin),
+             "end": time.format_datetime(end),
+             "partition_delta_seconds": partition_delta_seconds,
+         }
+         self.__stream_request("materialize_partitions", args)
+
+     def retire_partitions(
+         self, view_set_name, view_instance_id, begin, end, partition_delta_seconds
+     ):
+         args = {
+             "view_set_name": view_set_name,
+             "view_instance_id": view_instance_id,
+             "begin": time.format_datetime(begin),
+             "end": time.format_datetime(end),
+             "partition_delta_seconds": partition_delta_seconds,
+         }
+         self.__stream_request("retire_partitions", args)
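The 0.1.7 client adds an SQL path: query_log_entries and query_metrics accept an optional sql argument that is used when stream_id is omitted (the cached-lakehouse case described in the inline comments above), and query_view queries a materialized view by set name and instance id. A hedged sketch of the call styles; the SQL text, view names, and stream id below are placeholders, since the available columns and view sets are not part of this diff:

import datetime
import micromegas

client = micromegas.connect()
end = datetime.datetime.now(datetime.timezone.utc)
begin = end - datetime.timedelta(minutes=10)

# per-stream query: limit is required, sql is ignored
stream_id = "REPLACE_WITH_STREAM_ID"  # placeholder; pick one from client.query_streams(...)
df_log = client.query_log_entries(begin, end, limit=1000, stream_id=stream_id)

# lakehouse query: stream_id omitted, the sql argument drives the query engine
df_log = client.query_log_entries(begin, end, sql="SELECT * FROM log_entries LIMIT 10;")  # placeholder SQL

# materialized view query, addressed by view set name and instance id
df_view = client.query_view("log_entries", "global", begin, end, "SELECT * FROM view LIMIT 10;")  # placeholder names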
micromegas/perfetto.py CHANGED
@@ -1,21 +1,52 @@
  import crc
+ from tqdm import tqdm
+

  # hack to allow perfetto proto imports
  # you can then import the protos like this: from protos.perfetto.trace import trace_pb2
  def load_perfetto_protos():
      import sys
      import pathlib
-     perfetto_folder = pathlib.Path(__file__).parent.absolute() / "thirdparty/perfetto"
+
+     perfetto_folder = pathlib.Path(__file__).parent.absolute() / "thirdparty/perfetto"
      sys.path.append(str(perfetto_folder))

+
  def crc64_str(s):
      calculator = crc.Calculator(crc.Crc64.CRC64)
      return calculator.checksum(str.encode(s))

+
+ def generate_batches(df_blocks):
+     nb_events_threshold = 1024 * 1024
+     begin = df_blocks.iloc[0]["begin_time"]
+     end = df_blocks.iloc[0]["end_time"]
+     nb_events = 0
+     for index, block in df_blocks.iterrows():
+         nb_events += block["nb_objects"]
+         end = block["end_time"]
+         if nb_events > nb_events_threshold:
+             yield (begin, end, nb_events)
+             begin = block["end_time"]
+             nb_events = 0
+     if nb_events > 0:
+         yield (begin, end, nb_events)
+
+
  class Writer:
-     def __init__( self, client, process_id, exe ):
+     """
+     Fetches thread events from the analytics server and formats them in the perfetto format.
+     Traces can be viewed using https://ui.perfetto.dev/
+     """
+
+     def __init__(self, client, process_id, exe):
          load_perfetto_protos()
          from protos.perfetto.trace import trace_pb2, trace_packet_pb2
+
+         self.names = {}
+         self.categories = {}
+         self.source_locations = {}
+         self.first = True
          self.client = client
          self.trace = trace_pb2.Trace()
          self.packets = self.trace.packet
@@ -27,8 +58,42 @@ class Writer:
          packet.track_descriptor.process.process_name = exe
          self.packets.append(packet)

-     def append_thread( self, begin, end, stream_id, thread_name, thread_id ):
+     def get_name_iid(self, name):
+         iid = self.names.get(name)
+         is_new = False
+         if iid is None:
+             is_new = True
+             iid = len(self.names) + 1
+             self.names[name] = iid
+         return iid, is_new
+
+     def get_category_iid(self, cat):
+         iid = self.categories.get(cat)
+         is_new = False
+         if iid is None:
+             is_new = True
+             iid = len(self.categories) + 1
+             self.categories[cat] = iid
+         return iid, is_new
+
+     def get_location_iid(self, loc):
+         iid = self.source_locations.get(loc)
+         is_new = False
+         if iid is None:
+             is_new = True
+             iid = len(self.source_locations) + 1
+             self.source_locations[loc] = iid
+         return iid, is_new
+
+     def append_thread(self, stream_id, thread_name, thread_id):
          from protos.perfetto.trace import trace_pb2, trace_packet_pb2, track_event
+
+         df_blocks = self.client.query_blocks(
+             begin=None, end=None, limit=100_000, stream_id=stream_id
+         )
+         if df_blocks.empty:
+             return
+
          packet = trace_packet_pb2.TracePacket()
          thread_uuid = crc64_str(stream_id)
          packet.track_descriptor.uuid = thread_uuid
@@ -39,26 +104,102 @@ class Writer:
          self.packets.append(packet)
          trusted_packet_sequence_id = 1

-         df_events = self.client.query_thread_events(begin, end, limit=1024*1024, stream_id = stream_id)
-         df_events["ns"] = df_events["timestamp"].astype('int64')
-         for index, event in df_events.iterrows():
-             packet = trace_packet_pb2.TracePacket()
-             packet.timestamp = event["ns"]
-             if event["event_type"] == "begin":
-                 packet.track_event.type = track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_BEGIN
-             elif event["event_type"] == "end":
-                 packet.track_event.type = track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_END
-             else:
-                 raise Exception("unknown event type")
-             packet.track_event.track_uuid = thread_uuid
-             packet.track_event.name = event["name"]
-             packet.trusted_packet_sequence_id = trusted_packet_sequence_id
-             self.packets.append(packet)
-
-     def write_file( self, filename ):
+         batches = list(generate_batches(df_blocks))
+         for begin, end, limit in tqdm(batches, unit="event batches"):
+             df_spans = self.client.query_spans(
+                 begin, end, limit=limit, stream_id=stream_id
+             )
+             begin_ns = df_spans["begin"].astype("int64")
+             end_ns = df_spans["end"].astype("int64")
+             for index, span in df_spans.iterrows():
+                 packet = trace_packet_pb2.TracePacket()
+                 packet.timestamp = begin_ns[index]
+                 packet.track_event.type = (
+                     track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_BEGIN
+                 )
+                 packet.track_event.track_uuid = thread_uuid
+                 span_name = span["name"]
+                 name_iid, new_name = self.get_name_iid(span_name)
+                 packet.track_event.name_iid = name_iid
+                 category_iid, new_category = self.get_category_iid(span["target"])
+                 packet.track_event.category_iids.append(category_iid)
+
+                 source_location = (span["filename"], span["line"])
+                 source_location_iid, new_source_location = self.get_location_iid(source_location)
+                 packet.track_event.source_location_iid = source_location_iid
+                 if self.first:
+                     # this is necessary for interning to work
+                     self.first = False
+                     packet.first_packet_on_sequence = True
+                     packet.sequence_flags = 3
+                 else:
+                     packet.sequence_flags = 2
+
+                 if new_name:
+                     event_name = packet.interned_data.event_names.add()
+                     event_name.iid = name_iid
+                     event_name.name = span_name
+                 if new_category:
+                     cat_name = packet.interned_data.event_categories.add()
+                     cat_name.iid = category_iid
+                     cat_name.name = span["target"]
+                 if new_source_location:
+                     loc = packet.interned_data.source_locations.add()
+                     loc.iid = source_location_iid
+                     loc.file_name = source_location[0]
+                     loc.line_number = source_location[1]
+
+                 packet.trusted_packet_sequence_id = trusted_packet_sequence_id
+                 self.packets.append(packet)
+
+                 packet = trace_packet_pb2.TracePacket()
+                 packet.timestamp = end_ns[index]
+                 packet.track_event.type = (
+                     track_event.track_event_pb2.TrackEvent.Type.TYPE_SLICE_END
+                 )
+                 packet.track_event.track_uuid = thread_uuid
+                 packet.track_event.name_iid = name_iid
+                 packet.track_event.category_iids.append(category_iid)
+                 packet.track_event.source_location_iid = source_location_iid
+                 packet.sequence_flags = 2
+                 packet.trusted_packet_sequence_id = trusted_packet_sequence_id
+
+                 self.packets.append(packet)
+
+     def write_file(self, filename):
          with open(filename, "wb") as f:
              f.write(self.trace.SerializeToString())
-
-

-
+
+ def get_process_cpu_streams(client, process_id):
+     def prop_to_dict(props):
+         prop_dict = {}
+         for p in props:
+             prop_dict[p["key"]] = p["value"]
+         return prop_dict
+
+     def get_thread_name(prop_dict):
+         return prop_dict["thread-name"]
+
+     def get_thread_id(prop_dict):
+         return int(prop_dict["thread-id"])
+
+     df_streams = client.query_streams(
+         begin=None, end=None, limit=1024, tag_filter="cpu", process_id=process_id
+     )
+     df_streams["properties"] = df_streams["properties"].apply(prop_to_dict)
+     df_streams["thread_name"] = df_streams["properties"].apply(get_thread_name)
+     df_streams["thread_id"] = df_streams["properties"].apply(get_thread_id)
+     return df_streams
+
+
+ def write_process_trace(client, process_id, trace_filepath):
+     process_df = client.find_process(process_id)
+     assert process_df.shape[0] == 1
+     process = process_df.iloc[0]
+     streams = get_process_cpu_streams(client, process_id)
+     writer = Writer(client, process_id, process["exe"])
+     for index, stream in tqdm(list(streams.iterrows()), unit="threads"):
+         stream_id = stream["thread_id"]
+         writer.append_thread(stream["stream_id"], stream["thread_name"], stream_id)
+     writer.write_file(trace_filepath)
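write_process_trace ties the new pieces together: it looks up the process, gathers its cpu-tagged thread streams, batches their spans through generate_batches, and serializes a Perfetto trace viewable at https://ui.perfetto.dev/. A minimal usage sketch, assuming a reachable analytics service; the process id is a placeholder:

import micromegas
from micromegas import perfetto

client = micromegas.connect()
process_id = "REPLACE_WITH_PROCESS_ID"  # placeholder; take one from client.query_processes(...)
perfetto.write_process_trace(client, process_id, "process_trace.pftrace")
# open the resulting file in https://ui.perfetto.dev/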
micromegas/request.py CHANGED
@@ -18,3 +18,19 @@ def request(url, args, headers={}):
      )
      table = pq.read_table(io.BytesIO(response.content))
      return table.to_pandas()
+
+ def streamed_request(url, args, headers={}):
+     response = requests.post(
+         url,
+         headers=headers,
+         data=cbor2.dumps(args),
+         stream=True,
+         timeout=300,
+     )
+     if response.status_code != 200:
+         raise Exception(
+             "http request url={2} failed with code={0} text={1}".format(
+                 response.status_code, response.text, url
+             )
+         )
+     return response
micromegas/time.py ADDED
@@ -0,0 +1,30 @@
+ import datetime
+ import pandas
+ import re
+
+ def format_datetime(value):
+     nonetype = type(None)
+     match type(value):
+         case datetime.datetime:
+             if value.tzinfo is None:
+                 raise RuntimeError("datetime needs a valid time zone")
+             return value.isoformat()
+         case pandas.Timestamp:
+             return value.isoformat()
+         case nonetype:
+             return None
+     raise RuntimeError("value of unknown type in format_datetime")
+
+ def parse_time_delta(user_string):
+     parser = re.compile("(\\d+)([mhd])")
+     m = parser.match(user_string)
+     nbr = int(m.group(1))
+     unit = m.group(2)
+     if unit == "m":
+         return datetime.timedelta(minutes=nbr)
+     elif unit == "h":
+         return datetime.timedelta(hours=nbr)
+     elif unit == "d":
+         return datetime.timedelta(days=nbr)
+     else:
+         raise RuntimeError("invalid time delta: " + user_string)
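The new time module is what the Client methods above call for their begin/end arguments: format_datetime accepts a timezone-aware datetime.datetime, a pandas.Timestamp, or None, and parse_time_delta turns strings such as "5m", "2h" or "7d" into timedeltas. A small usage sketch based on the code above:

import datetime
from micromegas import time as micromegas_time

now = datetime.datetime.now(datetime.timezone.utc)
print(micromegas_time.format_datetime(now))    # ISO-8601 string
print(micromegas_time.format_datetime(None))   # None passes through
print(micromegas_time.parse_time_delta("2h"))  # datetime.timedelta(hours=2)
# a naive datetime (no tzinfo) raises RuntimeError, per format_datetime above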
micromegas-0.1.3.dist-info/METADATA → micromegas-0.1.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: micromegas
- Version: 0.1.3
+ Version: 0.1.7
  Summary: Python analytics client for https://github.com/madesroches/micromegas/
  Author: Marc-Antoine Desroches
  Author-email: madesroches@gmail.com
@@ -16,6 +16,7 @@ Requires-Dist: protobuf (>=5.27.1,<6.0.0)
  Requires-Dist: pyarrow (>=16.0.0,<17.0.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+ Requires-Dist: tqdm (>=4.66.5,<5.0.0)
  Description-Content-Type: text/markdown

  # Micromegas
micromegas-0.1.3.dist-info/RECORD → micromegas-0.1.7.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
- micromegas/__init__.py,sha256=zIsOSogb45OohIYwtNrXF0tocz2PvbvTolf979tSrtU,76
- micromegas/client.py,sha256=TlwlAgk5E6A6E4DfwSClg1H5trbwhupfscgHqqWoqmA,2783
- micromegas/perfetto.py,sha256=F7cB-iQjk35EBooIXmtgSVntjt00jgubFwnriJGl5_I,2697
- micromegas/request.py,sha256=u6nqXV5iQjLdXjVrzDqrDV1yoqUbeJ9qjiVgz87-uN8,512
+ micromegas/__init__.py,sha256=E_j3LFxMk9rSMJunwDCi_90NsRHm1fKwjj_6KGMYCjQ,246
+ micromegas/client.py,sha256=2ejIRukZiJp6Q8YncK9dJtaazXX1s5TKNaKzndRxswk,5509
+ micromegas/perfetto.py,sha256=yuIe5iKvca61aWMBQNziSGM-DHcOEsiobtKx2SsNQ3E,7829
+ micromegas/request.py,sha256=NV0urom5P3_P2q94gX51hxW_Fnrp_DDRorsP3mUb5NM,941
  micromegas/thirdparty/perfetto/protos/perfetto/common/android_energy_consumer_descriptor_pb2.py,sha256=l8QNXqnB-mJIkuFr2s1YoLQXHm3G-ZcOGp_OW_hQ0TE,1887
  micromegas/thirdparty/perfetto/protos/perfetto/common/android_log_constants_pb2.py,sha256=O5zDZkV8Nji0O2ryJRP4FTWdgdOBlDymWNcpNNDOFxk,2017
  micromegas/thirdparty/perfetto/protos/perfetto/common/builtin_clock_pb2.py,sha256=7qLL_BENTxRFQH8DfHDvyWAkgwy0VHrOaE8XhL8iZgk,1822
@@ -207,6 +207,7 @@ micromegas/thirdparty/perfetto/protos/perfetto/trace/track_event/track_event_pb2
  micromegas/thirdparty/perfetto/protos/perfetto/trace/translation/translation_table_pb2.py,sha256=-hkUdv07TsSDHH1mier2KyAhmivK4GSzEfAzAEYv20U,6630
  micromegas/thirdparty/perfetto/protos/perfetto/trace/trigger_pb2.py,sha256=We7Yi8o3cEcrSNxY1zLUUO6tEWnD36C2f3O_s8_qv0I,1435
  micromegas/thirdparty/perfetto/protos/perfetto/trace/ui_state_pb2.py,sha256=Af-SXwhroNhRXMrtw6e2eU1liCImMRxSdmkt_AuSHf8,1752
- micromegas-0.1.3.dist-info/METADATA,sha256=g_S3KRAQOMELgMcL8lfFoMXuBTvPSEt3qN_L_SexKLY,839
- micromegas-0.1.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- micromegas-0.1.3.dist-info/RECORD,,
+ micromegas/time.py,sha256=teMWk_hniW2jI7MWJ2w0HuckdTiebdr35-snUz_3cfU,911
+ micromegas-0.1.7.dist-info/METADATA,sha256=c7Dyj3APPSt59QLCF8id6Hcp6rjUDT_owecyrweWhAM,877
+ micromegas-0.1.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ micromegas-0.1.7.dist-info/RECORD,,