elastic-kernel 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. elastic_kernel/__init__.py +0 -0
  2. elastic_kernel/command.py +43 -0
  3. elastic_kernel/kernel.json +5 -0
  4. elastic_kernel/kernel.py +258 -0
  5. elastic_kernel-0.0.2.dist-info/METADATA +291 -0
  6. elastic_kernel-0.0.2.dist-info/RECORD +42 -0
  7. elastic_kernel-0.0.2.dist-info/WHEEL +5 -0
  8. elastic_kernel-0.0.2.dist-info/entry_points.txt +2 -0
  9. elastic_kernel-0.0.2.dist-info/licenses/LICENSE +201 -0
  10. elastic_kernel-0.0.2.dist-info/top_level.txt +2 -0
  11. elastic_notebook/__init__.py +0 -0
  12. elastic_notebook/algorithm/__init__.py +0 -0
  13. elastic_notebook/algorithm/baseline.py +31 -0
  14. elastic_notebook/algorithm/optimizer_exact.py +121 -0
  15. elastic_notebook/algorithm/selector.py +41 -0
  16. elastic_notebook/core/__init__.py +0 -0
  17. elastic_notebook/core/common/__init__.py +0 -0
  18. elastic_notebook/core/common/checkpoint_file.py +129 -0
  19. elastic_notebook/core/common/profile_graph_size.py +39 -0
  20. elastic_notebook/core/common/profile_migration_speed.py +69 -0
  21. elastic_notebook/core/common/profile_variable_size.py +66 -0
  22. elastic_notebook/core/graph/__init__.py +0 -0
  23. elastic_notebook/core/graph/cell_execution.py +39 -0
  24. elastic_notebook/core/graph/graph.py +75 -0
  25. elastic_notebook/core/graph/variable_snapshot.py +31 -0
  26. elastic_notebook/core/io/__init__.py +0 -0
  27. elastic_notebook/core/io/adapter.py +18 -0
  28. elastic_notebook/core/io/filesystem_adapter.py +30 -0
  29. elastic_notebook/core/io/migrate.py +98 -0
  30. elastic_notebook/core/io/pickle.py +71 -0
  31. elastic_notebook/core/io/recover.py +51 -0
  32. elastic_notebook/core/mutation/__init__.py +0 -0
  33. elastic_notebook/core/mutation/fingerprint.py +184 -0
  34. elastic_notebook/core/mutation/id_graph.py +147 -0
  35. elastic_notebook/core/mutation/object_hash.py +204 -0
  36. elastic_notebook/core/notebook/__init__.py +0 -0
  37. elastic_notebook/core/notebook/checkpoint.py +222 -0
  38. elastic_notebook/core/notebook/find_input_vars.py +117 -0
  39. elastic_notebook/core/notebook/find_output_vars.py +18 -0
  40. elastic_notebook/core/notebook/restore_notebook.py +91 -0
  41. elastic_notebook/core/notebook/update_graph.py +46 -0
  42. elastic_notebook/elastic_notebook.py +336 -0
@@ -0,0 +1,91 @@
1
+ import sys
2
+ import time
3
+ from io import StringIO
4
+
5
+ from ipykernel.zmqshell import ZMQInteractiveShell
6
+ from IPython import get_ipython
7
+
8
+ from elastic_notebook.core.graph.graph import DependencyGraph
9
+
10
+
11
def restore_notebook(
    graph: DependencyGraph,
    shell: ZMQInteractiveShell,
    variables: dict,
    ces_to_recompute: set,
    write_log_location=None,
    notebook_name=None,
    optimizer_name=None,
):
    """
    Restores the notebook. Reruns the requested cell executions (CEs) and declares stored variables back
    into the kernel namespace.

    Args:
        graph (DependencyGraph): dependency graph of the notebook; its ``cell_executions`` are walked in
            order.
        shell (ZMQInteractiveShell): interactive Jupyter shell whose ``user_ns`` receives the variables.
        variables (Dict): indexed by ``ce.cell_num``; each entry is an iterable of pairs whose first item
            exposes ``.name`` and whose second item is the value to bind under that name.
        ces_to_recompute (set): CEs whose cell code must be rerun to restore non-migrated variables.
        write_log_location (str): directory to append the recompute runtime to. For experimentation only.
        notebook_name (str): notebook name used in the log filename. For experimentation only.
        optimizer_name (str): optimizer name used in the log filename. For experimentation only.

    Raises:
        Exception: any error produced by a rerun cell, re-raised through ``raise_error()``.
    """

    # Walk the CEs in the order stored on the graph.
    recompute_start = time.time()
    for ce in graph.cell_executions:
        if ce in ces_to_recompute:
            # The progress message is printed before output is suppressed.
            print("Rerunning cell", ce.cell_num + 1)
            _rerun_cell_silently(ce.cell)

        # Declare the stored variables of this CE. This runs after any rerun, so the stored value is
        # what ends up bound in the namespace.
        for pair in variables[ce.cell_num]:
            print("Declaring variable", pair[0].name)
            shell.user_ns[pair[0].name] = pair[1]

    recompute_end = time.time()

    if write_log_location:
        # notebook_name / optimizer_name default to None; treat None as an empty name instead of
        # failing on str + None.
        log_path = (
            f"{write_log_location}/output_{notebook_name or ''}_{optimizer_name or ''}.txt"
        )
        with open(log_path, "a") as f:
            f.write(
                f"Recompute stage took - {recompute_end - recompute_start!r} seconds\n"
            )


def _rerun_cell_silently(cell_source):
    """Run ``cell_source`` in the active IPython shell with stdout, stderr and display_pub disabled."""
    ipython = get_ipython()

    # Remember the current sinks so they can be restored whatever happens.
    original_display_pub = ipython.display_pub
    original_stdout = sys.stdout
    original_stderr = sys.stderr

    ipython.display_pub = None
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    try:
        cell_output = ipython.run_cell(cell_source)
        # run_cell stores errors on the result object; surface them to the caller.
        cell_output.raise_error()
    finally:
        # Always restore the original sinks; an error from the cell propagates after this.
        ipython.display_pub = original_display_pub
        sys.stdout = original_stdout
        sys.stderr = original_stderr
@@ -0,0 +1,46 @@
1
+ from elastic_notebook.core.graph.graph import DependencyGraph
2
+
3
+
4
def update_graph(
    cell: str,
    cell_runtime: float,
    start_time: float,
    input_variables: set,
    created_and_modified_variables: set,
    deleted_variables: set,
    graph: DependencyGraph,
):
    """
    Records a newly executed cell in the dependency graph together with its input and output variables.

    Args:
        cell (str): Raw cell code.
        cell_runtime (float): Cell runtime.
        start_time (time): Time at which the cell started executing (not when it was queued).
        input_variables (set): Names of the variables the cell read.
        created_and_modified_variables (set): Names of variables the cell created or modified.
        deleted_variables (set): Names of variables the cell deleted.
        graph (DependencyGraph): Dependency graph representation of the notebook.
    """

    # Inputs are the most recent snapshot of every variable the cell read.
    latest_inputs = {graph.variable_snapshots[name][-1] for name in input_variables}

    # New snapshots: live ones first (flag False), then deletion markers (flag True).
    live_outputs = {
        graph.create_variable_snapshot(name, False)
        for name in created_and_modified_variables
    }
    deleted_outputs = {
        graph.create_variable_snapshot(name, True) for name in deleted_variables
    }

    # Register the cell execution with its inputs and all of its outputs.
    graph.add_cell_execution(
        cell,
        cell_runtime,
        start_time,
        latest_inputs,
        live_outputs | deleted_outputs,
    )
@@ -0,0 +1,336 @@
1
+ from __future__ import print_function
2
+
3
+ import time
4
+ import types
5
+ from os.path import dirname
6
+
7
+ from IPython import get_ipython
8
+ from IPython.core.interactiveshell import InteractiveShell
9
+ from pympler import asizeof
10
+
11
+ from elastic_notebook.algorithm.baseline import MigrateAllBaseline, RecomputeAllBaseline
12
+ from elastic_notebook.algorithm.optimizer_exact import OptimizerExact
13
+ from elastic_notebook.algorithm.selector import OptimizerType
14
+ from elastic_notebook.core.common.profile_graph_size import profile_graph_size
15
+ from elastic_notebook.core.common.profile_migration_speed import profile_migration_speed
16
+ from elastic_notebook.core.graph.graph import DependencyGraph
17
+ from elastic_notebook.core.io.recover import resume
18
+ from elastic_notebook.core.mutation.fingerprint import (
19
+ compare_fingerprint,
20
+ construct_fingerprint,
21
+ )
22
+ from elastic_notebook.core.mutation.object_hash import UnserializableObj
23
+ from elastic_notebook.core.notebook.checkpoint import checkpoint
24
+ from elastic_notebook.core.notebook.find_input_vars import find_input_vars
25
+ from elastic_notebook.core.notebook.find_output_vars import find_created_deleted_vars
26
+ from elastic_notebook.core.notebook.restore_notebook import restore_notebook
27
+ from elastic_notebook.core.notebook.update_graph import update_graph
28
+
29
+
30
class ElasticNotebook:
    """
    Tracks the state of a notebook session for Elastic Notebook.

    Every cell run through ``record_event`` is recorded in a dependency graph together with the variables
    it read, created, modified and deleted. ``checkpoint`` and ``load_checkpoint`` delegate to the
    checkpoint / resume helpers of the package, using the configured optimizer to balance migration and
    recomputation costs.
    """

    def __init__(self, shell: InteractiveShell):
        self.shell = shell

        # Initialize the dependency graph for capturing notebook state.
        self.dependency_graph = DependencyGraph()

        # Migration properties.
        self.migration_speed_bps = 100000
        self.alpha = 1
        self.selector = OptimizerExact(migration_speed_bps=self.migration_speed_bps)

        # Dictionary of object fingerprints. For detecting modified references.
        self.fingerprint_dict = {}

        # Set of user-declared functions.
        self.udfs = set()

        # Flag if migration speed has been manually set. In this case, skip profiling of migration speed at
        # checkpoint time.
        self.manual_migration_speed = False

        # Location to log runtimes to. For experiments only.
        self.write_log_location = None

        # Strings for determining log filename. For experiments only.
        self.optimizer_name = ""
        self.notebook_name = ""

        # Total elapsed time spent inferring cell inputs and outputs.
        # For measuring overhead.
        self.total_recordevent_time = 0

        # Dict for recording overhead of profiling operations.
        self.profile_dict = {"idgraph": 0, "representation": 0}

        # Debug flag; when set, progress messages are printed.
        self.debug = False

        # Names of the variables selected for migration and for recomputation.
        self._vss_to_migrate = []
        self._vss_to_recompute = []

    @property
    def vss_to_migrate(self):
        """Return the list of variable names selected for migration."""
        return self._vss_to_migrate

    @property
    def vss_to_recompute(self):
        """Return the list of variable names selected for recomputation."""
        return self._vss_to_recompute

    def update_migration_lists(self, vss_to_migrate, vss_to_recompute):
        """Replace the migration / recomputation lists with the ``.name`` of each given item."""
        self._vss_to_migrate = [vs.name for vs in vss_to_migrate]
        self._vss_to_recompute = [vs.name for vs in vss_to_recompute]

    def __str__(self):
        """Return a two-line summary of the migration and recomputation lists."""
        return f"マイグレーション対象: {self.vss_to_migrate}\n再計算対象: {self.vss_to_recompute}"

    def set_debug(self, debug):
        """Enable or disable debug messages."""
        self.debug = debug

    def record_event(self, cell):
        """
        Run ``cell`` in the active IPython shell and record its effect in the dependency graph.

        Args:
            cell (str): source code of the cell to execute.
        """
        if self.debug:
            print("Recording event...")

        pre_execution = set(self.shell.user_ns)

        # Create fingerprints for tracked variables that do not have one yet.
        for var in self.dependency_graph.variable_snapshots:
            if var not in self.fingerprint_dict and var in self.shell.user_ns:
                self.fingerprint_dict[var] = construct_fingerprint(
                    self.shell.user_ns[var], self.profile_dict
                )

        # Find input variables (variables potentially accessed) of the cell.
        input_variables, function_defs = find_input_vars(
            cell,
            set(self.dependency_graph.variable_snapshots),
            self.shell,
            self.udfs,
        )

        # Union of ID graphs of input variables. For detecting modifications to unserializable variables.
        # Updated in place rather than rebuilding the set on every iteration.
        input_variables_id_graph_union = set()
        for var in input_variables:
            if var in self.fingerprint_dict:
                input_variables_id_graph_union.update(self.fingerprint_dict[var][1])

        # Run the cell.
        start_time = time.time()
        try:
            cell_output = get_ipython().run_cell(cell)
            cell_output.raise_error()
        except Exception:
            # Deliberately best-effort: a failing cell is still recorded, since it may have changed the
            # namespace before raising.
            pass
        cell_runtime = time.time() - start_time
        post_execution = set(self.shell.user_ns)
        infer_start = time.time()

        # Find created and deleted variables by computing difference between namespace pre and post execution.
        created_variables, deleted_variables = find_created_deleted_vars(
            pre_execution, post_execution
        )

        # Remove stored fingerprints for deleted variables. A deleted name may never have been
        # fingerprinted (only tracked and newly created variables are), so a missing entry is not an error.
        for var in deleted_variables:
            self.fingerprint_dict.pop(var, None)
            self.udfs.discard(var)

        # Find modified variables by comparing ID graphs and object hashes.
        modified_variables = set()
        for k, fingerprint in self.fingerprint_dict.items():
            changed, overwritten = compare_fingerprint(
                fingerprint,
                self.shell.user_ns[k],
                self.profile_dict,
                input_variables_id_graph_union,
            )
            if changed:
                modified_variables.add(k)

            # In the case of non-overwrite modification, the variable is additionally considered as accessed.
            if changed and not overwritten:
                input_variables.add(k)

            # A user defined function has been overwritten.
            elif overwritten and k in self.udfs:
                self.udfs.remove(k)

            # Select unserializable variables are assumed to be modified if accessed.
            if (
                not changed
                and not overwritten
                and isinstance(fingerprint[2], UnserializableObj)
            ):
                if fingerprint[1].intersection(input_variables_id_graph_union):
                    modified_variables.add(k)

        # Create fingerprints for newly created variables.
        for var in created_variables:
            self.fingerprint_dict[var] = construct_fingerprint(
                self.shell.user_ns[var], self.profile_dict
            )

        # Record newly defined UDFs
        for udf in function_defs:
            if udf in self.shell.user_ns and isinstance(
                self.shell.user_ns[udf], types.FunctionType
            ):
                self.udfs.add(udf)

        # Update the dependency graph.
        update_graph(
            cell,
            cell_runtime,
            start_time,
            input_variables,
            created_variables.union(modified_variables),
            deleted_variables,
            self.dependency_graph,
        )

        # Update total recordevent time tally.
        infer_end = time.time()
        self.total_recordevent_time += infer_end - infer_start

    def set_migration_speed(self, migration_speed):
        """
        Manually set the migration speed and propagate it to the current selector.

        Args:
            migration_speed: anything ``float()`` accepts. Non-numeric input prints a message and is
                ignored; non-positive values are ignored silently.
        """
        try:
            speed = float(migration_speed)
        except (TypeError, ValueError):
            # TypeError covers None and other objects float() cannot convert.
            print("Migration speed is not a number.")
        else:
            if speed > 0:
                self.migration_speed_bps = speed
                self.manual_migration_speed = True
        self.selector.migration_speed_bps = self.migration_speed_bps

    def set_optimizer(self, optimizer):
        """
        Select the optimizer by its ``OptimizerType`` value.

        The given name is always stored for log filenames; an unrecognised value leaves the current
        selector and alpha unchanged.
        """
        self.optimizer_name = optimizer

        if optimizer == OptimizerType.EXACT.value:
            self.selector = OptimizerExact(self.migration_speed_bps)
            self.alpha = 1
        elif optimizer == OptimizerType.EXACT_C.value:
            self.selector = OptimizerExact(self.migration_speed_bps)
            self.alpha = 20
        elif optimizer == OptimizerType.EXACT_R.value:
            self.selector = OptimizerExact(self.migration_speed_bps)
            self.alpha = 0.05
        elif optimizer == OptimizerType.MIGRATE_ALL.value:
            self.selector = MigrateAllBaseline(self.migration_speed_bps)
        elif optimizer == OptimizerType.RECOMPUTE_ALL.value:
            self.selector = RecomputeAllBaseline(self.migration_speed_bps)

    def set_write_log_location(self, filename):
        """Set the directory that experiment logs are appended to."""
        self.write_log_location = filename

    def set_notebook_name(self, name):
        """Set the notebook name used in experiment log filenames."""
        self.notebook_name = name

    def checkpoint(self, filename):
        """
        Create a checkpoint of the current notebook state.

        Args:
            filename (str): path of the checkpoint file; its directory is used to profile migration speed
                unless a speed was set manually.

        Returns:
            Whatever the package-level ``checkpoint`` helper returns.
        """
        if self.debug:
            print("Checkpointing...")

        # Write overhead metrics to file (for experiments).
        if self.write_log_location:
            log_path = (
                f"{self.write_log_location}/output_"
                f"{self.notebook_name}_{self.optimizer_name}.txt"
            )
            comparison_bytes = asizeof.asizeof(self.dependency_graph) + asizeof.asizeof(
                self.fingerprint_dict
            )
            with open(log_path, "a") as f:
                f.write(f"comparison overhead - {comparison_bytes!r} bytes\n")
                f.write(
                    f"notebook overhead - {asizeof.asizeof(self.shell.user_ns)!r} bytes\n"
                )
                f.write(
                    "Dependency graph storage overhead - "
                    f"{profile_graph_size(self.dependency_graph)!r} bytes\n"
                )
                f.write(
                    f"Cell monitoring overhead - {self.total_recordevent_time!r} seconds\n"
                )

        # Profile the migration speed to filename.
        if not self.manual_migration_speed:
            self.migration_speed_bps = profile_migration_speed(
                dirname(filename), alpha=self.alpha
            )
            self.selector.migration_speed_bps = self.migration_speed_bps

        # Checkpoint the notebook.
        return checkpoint(
            self.dependency_graph,
            self.shell,
            self.fingerprint_dict,
            self.selector,
            self.udfs,
            filename,
            self.profile_dict,
            self.write_log_location,
            self.notebook_name,
            self.optimizer_name,
            self,  # pass this instance along
        )

    def load_checkpoint(self, filename):
        """
        Load a checkpoint and restore the notebook state from it.

        Args:
            filename (str): path of the checkpoint file to resume from.
        """
        if self.debug:
            print("Loading checkpoint...")

        (
            self.dependency_graph,
            variables,
            vss_to_migrate,
            vss_to_recompute,
            self.udfs,
        ) = resume(filename)

        # Recompute missing VSs and redeclare variables into the kernel.
        # NOTE(review): restore_notebook is declared as
        #   (graph, shell, variables, ces_to_recompute, write_log_location, notebook_name, optimizer_name)
        # but no ces_to_recompute is passed here, so the three logging arguments appear to be shifted
        # one position to the left. The set of cell executions to rerun is not available from the values
        # unpacked above - confirm what resume() returns and pass it explicitly.
        restore_notebook(
            self.dependency_graph,
            self.shell,
            variables,
            self.write_log_location,
            self.notebook_name,
            self.optimizer_name,
        )