machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,318 @@
1
+ """Profile collector for runtime profiling.
2
+
3
+ This module implements the profile collection mechanism that integrates
4
+ with the VM to gather runtime statistics for PGO.
5
+ """
6
+
7
+ import time
8
+ from typing import Any
9
+
10
+ from machine_dialect.mir.profiling.profile_data import (
11
+ BasicBlockProfile,
12
+ BranchProfile,
13
+ FunctionProfile,
14
+ IndirectCallProfile,
15
+ LoopProfile,
16
+ ProfileData,
17
+ )
18
+
19
+
20
class ProfileCollector:
    """Gather runtime statistics while a program executes.

    The executing VM is expected to call the ``enter_*``, ``exit_*`` and
    ``record_*`` hooks; each hook folds its event into ``profile_data`` so
    the result can later be used for profile-guided optimization.
    """

    def __init__(self, module_name: str = "default") -> None:
        """Create a disabled collector holding empty profile data.

        Args:
            module_name: Name of the module being profiled.
        """
        self.profile_data = ProfileData(module_name=module_name)

        # Sampling state: nothing is collected until enable() is called,
        # after which one event out of every `sampling_rate` is kept.
        self.enabled = False
        self.sampling_rate = 1
        self.sample_counter = 0

        # Active calls as (function name, entry timestamp) pairs.
        self.call_stack: list[tuple[str, float]] = []

        # Active loops as (loop id, iterations counted so far) pairs.
        self.loop_stack: list[tuple[str, int]] = []

        # Function and block most recently recorded by the hooks.
        self.current_function: str | None = None
        self.current_block: str | None = None

    def enable(self, sampling_rate: int = 1) -> None:
        """Turn collection on.

        Args:
            sampling_rate: Keep one event out of every N; values below 1
                are clamped to 1 (every event).
        """
        self.sampling_rate = max(1, sampling_rate)
        self.enabled = True

    def disable(self) -> None:
        """Turn collection off."""
        self.enabled = False

    def should_sample(self) -> bool:
        """Decide whether the current event is recorded.

        Each call made while enabled advances the sample counter; the
        counter resets whenever it reaches the sampling rate.

        Returns:
            True if the event should be recorded.
        """
        if not self.enabled:
            return False

        self.sample_counter += 1
        if self.sample_counter < self.sampling_rate:
            return False

        self.sample_counter = 0
        return True

    def enter_function(self, function_name: str, call_site: str | None = None) -> None:
        """Record that a function was entered.

        Args:
            function_name: Name of the function being entered.
            call_site: Location of the call site, if known.
        """
        if not self.should_sample():
            return

        # Remember the entry time so exit_function can measure the call.
        started_at = time.perf_counter()
        self.call_stack.append((function_name, started_at))

        functions = self.profile_data.functions
        if function_name not in functions:
            functions[function_name] = FunctionProfile(name=function_name)
        stats = functions[function_name]
        stats.call_count += 1

        if call_site:
            stats.call_sites[call_site] = stats.call_sites.get(call_site, 0) + 1

        self.current_function = function_name
        self.profile_data.total_samples += 1

    def exit_function(self, function_name: str) -> None:
        """Record that a function returned.

        The call is only timed when ``function_name`` matches the top of
        the call stack; otherwise the stack is left untouched.

        Args:
            function_name: Name of the function being exited.
        """
        if not (self.enabled and self.call_stack):
            return

        top_name, started_at = self.call_stack[-1]
        if top_name == function_name:
            self.call_stack.pop()
            elapsed = time.perf_counter() - started_at

            if function_name in self.profile_data.functions:
                stats = self.profile_data.functions[function_name]
                # Elapsed microseconds stand in for a cycle count.
                stats.total_cycles += int(elapsed * 1_000_000)
                stats.update_stats()

        # The caller (if any) becomes the current function again.
        self.current_function = self.call_stack[-1][0] if self.call_stack else None

    def record_branch(self, location: str, taken: bool) -> None:
        """Record the outcome of one branch execution.

        Args:
            location: Branch location identifier.
            taken: Whether the branch was taken.
        """
        if not self.should_sample():
            return

        branches = self.profile_data.branches
        if location not in branches:
            branches[location] = BranchProfile(location=location)

        stats = branches[location]
        if taken:
            stats.taken_count += 1
        else:
            stats.not_taken_count += 1
        stats.update_stats()

        self.profile_data.total_samples += 1

    def enter_loop(self, loop_id: str) -> None:
        """Record that a loop was entered.

        Args:
            loop_id: Loop identifier.
        """
        if self.enabled:
            # A fresh entry starts with zero iterations counted.
            self.loop_stack.append((loop_id, 0))

    def record_loop_iteration(self) -> None:
        """Count one more iteration of the innermost active loop."""
        if self.enabled and self.loop_stack:
            active_id, count = self.loop_stack.pop()
            self.loop_stack.append((active_id, count + 1))

    def exit_loop(self, loop_id: str) -> None:
        """Record that a loop finished.

        Nothing happens unless ``loop_id`` is the innermost active loop.

        Args:
            loop_id: Loop identifier.
        """
        if not self.enabled or not self.loop_stack:
            return
        if self.loop_stack[-1][0] != loop_id:
            return

        _, iteration_total = self.loop_stack.pop()

        # The sampling decision is taken at exit time.
        if not self.should_sample():
            return

        loops = self.profile_data.loops
        if loop_id not in loops:
            loops[loop_id] = LoopProfile(location=loop_id)

        loops[loop_id].record_iteration(iteration_total)
        self.profile_data.total_samples += 1

    def enter_block(self, block_id: str) -> None:
        """Record that a basic block was entered.

        Args:
            block_id: Block identifier.
        """
        if not self.should_sample():
            return

        # Qualify the block with the current function when one is known.
        location = f"{self.current_function}:{block_id}" if self.current_function else block_id

        blocks = self.profile_data.blocks
        if location not in blocks:
            blocks[location] = BasicBlockProfile(location=location)

        stats = blocks[location]
        stats.execution_count += 1
        stats.update_stats()

        self.current_block = block_id
        self.profile_data.total_samples += 1

    def record_indirect_call(self, call_site: str, target: str) -> None:
        """Record the resolved target of an indirect call.

        Args:
            call_site: Location of the indirect call.
            target: Function that was actually called.
        """
        if not self.should_sample():
            return

        indirect_calls = self.profile_data.indirect_calls
        if call_site not in indirect_calls:
            indirect_calls[call_site] = IndirectCallProfile(location=call_site)

        indirect_calls[call_site].record_call(target)
        self.profile_data.total_samples += 1

    def get_profile_data(self) -> ProfileData:
        """Return the profile data collected so far.

        Returns:
            The collector's ``ProfileData`` instance.
        """
        return self.profile_data

    def reset(self) -> None:
        """Discard everything collected, keeping only the module name."""
        self.profile_data = ProfileData(module_name=self.profile_data.module_name)
        self.call_stack.clear()
        self.loop_stack.clear()
        self.current_function = None
        self.current_block = None
        self.sample_counter = 0

    def merge_profile(self, other_profile: ProfileData) -> None:
        """Fold another profile into this collector's data.

        Args:
            other_profile: Profile data to merge.
        """
        self.profile_data.merge(other_profile)

    def get_hot_path_hints(self) -> dict[str, Any]:
        """Summarize the collected data as optimization hints.

        A function, loop or indirect call site becomes a candidate when
        its benefit score is strictly greater than 50.

        Returns:
            Dictionary of optimization hints.
        """
        data = self.profile_data
        return {
            "hot_functions": data.get_hot_functions(),
            "hot_loops": data.get_hot_loops(),
            "predictable_branches": data.get_predictable_branches(),
            "inline_candidates": [
                {
                    "function": fn_name,
                    "benefit": fn_stats.inline_benefit,
                    "call_count": fn_stats.call_count,
                }
                for fn_name, fn_stats in data.functions.items()
                if fn_stats.inline_benefit > 50
            ],
            "unroll_candidates": [
                {
                    "loop": loop_loc,
                    "benefit": loop_stats.unroll_benefit,
                    "avg_iterations": loop_stats.avg_iterations,
                }
                for loop_loc, loop_stats in data.loops.items()
                if loop_stats.unroll_benefit > 50
            ],
            "devirtualize_candidates": [
                {
                    "call_site": site_loc,
                    "target": site_stats.most_common_target,
                    "benefit": site_stats.devirtualization_benefit,
                }
                for site_loc, site_stats in data.indirect_calls.items()
                if site_stats.devirtualization_benefit > 50
            ],
        }
@@ -0,0 +1,372 @@
1
+ """Profile data structures for PGO.
2
+
3
+ This module defines the data structures used to store and manipulate
4
+ runtime profile information for profile-guided optimization.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum
9
+ from typing import Any
10
+
11
+
12
class ProfileType(Enum):
    """Kinds of runtime events a profile can describe.

    Each member's value is its lowercase string tag.
    """

    # Calls and control flow.
    FUNCTION_CALL = "function_call"
    BRANCH = "branch"
    LOOP = "loop"

    # Block-level and dynamically dispatched events.
    BASIC_BLOCK = "basic_block"
    INDIRECT_CALL = "indirect_call"
20
+
21
+
22
@dataclass
class FunctionProfile:
    """Runtime statistics gathered for a single function.

    Attributes:
        name: Function name.
        call_count: How many calls were recorded.
        total_cycles: Accumulated execution cost (cycles or a time proxy).
        avg_cycles: Mean cost per recorded call.
        call_sites: Call-site location mapped to the number of calls from it.
        hot: Whether the function crossed a hotness threshold.
        inline_benefit: Inlining score; ``update_stats`` caps it at 100.
    """

    name: str
    call_count: int = 0
    total_cycles: int = 0
    avg_cycles: float = 0.0
    call_sites: dict[str, int] = field(default_factory=dict)
    hot: bool = False
    inline_benefit: float = 0.0

    def update_stats(self) -> None:
        """Recompute the derived fields from the raw counters.

        Nothing changes while ``call_count`` is not positive.
        """
        calls = self.call_count
        if calls <= 0:
            return

        self.avg_cycles = self.total_cycles / calls
        # Hot means either many calls or a large accumulated cost.
        self.hot = calls > 100 or self.total_cycles > 10000
        # A tenth of a point per call, capped at 100.
        self.inline_benefit = min(calls * 0.1, 100.0)

    def merge(self, other: "FunctionProfile") -> None:
        """Add another profile's counters to this one and refresh the stats.

        Args:
            other: Profile to merge.
        """
        self.call_count += other.call_count
        self.total_cycles += other.total_cycles

        sites = self.call_sites
        for location, calls in other.call_sites.items():
            sites[location] = sites.get(location, 0) + calls

        self.update_stats()
64
+
65
+
66
@dataclass
class BranchProfile:
    """Runtime statistics gathered for a single branch.

    Attributes:
        location: Branch location (function:block:instruction).
        taken_count: How often the branch was taken.
        not_taken_count: How often the branch fell through.
        taken_probability: Fraction of executions in which it was taken.
        predictable: Whether the outcome is heavily biased one way.
    """

    location: str
    taken_count: int = 0
    not_taken_count: int = 0
    taken_probability: float = 0.5
    predictable: bool = False

    def update_stats(self) -> None:
        """Recompute probability and predictability from the counters.

        Nothing changes until at least one execution has been counted.
        """
        executions = self.taken_count + self.not_taken_count
        if executions <= 0:
            return

        probability = self.taken_count / executions
        self.taken_probability = probability
        # Predictable means the probability lies outside [0.1, 0.9].
        self.predictable = not (0.1 <= probability <= 0.9)

    def merge(self, other: "BranchProfile") -> None:
        """Add another profile's counters to this one and refresh the stats.

        Args:
            other: Profile to merge.
        """
        self.not_taken_count += other.not_taken_count
        self.taken_count += other.taken_count
        self.update_stats()
101
+
102
+
103
@dataclass
class LoopProfile:
    """Profile data for a loop.

    Attributes:
        location: Loop location (function:loop_id).
        entry_count: Number of times loop was entered.
        total_iterations: Total iterations across all entries.
        avg_iterations: Average iterations per entry.
        max_iterations: Maximum iterations observed.
        min_iterations: Minimum iterations observed.
        hot: Whether this is a hot loop.
        unroll_benefit: Estimated benefit of unrolling (0 when the loop is
            not a small, short-running loop).
    """

    location: str
    entry_count: int = 0
    total_iterations: int = 0
    avg_iterations: float = 0.0
    max_iterations: int = 0
    min_iterations: int = 2**31 - 1  # Large sentinel so the first min() wins
    hot: bool = False
    unroll_benefit: float = 0.0

    def update_stats(self) -> None:
        """Recompute the derived statistics from the raw counters.

        Nothing changes until the loop has been entered at least once.
        """
        if self.entry_count <= 0:
            return

        self.avg_iterations = self.total_iterations / self.entry_count
        # Mark as hot if executed frequently
        self.hot = self.total_iterations > 1000
        # Only small loops with a low observed maximum are worth unrolling.
        if self.avg_iterations < 10 and self.max_iterations < 20:
            self.unroll_benefit = min(self.avg_iterations * 10, 100.0)
        else:
            # Clear any benefit computed earlier: later recordings or a
            # merge may have pushed the loop past the thresholds, and a
            # stale score would still flag it as an unroll candidate.
            self.unroll_benefit = 0.0

    def record_iteration(self, iterations: int) -> None:
        """Record one complete execution of the loop.

        Args:
            iterations: Number of iterations in this execution.
        """
        self.entry_count += 1
        self.total_iterations += iterations
        self.max_iterations = max(self.max_iterations, iterations)
        self.min_iterations = min(self.min_iterations, iterations)
        self.update_stats()

    def merge(self, other: "LoopProfile") -> None:
        """Merge another loop profile into this one.

        Counters are summed, the extremes are combined, and the derived
        statistics are recomputed.

        Args:
            other: Profile to merge.
        """
        self.entry_count += other.entry_count
        self.total_iterations += other.total_iterations
        self.max_iterations = max(self.max_iterations, other.max_iterations)
        self.min_iterations = min(self.min_iterations, other.min_iterations)
        self.update_stats()
160
+
161
+
162
@dataclass
class BasicBlockProfile:
    """Runtime statistics gathered for a single basic block.

    Attributes:
        location: Block location (function:block_id).
        execution_count: How often the block was executed.
        instruction_count: Number of instructions in the block.
        total_cycles: Accumulated execution cost.
        avg_cycles: Mean cost per execution.
        hot: Whether the block is executed frequently.
    """

    location: str
    execution_count: int = 0
    instruction_count: int = 0
    total_cycles: int = 0
    avg_cycles: float = 0.0
    hot: bool = False

    def update_stats(self) -> None:
        """Recompute the derived fields from the raw counters.

        Nothing changes while ``execution_count`` is not positive.
        """
        runs = self.execution_count
        if runs <= 0:
            return

        self.avg_cycles = self.total_cycles / runs
        # More than 100 executions makes the block hot.
        self.hot = runs > 100

    def merge(self, other: "BasicBlockProfile") -> None:
        """Combine another block profile into this one and refresh the stats.

        Execution counts and cycles are summed; the larger instruction
        count of the two is kept.

        Args:
            other: Profile to merge.
        """
        self.total_cycles += other.total_cycles
        self.execution_count += other.execution_count
        if other.instruction_count > self.instruction_count:
            self.instruction_count = other.instruction_count
        self.update_stats()
199
+
200
+
201
+ @dataclass
202
+ class IndirectCallProfile:
203
+ """Profile data for indirect calls.
204
+
205
+ Attributes:
206
+ location: Call site location.
207
+ targets: Map of target functions to call counts.
208
+ total_calls: Total number of calls.
209
+ most_common_target: Most frequently called target.
210
+ devirtualization_benefit: Benefit of devirtualizing.
211
+ """
212
+
213
+ location: str
214
+ targets: dict[str, int] = field(default_factory=dict)
215
+ total_calls: int = 0
216
+ most_common_target: str | None = None
217
+ devirtualization_benefit: float = 0.0
218
+
219
+ def record_call(self, target: str) -> None:
220
+ """Record an indirect call.
221
+
222
+ Args:
223
+ target: Target function name.
224
+ """
225
+ self.targets[target] = self.targets.get(target, 0) + 1
226
+ self.total_calls += 1
227
+ self.update_stats()
228
+
229
+ def update_stats(self) -> None:
230
+ """Update derived statistics."""
231
+ if self.targets:
232
+ # Find most common target
233
+ self.most_common_target = max(self.targets, key=self.targets.get) # type: ignore
234
+ # Calculate devirtualization benefit
235
+ if self.most_common_target:
236
+ freq = self.targets[self.most_common_target] / self.total_calls
237
+ if freq > 0.8: # If one target dominates
238
+ self.devirtualization_benefit = freq * 100
239
+
240
+ def merge(self, other: "IndirectCallProfile") -> None:
241
+ """Merge another indirect call profile.
242
+
243
+ Args:
244
+ other: Profile to merge.
245
+ """
246
+ for target, count in other.targets.items():
247
+ self.targets[target] = self.targets.get(target, 0) + count
248
+ self.total_calls += other.total_calls
249
+ self.update_stats()
250
+
251
+
252
@dataclass
class ProfileData:
    """Complete profile data for a module.

    Attributes:
        module_name: Name of the profiled module.
        functions: Function profile data, keyed by function name.
        branches: Branch profile data, keyed by location.
        loops: Loop profile data, keyed by location.
        blocks: Basic block profile data, keyed by location.
        indirect_calls: Indirect call profile data, keyed by location.
        total_samples: Total number of profile samples.
        metadata: Additional metadata.
    """

    module_name: str
    functions: dict[str, FunctionProfile] = field(default_factory=dict)
    branches: dict[str, BranchProfile] = field(default_factory=dict)
    loops: dict[str, LoopProfile] = field(default_factory=dict)
    blocks: dict[str, BasicBlockProfile] = field(default_factory=dict)
    indirect_calls: dict[str, IndirectCallProfile] = field(default_factory=dict)
    total_samples: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)

    def get_hot_functions(self, threshold: int = 100) -> list[str]:
        """Get list of hot functions.

        A function qualifies when its call count reaches ``threshold`` or
        when its own ``hot`` flag is set.

        Args:
            threshold: Minimum call count to be considered hot.

        Returns:
            List of hot function names.
        """
        return [
            name
            for name, profile in self.functions.items()
            if profile.call_count >= threshold or profile.hot
        ]

    def get_hot_loops(self) -> list[str]:
        """Get list of hot loops.

        Returns:
            List of hot loop locations.
        """
        return [loc for loc, profile in self.loops.items() if profile.hot]

    def get_predictable_branches(self) -> list[str]:
        """Get list of predictable branches.

        Returns:
            List of predictable branch locations.
        """
        return [loc for loc, profile in self.branches.items() if profile.predictable]

    def merge(self, other: "ProfileData") -> None:
        """Merge another profile data into this one.

        Profiles present in both objects are combined through the profile's
        own ``merge`` method. Profiles present only in ``other`` are adopted
        as deep copies, so later merges into this object never mutate
        ``other``. ``total_samples`` is summed; ``module_name`` and
        ``metadata`` are left as they are.

        Args:
            other: Profile data to merge.
        """
        import copy

        tables: tuple[tuple[dict[str, Any], dict[str, Any]], ...] = (
            (self.functions, other.functions),
            (self.branches, other.branches),
            (self.loops, other.loops),
            (self.blocks, other.blocks),
            (self.indirect_calls, other.indirect_calls),
        )
        for mine, theirs in tables:
            for key, incoming in theirs.items():
                if key in mine:
                    mine[key].merge(incoming)
                else:
                    # Copy instead of aliasing: storing ``incoming`` itself
                    # would let a subsequent merge modify ``other``'s data.
                    mine[key] = copy.deepcopy(incoming)

        self.total_samples += other.total_samples

    def get_summary(self) -> dict[str, Any]:
        """Get profile summary statistics.

        Returns:
            Dictionary with the module name, the sample total, and per
            category a total count plus the count of notable entries (hot
            functions/loops/blocks, predictable branches, and indirect calls
            whose devirtualization benefit exceeds 50).
        """
        hot_blocks = sum(1 for block in self.blocks.values() if block.hot)
        devirtualizable = sum(
            1 for call in self.indirect_calls.values() if call.devirtualization_benefit > 50
        )
        return {
            "module": self.module_name,
            "total_samples": self.total_samples,
            "functions": {
                "total": len(self.functions),
                "hot": len(self.get_hot_functions()),
            },
            "branches": {
                "total": len(self.branches),
                "predictable": len(self.get_predictable_branches()),
            },
            "loops": {"total": len(self.loops), "hot": len(self.get_hot_loops())},
            "blocks": {
                "total": len(self.blocks),
                "hot": hot_blocks,
            },
            "indirect_calls": {
                "total": len(self.indirect_calls),
                "devirtualizable": devirtualizable,
            },
        }