machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,187 @@
1
+ #!/usr/bin/env python3
2
+ """Generate Machine Dialect™ code using AI models.
3
+
4
+ This module provides functionality to generate Machine Dialect™ code using
5
+ AI language models (such as OpenAI's GPT models). It includes code generation,
6
+ validation, and configuration management for AI API integration.
7
+
8
+ The module supports:
9
+ - Generating Machine Dialect™ code from natural language descriptions
10
+ - Validating generated code against the CFG parser
11
+ - Flexible configuration through environment variables or config files
12
+ - Command-line interface for code generation tasks
13
+ - Saving generated code to files
14
+
15
+ Note:
16
+ OpenAI integration is currently commented out. Uncomment the relevant
17
+ sections when the OpenAI library is installed.
18
+
19
+ Example:
20
+ Generate code from the command line::
21
+
22
+ $ python -m machine_dialect.cfg.generate_with_ai "calculate rectangle area"
23
+ $ python -m machine_dialect.cfg.generate_with_ai "sort a list" --model gpt-4
24
+ """
25
+
26
+ import argparse
27
+
28
+ from machine_dialect.cfg import CFGParser
29
+
30
+ # Uncomment when you have OpenAI installed
31
+ # from openai import OpenAI
32
+ from machine_dialect.cfg.config import ConfigLoader
33
+
34
+
35
def generate_code(
    task: str,
    api_key: str | None = None,
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 500,
    validate: bool = True,
) -> str:
    """Produce Machine Dialect™ code for a task description.

    The real AI request is currently disabled in this function: it prints the
    effective settings, shows a fixed example program and returns that example.

    Args:
        task: Plain-language description of what the code should do.
        api_key: AI API key; when given it replaces the loaded configuration value.
        model: AI model name; when given it replaces the loaded configuration value.
        temperature: Sampling temperature (only echoed to the console here).
        max_tokens: Token budget (only echoed to the console here).
        validate: When True, run the example through ``CFGParser.validate``
            and print the outcome.

    Returns:
        The example Machine Dialect™ program as a string.

    Raises:
        ValueError: If no API key or no model is configured after overrides.

    Example:
        >>> code = generate_code("calculate area", model="gpt-3.5-turbo")
        >>> print(code)
        Set `width` to 10.
        ...
    """
    divider = "=" * 60

    def banner(title: str) -> None:
        # Three separate prints: blank line + rule, the title, closing rule.
        print("\n" + divider)
        print(title)
        print(divider)

    loader = ConfigLoader()
    config = loader.load()

    # Explicit arguments take precedence over whatever the loader found.
    if api_key:
        config.key = api_key
    if model:
        config.model = model

    # Refuse to continue without both a key and a model.
    if not config.key:
        raise ValueError(loader.get_error_message())
    if not config.model:
        raise ValueError("No AI model configured. " + loader.get_error_message())

    # The OpenAI client creation and the generate_with_openai(...) call belong
    # here; they stay disabled until the OpenAI library is installed.

    print(f"\nModel: {config.model}")
    print(f"Task: {task}")
    print(f"Temperature: {temperature}")
    print(f"Max tokens: {max_tokens}")

    # Demonstration output: show the configuration with the key masked.
    banner("Configuration:")
    print(f"Model: {config.model}")
    masked_key = "*" * 10 if config.key else "Not configured"
    print(f"API Key: {masked_key}")

    # Stand-in for what the model would return.
    example_code = (
        "Set `width` to 10.\n"
        "Set `height` to 5.\n"
        "Set `area` to width * height.\n"
        'Say "The area is: ".\n'
        "Say area."
    )

    banner(f"Example Generated Code (what {config.model} would return):")
    print(example_code)

    if validate:
        banner("Validating generated code...")
        if CFGParser().validate(example_code):
            print("✓ Generated code is syntactically valid!")
        else:
            print("✗ Generated code has syntax errors")

    return example_code
133
+
134
+
135
+ def main() -> int:
136
+ """Main entry point for the AI code generation script.
137
+
138
+ Parses command-line arguments and generates Machine Dialect™ code
139
+ based on the provided task description. Supports configuration
140
+ overrides, validation, and saving output to files.
141
+
142
+ Returns:
143
+ Exit code: 0 for success, 1 for errors.
144
+
145
+ Example:
146
+ >>> # Generate code for calculating area
147
+ >>> sys.argv = ['prog', 'calculate area of rectangle']
148
+ >>> main()
149
+ 0
150
+ """
151
+ parser = argparse.ArgumentParser(description="Generate Machine Dialect™ code using AI models")
152
+ parser.add_argument("task", help="Description of what the code should do")
153
+ parser.add_argument("--api-key", help="AI API key (overrides config/env)")
154
+ parser.add_argument("--model", help="AI model to use (overrides config/env)")
155
+ parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature (0-2, default: 0.7)")
156
+ parser.add_argument("--max-tokens", type=int, default=500, help="Maximum tokens to generate (default: 500)")
157
+ parser.add_argument("--no-validate", action="store_true", help="Skip validation of generated code")
158
+ parser.add_argument("--save", help="Save generated code to file")
159
+
160
+ args = parser.parse_args()
161
+
162
+ try:
163
+ code = generate_code(
164
+ task=args.task,
165
+ api_key=args.api_key,
166
+ model=args.model,
167
+ temperature=args.temperature,
168
+ max_tokens=args.max_tokens,
169
+ validate=not args.no_validate,
170
+ )
171
+
172
+ if args.save:
173
+ with open(args.save, "w") as f:
174
+ f.write(code)
175
+ print(f"\nCode saved to: {args.save}")
176
+
177
+ except Exception as e:
178
+ print(f"Error: {e}")
179
+ return 1
180
+
181
+ return 0
182
+
183
+
184
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    raise SystemExit(main())
@@ -0,0 +1,200 @@
1
+ """Grammar-based generation module for Machine Dialect™ using GPT-5's CFG support."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+
7
+ def generate_with_openai(
8
+ client: Any, # OpenAI client
9
+ model: str,
10
+ task_description: str,
11
+ max_tokens: int = 500,
12
+ temperature: float = 0.7,
13
+ ) -> tuple[str, dict[str, Any]]:
14
+ """Generate Machine Dialect™ code using GPT-5's context-free grammar constraints.
15
+
16
+ This function uses GPT-5's custom tools with CFG to ensure syntactically correct
17
+ Machine Dialect™ code generation. The model is constrained to only produce
18
+ strings that match the Machine Dialect™ grammar.
19
+
20
+ Args:
21
+ client: OpenAI client instance.
22
+ model: Model name (must support CFG, e.g., 'gpt-5').
23
+ task_description: What the code should do.
24
+ max_tokens: Maximum tokens to generate.
25
+ temperature: Sampling temperature (0-2).
26
+
27
+ Returns:
28
+ Tuple of (generated_code, token_info) where:
29
+ - generated_code: Machine Dialect™ code that is syntactically valid.
30
+ - token_info: Dictionary with prompt_tokens, completion_tokens, total_tokens.
31
+
32
+ Raises:
33
+ ValueError: If the model doesn't support CFG or response is invalid.
34
+ """
35
+ # Check if model supports CFG (currently only GPT-5 family)
36
+ if "gpt-5" not in model.lower():
37
+ raise ValueError(
38
+ f"Model '{model}' does not support context-free grammar constraints. "
39
+ "Please use a GPT-5 model (gpt-5, gpt-5-mini, or gpt-5-nano)."
40
+ )
41
+
42
+ # Create the CFG definition for Machine Dialect™
43
+ machine_dialect_cfg = _get_machine_dialect_cfg()
44
+
45
+ # Create the API request using GPT-5's custom tools with CFG
46
+ # Note: GPT-5 doesn't support temperature parameter (always uses 1.0)
47
+ import time
48
+
49
+ api_start = time.time()
50
+
51
+ try:
52
+ response = client.responses.create(
53
+ model=model,
54
+ input=[
55
+ {
56
+ "role": "developer",
57
+ "content": (
58
+ "You are a Machine Dialect™ code generator. Generate code that performs the "
59
+ "requested task using the Machine Dialect™ language. The output must conform "
60
+ "to the provided context-free grammar.\n"
61
+ "IMPORTANT:\n"
62
+ "- Write in English even if the instruction is in another language.\n"
63
+ "- Always define variables before trying to use them.\n"
64
+ "- When creating utilities, define proper Inputs (parameters the utility accepts) "
65
+ "and Outputs (values it returns) sections.\n"
66
+ "- Don't hardcode values that should be parameters - use the Inputs section instead."
67
+ ),
68
+ },
69
+ {"role": "user", "content": f"Generate Machine Dialect™ code for: {task_description}"},
70
+ ],
71
+ tools=[
72
+ {
73
+ "type": "custom",
74
+ "name": "machine_dialect_generator",
75
+ "description": "Generates syntactically valid Machine Dialect™ code",
76
+ "format": machine_dialect_cfg,
77
+ }
78
+ ],
79
+ parallel_tool_calls=False,
80
+ timeout=30.0, # 30 second timeout
81
+ # temperature parameter removed - GPT-5 doesn't support it
82
+ )
83
+
84
+ api_time = time.time() - api_start
85
+ if api_time > 5.0: # Log if it takes more than 5 seconds
86
+ print(f" ⚠️ API call took {api_time:.2f}s")
87
+
88
+ except Exception as e:
89
+ api_time = time.time() - api_start
90
+ raise ValueError(f"API call failed after {api_time:.2f}s: {e!s}") from e
91
+
92
+ # Extract the generated code from the response
93
+ # The response should have an output_text attribute directly
94
+ if hasattr(response, "output_text"):
95
+ generated_code = response.output_text
96
+ elif hasattr(response, "output"):
97
+ # Fallback to output attribute if output_text doesn't exist
98
+ if isinstance(response.output, list) and len(response.output) > 1:
99
+ # Try to get the second output (tool output)
100
+ tool_output = response.output[1]
101
+
102
+ # Check various attributes on the tool output
103
+ if hasattr(tool_output, "text"):
104
+ generated_code = tool_output.text
105
+ elif hasattr(tool_output, "input"):
106
+ generated_code = tool_output.input
107
+ elif hasattr(tool_output, "tool_input"):
108
+ generated_code = tool_output.tool_input
109
+ elif hasattr(tool_output, "content"):
110
+ generated_code = tool_output.content
111
+ else:
112
+ generated_code = str(tool_output)
113
+ elif isinstance(response.output, str):
114
+ generated_code = response.output
115
+ else:
116
+ generated_code = str(response.output)
117
+ else:
118
+ # Last resort: try to extract from string representation
119
+ response_str = str(response)
120
+ if "output_text=" in response_str:
121
+ import re
122
+
123
+ match = re.search(r"output_text='([^']*)'", response_str)
124
+ if not match:
125
+ match = re.search(r'output_text="([^"]*)"', response_str)
126
+ if match:
127
+ generated_code = match.group(1)
128
+ else:
129
+ raise ValueError(f"Could not extract code from response: {response_str[:200]}...")
130
+ else:
131
+ raise ValueError(f"Response has no output_text or output attribute: {dir(response)}")
132
+
133
+ if not generated_code or generated_code == "None":
134
+ # Provide more helpful error message
135
+ error_msg = "Failed to extract valid code from GPT-5 response.\n"
136
+ error_msg += f"Response type: {type(response).__name__}\n"
137
+ error_msg += f"Response attributes: {[attr for attr in dir(response) if not attr.startswith('_')][:10]}\n"
138
+ if hasattr(response, "output"):
139
+ error_msg += f"Output type: {type(response.output).__name__}\n"
140
+ error_msg += f"Extracted value: {repr(generated_code[:100]) if generated_code else 'None'}"
141
+ raise ValueError(error_msg)
142
+
143
+ # Extract token usage if available
144
+ token_info = {}
145
+ if hasattr(response, "usage"):
146
+ usage = response.usage
147
+ if usage:
148
+ token_info["prompt_tokens"] = getattr(usage, "prompt_tokens", None)
149
+ token_info["completion_tokens"] = getattr(usage, "completion_tokens", None)
150
+ token_info["total_tokens"] = getattr(usage, "total_tokens", None)
151
+
152
+ # If individual counts are not available but total is, try to estimate
153
+ if token_info["total_tokens"] and not token_info["prompt_tokens"]:
154
+ # Can't accurately split, just show total
155
+ token_info["prompt_tokens"] = None
156
+ token_info["completion_tokens"] = None
157
+
158
+ # Return both code and token info as a tuple
159
+ return (str(generated_code), token_info)
160
+
161
+
162
+ # Cache the grammar to avoid re-reading the file
163
+ _cached_grammar: dict[str, Any] | None = None
164
+
165
+
166
+ def _get_machine_dialect_cfg() -> dict[str, Any]:
167
+ """Get the Machine Dialect™ context-free grammar in GPT-5 format.
168
+
169
+ Returns:
170
+ Dictionary containing the CFG definition for GPT-5's custom tools.
171
+ """
172
+ global _cached_grammar
173
+
174
+ if _cached_grammar is None:
175
+ # Read the Machine Dialect™ Lark grammar file for GPT-5
176
+ grammar_path = Path(__file__).parent / "machine_dialect.lark"
177
+
178
+ with open(grammar_path) as f:
179
+ lark_grammar = f.read()
180
+
181
+ _cached_grammar = {
182
+ "type": "grammar",
183
+ "syntax": "lark", # Using Lark syntax as required by GPT-5
184
+ "definition": lark_grammar,
185
+ }
186
+
187
+ return _cached_grammar
188
+
189
+
190
+ def validate_model_support(model: str) -> bool:
191
+ """Check if a model supports context-free grammar constraints.
192
+
193
+ Args:
194
+ model: The model name to check.
195
+
196
+ Returns:
197
+ True if the model supports CFG, False otherwise.
198
+ """
199
+ supported_models = ["gpt-5", "gpt-5-mini", "gpt-5-nano"]
200
+ return any(supported in model.lower() for supported in supported_models)
@@ -0,0 +1,94 @@
1
+ """CFG Parser for simplified Machine Dialect™ using Lark."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from lark import Lark, Token, Tree
7
+ from lark.exceptions import LarkError
8
+
9
+
10
class CFGParser:
    """Parser for simplified Machine Dialect™ using Lark CFG.

    The grammar is read from ``machine_dialect.lark`` in this module's
    directory and compiled with Lark's LALR parser using ``start`` as the
    start rule.
    """

    def __init__(self) -> None:
        """Initialize the parser with the grammar file."""
        self.parser = Lark(self._read_grammar(), parser="lalr", start="start", debug=False)

    @staticmethod
    def _read_grammar() -> str:
        """Return the text of the bundled grammar file, decoded as UTF-8."""
        grammar_path = Path(__file__).parent / "machine_dialect.lark"
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(grammar_path, encoding="utf-8") as f:
            return f.read()

    def parse(self, code: str) -> Tree[Any]:
        """Parse Machine Dialect™ code into an AST.

        Args:
            code: The Machine Dialect™ code to parse.

        Returns:
            A Lark Tree representing the parsed AST. Empty or whitespace-only
            input yields a ``program`` tree holding an empty ``statement_list``.

        Raises:
            ValueError: If the code cannot be parsed; the underlying
                ``LarkError`` is attached as the cause.
        """
        # Empty programs are not passed to Lark; return an empty tree instead.
        if not code or not code.strip():
            return Tree("program", [Tree("statement_list", [])])

        try:
            return self.parser.parse(code)
        except LarkError as e:
            # Convert Lark errors to match main parser behavior
            raise ValueError(f"Syntax error: {e}") from e

    def validate(self, code: str) -> bool:
        """Validate if the code conforms to the grammar.

        Args:
            code: The Machine Dialect™ code to validate.

        Returns:
            True if valid, False otherwise.
        """
        try:
            self.parse(code)
            return True
        except (LarkError, ValueError):
            # parse() reports syntax errors as ValueError.
            return False

    def get_grammar_rules(self) -> str:
        """Get the grammar rules in a format suitable for GPT-5 CFG.

        Returns:
            The raw text of the Lark grammar file (re-read on every call).
        """
        return self._read_grammar()

    def tree_to_dict(self, tree: Tree[Any] | Token) -> dict[str, Any]:
        """Convert a Lark tree to a dictionary representation.

        Args:
            tree: The Lark tree or token to convert.

        Returns:
            For a token: ``{"type": "token", "name", "value"}``. For a tree:
            ``{"type": "tree", "name", "children"}`` with children converted
            recursively.
        """
        if isinstance(tree, Token):
            return {"type": "token", "name": tree.type, "value": tree.value}

        # NOTE(review): assumes every child is a Tree or Token; confirm the
        # grammar never yields None placeholders for optional items.
        return {"type": "tree", "name": tree.data, "children": [self.tree_to_dict(child) for child in tree.children]}

    def pretty_print(self, tree: Tree[Any]) -> str:
        """Pretty print a parsed tree.

        Args:
            tree: The Lark tree to print.

        Returns:
            A formatted string representation of the tree.
        """
        return str(tree.pretty())
@@ -0,0 +1 @@
1
+ """Tests for the CFG module."""