machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""Generate Machine Dialect™ code using AI models.
|
3
|
+
|
4
|
+
This module provides functionality to generate Machine Dialect™ code using
|
5
|
+
AI language models (such as OpenAI's GPT models). It includes code generation,
|
6
|
+
validation, and configuration management for AI API integration.
|
7
|
+
|
8
|
+
The module supports:
|
9
|
+
- Generating Machine Dialect™ code from natural language descriptions
|
10
|
+
- Validating generated code against the CFG parser
|
11
|
+
- Flexible configuration through environment variables or config files
|
12
|
+
- Command-line interface for code generation tasks
|
13
|
+
- Saving generated code to files
|
14
|
+
|
15
|
+
Note:
|
16
|
+
OpenAI integration is currently commented out. Uncomment the relevant
|
17
|
+
sections when the OpenAI library is installed.
|
18
|
+
|
19
|
+
Example:
|
20
|
+
Generate code from the command line::
|
21
|
+
|
22
|
+
$ python -m machine_dialect.cfg.generate_with_ai "calculate rectangle area"
|
23
|
+
$ python -m machine_dialect.cfg.generate_with_ai "sort a list" --model gpt-4
|
24
|
+
"""
|
25
|
+
|
26
|
+
import argparse
|
27
|
+
|
28
|
+
from machine_dialect.cfg import CFGParser
|
29
|
+
|
30
|
+
# Uncomment when you have OpenAI installed
|
31
|
+
# from openai import OpenAI
|
32
|
+
from machine_dialect.cfg.config import ConfigLoader
|
33
|
+
|
34
|
+
|
35
|
+
def generate_code(
    task: str,
    api_key: str | None = None,
    model: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 500,
    validate: bool = True,
) -> str:
    """Produce Machine Dialect™ code for a task description.

    The configuration is loaded through ``ConfigLoader``; explicit
    ``api_key`` / ``model`` arguments take precedence over the loaded values.
    The resolved settings and the resulting code are echoed to stdout.

    Note:
        The real model call is not wired in here. This function currently
        returns a fixed example program; ``task``, ``temperature`` and
        ``max_tokens`` are only printed.

    Args:
        task: Description of what the code should do.
        api_key: AI API key (overrides config/env).
        model: AI model to use (overrides config/env).
        temperature: Sampling temperature (0-2, lower = more deterministic).
        max_tokens: Maximum tokens to generate.
        validate: Whether to check the code with ``CFGParser`` and report
            the outcome on stdout.

    Returns:
        The Machine Dialect™ code as a string.

    Raises:
        ValueError: If no API key or no model is configured.

    Example:
        >>> code = generate_code(
        ...     "calculate area",
        ...     model="gpt-3.5-turbo",
        ...     temperature=0.5
        ... )
        >>> print(code)
        Set `width` to 10.
        ...
    """
    config_loader = ConfigLoader()
    settings = config_loader.load()

    # Explicit arguments win over whatever the loader found.
    if api_key:
        settings.key = api_key
    if model:
        settings.model = model

    # Refuse to continue without both a key and a model.
    if not settings.key:
        raise ValueError(config_loader.get_error_message())
    if not settings.model:
        raise ValueError("No AI model configured. " + config_loader.get_error_message())

    divider = "=" * 60

    def _banner(title: str) -> None:
        # Section header: blank line, rule, title, rule.
        print("\n" + divider)
        print(title)
        print(divider)

    print(f"\nModel: {settings.model}")
    print(f"Task: {task}")
    print(f"Temperature: {temperature}")
    print(f"Max tokens: {max_tokens}")

    _banner("Configuration:")
    print(f"Model: {settings.model}")
    # Never echo the key itself, only whether one is present.
    if settings.key:
        key_display = "*" * 10
    else:
        key_display = "Not configured"
    print(f"API Key: {key_display}")

    # Stand-in for the model output.
    example_code = """Set `width` to 10.
Set `height` to 5.
Set `area` to width * height.
Say "The area is: ".
Say area."""

    _banner(f"Example Generated Code (what {settings.model} would return):")
    print(example_code)

    if not validate:
        return example_code

    _banner("Validating generated code...")
    if CFGParser().validate(example_code):
        print("✓ Generated code is syntactically valid!")
    else:
        print("✗ Generated code has syntax errors")

    return example_code
|
133
|
+
|
134
|
+
|
135
|
+
def main() -> int:
|
136
|
+
"""Main entry point for the AI code generation script.
|
137
|
+
|
138
|
+
Parses command-line arguments and generates Machine Dialect™ code
|
139
|
+
based on the provided task description. Supports configuration
|
140
|
+
overrides, validation, and saving output to files.
|
141
|
+
|
142
|
+
Returns:
|
143
|
+
Exit code: 0 for success, 1 for errors.
|
144
|
+
|
145
|
+
Example:
|
146
|
+
>>> # Generate code for calculating area
|
147
|
+
>>> sys.argv = ['prog', 'calculate area of rectangle']
|
148
|
+
>>> main()
|
149
|
+
0
|
150
|
+
"""
|
151
|
+
parser = argparse.ArgumentParser(description="Generate Machine Dialect™ code using AI models")
|
152
|
+
parser.add_argument("task", help="Description of what the code should do")
|
153
|
+
parser.add_argument("--api-key", help="AI API key (overrides config/env)")
|
154
|
+
parser.add_argument("--model", help="AI model to use (overrides config/env)")
|
155
|
+
parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature (0-2, default: 0.7)")
|
156
|
+
parser.add_argument("--max-tokens", type=int, default=500, help="Maximum tokens to generate (default: 500)")
|
157
|
+
parser.add_argument("--no-validate", action="store_true", help="Skip validation of generated code")
|
158
|
+
parser.add_argument("--save", help="Save generated code to file")
|
159
|
+
|
160
|
+
args = parser.parse_args()
|
161
|
+
|
162
|
+
try:
|
163
|
+
code = generate_code(
|
164
|
+
task=args.task,
|
165
|
+
api_key=args.api_key,
|
166
|
+
model=args.model,
|
167
|
+
temperature=args.temperature,
|
168
|
+
max_tokens=args.max_tokens,
|
169
|
+
validate=not args.no_validate,
|
170
|
+
)
|
171
|
+
|
172
|
+
if args.save:
|
173
|
+
with open(args.save, "w") as f:
|
174
|
+
f.write(code)
|
175
|
+
print(f"\nCode saved to: {args.save}")
|
176
|
+
|
177
|
+
except Exception as e:
|
178
|
+
print(f"Error: {e}")
|
179
|
+
return 1
|
180
|
+
|
181
|
+
return 0
|
182
|
+
|
183
|
+
|
184
|
+
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    raise SystemExit(main())
|
@@ -0,0 +1,200 @@
|
|
1
|
+
"""Grammar-based generation module for Machine Dialect™ using GPT-5's CFG support."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any
|
5
|
+
|
6
|
+
|
7
|
+
def generate_with_openai(
|
8
|
+
client: Any, # OpenAI client
|
9
|
+
model: str,
|
10
|
+
task_description: str,
|
11
|
+
max_tokens: int = 500,
|
12
|
+
temperature: float = 0.7,
|
13
|
+
) -> tuple[str, dict[str, Any]]:
|
14
|
+
"""Generate Machine Dialect™ code using GPT-5's context-free grammar constraints.
|
15
|
+
|
16
|
+
This function uses GPT-5's custom tools with CFG to ensure syntactically correct
|
17
|
+
Machine Dialect™ code generation. The model is constrained to only produce
|
18
|
+
strings that match the Machine Dialect™ grammar.
|
19
|
+
|
20
|
+
Args:
|
21
|
+
client: OpenAI client instance.
|
22
|
+
model: Model name (must support CFG, e.g., 'gpt-5').
|
23
|
+
task_description: What the code should do.
|
24
|
+
max_tokens: Maximum tokens to generate.
|
25
|
+
temperature: Sampling temperature (0-2).
|
26
|
+
|
27
|
+
Returns:
|
28
|
+
Tuple of (generated_code, token_info) where:
|
29
|
+
- generated_code: Machine Dialect™ code that is syntactically valid.
|
30
|
+
- token_info: Dictionary with prompt_tokens, completion_tokens, total_tokens.
|
31
|
+
|
32
|
+
Raises:
|
33
|
+
ValueError: If the model doesn't support CFG or response is invalid.
|
34
|
+
"""
|
35
|
+
# Check if model supports CFG (currently only GPT-5 family)
|
36
|
+
if "gpt-5" not in model.lower():
|
37
|
+
raise ValueError(
|
38
|
+
f"Model '{model}' does not support context-free grammar constraints. "
|
39
|
+
"Please use a GPT-5 model (gpt-5, gpt-5-mini, or gpt-5-nano)."
|
40
|
+
)
|
41
|
+
|
42
|
+
# Create the CFG definition for Machine Dialect™
|
43
|
+
machine_dialect_cfg = _get_machine_dialect_cfg()
|
44
|
+
|
45
|
+
# Create the API request using GPT-5's custom tools with CFG
|
46
|
+
# Note: GPT-5 doesn't support temperature parameter (always uses 1.0)
|
47
|
+
import time
|
48
|
+
|
49
|
+
api_start = time.time()
|
50
|
+
|
51
|
+
try:
|
52
|
+
response = client.responses.create(
|
53
|
+
model=model,
|
54
|
+
input=[
|
55
|
+
{
|
56
|
+
"role": "developer",
|
57
|
+
"content": (
|
58
|
+
"You are a Machine Dialect™ code generator. Generate code that performs the "
|
59
|
+
"requested task using the Machine Dialect™ language. The output must conform "
|
60
|
+
"to the provided context-free grammar.\n"
|
61
|
+
"IMPORTANT:\n"
|
62
|
+
"- Write in English even if the instruction is in another language.\n"
|
63
|
+
"- Always define variables before trying to use them.\n"
|
64
|
+
"- When creating utilities, define proper Inputs (parameters the utility accepts) "
|
65
|
+
"and Outputs (values it returns) sections.\n"
|
66
|
+
"- Don't hardcode values that should be parameters - use the Inputs section instead."
|
67
|
+
),
|
68
|
+
},
|
69
|
+
{"role": "user", "content": f"Generate Machine Dialect™ code for: {task_description}"},
|
70
|
+
],
|
71
|
+
tools=[
|
72
|
+
{
|
73
|
+
"type": "custom",
|
74
|
+
"name": "machine_dialect_generator",
|
75
|
+
"description": "Generates syntactically valid Machine Dialect™ code",
|
76
|
+
"format": machine_dialect_cfg,
|
77
|
+
}
|
78
|
+
],
|
79
|
+
parallel_tool_calls=False,
|
80
|
+
timeout=30.0, # 30 second timeout
|
81
|
+
# temperature parameter removed - GPT-5 doesn't support it
|
82
|
+
)
|
83
|
+
|
84
|
+
api_time = time.time() - api_start
|
85
|
+
if api_time > 5.0: # Log if it takes more than 5 seconds
|
86
|
+
print(f" ⚠️ API call took {api_time:.2f}s")
|
87
|
+
|
88
|
+
except Exception as e:
|
89
|
+
api_time = time.time() - api_start
|
90
|
+
raise ValueError(f"API call failed after {api_time:.2f}s: {e!s}") from e
|
91
|
+
|
92
|
+
# Extract the generated code from the response
|
93
|
+
# The response should have an output_text attribute directly
|
94
|
+
if hasattr(response, "output_text"):
|
95
|
+
generated_code = response.output_text
|
96
|
+
elif hasattr(response, "output"):
|
97
|
+
# Fallback to output attribute if output_text doesn't exist
|
98
|
+
if isinstance(response.output, list) and len(response.output) > 1:
|
99
|
+
# Try to get the second output (tool output)
|
100
|
+
tool_output = response.output[1]
|
101
|
+
|
102
|
+
# Check various attributes on the tool output
|
103
|
+
if hasattr(tool_output, "text"):
|
104
|
+
generated_code = tool_output.text
|
105
|
+
elif hasattr(tool_output, "input"):
|
106
|
+
generated_code = tool_output.input
|
107
|
+
elif hasattr(tool_output, "tool_input"):
|
108
|
+
generated_code = tool_output.tool_input
|
109
|
+
elif hasattr(tool_output, "content"):
|
110
|
+
generated_code = tool_output.content
|
111
|
+
else:
|
112
|
+
generated_code = str(tool_output)
|
113
|
+
elif isinstance(response.output, str):
|
114
|
+
generated_code = response.output
|
115
|
+
else:
|
116
|
+
generated_code = str(response.output)
|
117
|
+
else:
|
118
|
+
# Last resort: try to extract from string representation
|
119
|
+
response_str = str(response)
|
120
|
+
if "output_text=" in response_str:
|
121
|
+
import re
|
122
|
+
|
123
|
+
match = re.search(r"output_text='([^']*)'", response_str)
|
124
|
+
if not match:
|
125
|
+
match = re.search(r'output_text="([^"]*)"', response_str)
|
126
|
+
if match:
|
127
|
+
generated_code = match.group(1)
|
128
|
+
else:
|
129
|
+
raise ValueError(f"Could not extract code from response: {response_str[:200]}...")
|
130
|
+
else:
|
131
|
+
raise ValueError(f"Response has no output_text or output attribute: {dir(response)}")
|
132
|
+
|
133
|
+
if not generated_code or generated_code == "None":
|
134
|
+
# Provide more helpful error message
|
135
|
+
error_msg = "Failed to extract valid code from GPT-5 response.\n"
|
136
|
+
error_msg += f"Response type: {type(response).__name__}\n"
|
137
|
+
error_msg += f"Response attributes: {[attr for attr in dir(response) if not attr.startswith('_')][:10]}\n"
|
138
|
+
if hasattr(response, "output"):
|
139
|
+
error_msg += f"Output type: {type(response.output).__name__}\n"
|
140
|
+
error_msg += f"Extracted value: {repr(generated_code[:100]) if generated_code else 'None'}"
|
141
|
+
raise ValueError(error_msg)
|
142
|
+
|
143
|
+
# Extract token usage if available
|
144
|
+
token_info = {}
|
145
|
+
if hasattr(response, "usage"):
|
146
|
+
usage = response.usage
|
147
|
+
if usage:
|
148
|
+
token_info["prompt_tokens"] = getattr(usage, "prompt_tokens", None)
|
149
|
+
token_info["completion_tokens"] = getattr(usage, "completion_tokens", None)
|
150
|
+
token_info["total_tokens"] = getattr(usage, "total_tokens", None)
|
151
|
+
|
152
|
+
# If individual counts are not available but total is, try to estimate
|
153
|
+
if token_info["total_tokens"] and not token_info["prompt_tokens"]:
|
154
|
+
# Can't accurately split, just show total
|
155
|
+
token_info["prompt_tokens"] = None
|
156
|
+
token_info["completion_tokens"] = None
|
157
|
+
|
158
|
+
# Return both code and token info as a tuple
|
159
|
+
return (str(generated_code), token_info)
|
160
|
+
|
161
|
+
|
162
|
+
# Cache the grammar to avoid re-reading the file
|
163
|
+
_cached_grammar: dict[str, Any] | None = None
|
164
|
+
|
165
|
+
|
166
|
+
def _get_machine_dialect_cfg() -> dict[str, Any]:
    """Get the Machine Dialect™ context-free grammar in GPT-5 format.

    The grammar file ``machine_dialect.lark`` next to this module is read on
    the first call only; the resulting dictionary is stored in the
    module-level ``_cached_grammar`` and the same object is returned on
    every later call.

    Returns:
        Dictionary with the keys ``type`` ("grammar"), ``syntax`` ("lark")
        and ``definition`` (the grammar file's text).
    """
    global _cached_grammar

    if _cached_grammar is None:
        grammar_path = Path(__file__).parent / "machine_dialect.lark"

        # Explicit UTF-8 so the grammar is decoded the same way on every
        # platform instead of with the locale's default encoding.
        with open(grammar_path, encoding="utf-8") as f:
            lark_grammar = f.read()

        _cached_grammar = {
            "type": "grammar",
            "syntax": "lark",  # Using Lark syntax as required by GPT-5
            "definition": lark_grammar,
        }

    return _cached_grammar
|
188
|
+
|
189
|
+
|
190
|
+
def validate_model_support(model: str) -> bool:
    """Check if a model supports context-free grammar constraints.

    The check is a case-insensitive substring match against the supported
    model family names.

    Args:
        model: The model name to check.

    Returns:
        True if the model supports CFG, False otherwise.
    """
    supported_models = ("gpt-5", "gpt-5-mini", "gpt-5-nano")
    # Lowercase once instead of once per candidate name.
    normalized = model.lower()
    return any(supported in normalized for supported in supported_models)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
"""CFG Parser for simplified Machine Dialect™ using Lark."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Any
|
5
|
+
|
6
|
+
from lark import Lark, Token, Tree
|
7
|
+
from lark.exceptions import LarkError
|
8
|
+
|
9
|
+
|
10
|
+
class CFGParser:
    """Parser for simplified Machine Dialect™ using Lark CFG."""

    def __init__(self) -> None:
        """Initialize the parser with the grammar file."""
        self.parser = Lark(self._read_grammar(), parser="lalr", start="start", debug=False)

    @staticmethod
    def _read_grammar() -> str:
        """Read the ``machine_dialect.lark`` grammar located next to this module."""
        grammar_path = Path(__file__).parent / "machine_dialect.lark"
        # Explicit UTF-8 so the grammar is decoded the same way on every
        # platform instead of with the locale's default encoding.
        with open(grammar_path, encoding="utf-8") as f:
            return f.read()

    def parse(self, code: str) -> Tree[Any]:
        """Parse Machine Dialect™ code into an AST.

        Args:
            code: The Machine Dialect™ code to parse.

        Returns:
            A Lark Tree representing the parsed AST. Empty or
            whitespace-only input yields a ``program`` tree holding an empty
            ``statement_list`` without invoking the Lark parser.

        Raises:
            ValueError: If the code cannot be parsed. The underlying
                LarkError is attached as the exception's cause.
        """
        # Handle empty or whitespace-only input
        if not code or not code.strip():
            # Return an empty tree for empty programs
            return Tree("program", [Tree("statement_list", [])])

        try:
            return self.parser.parse(code)
        except LarkError as e:
            # Convert Lark errors to match main parser behavior
            raise ValueError(f"Syntax error: {e}") from e

    def validate(self, code: str) -> bool:
        """Validate if the code conforms to the grammar.

        Args:
            code: The Machine Dialect™ code to validate.

        Returns:
            True if valid, False otherwise.
        """
        try:
            self.parse(code)
            return True
        except (LarkError, ValueError):
            return False

    def get_grammar_rules(self) -> str:
        """Get the grammar rules in a format suitable for GPT-5 CFG.

        Returns:
            The text of the grammar file, re-read from disk on each call.
        """
        return self._read_grammar()

    def tree_to_dict(self, tree: Tree[Any] | Token) -> dict[str, Any]:
        """Convert a Lark tree to a dictionary representation.

        Args:
            tree: The Lark tree or token to convert.

        Returns:
            For a token: ``{"type": "token", "name": ..., "value": ...}``.
            For a tree: ``{"type": "tree", "name": ..., "children": [...]}``
            with every child converted recursively.
        """
        if isinstance(tree, Token):
            return {"type": "token", "name": tree.type, "value": tree.value}

        return {"type": "tree", "name": tree.data, "children": [self.tree_to_dict(child) for child in tree.children]}

    def pretty_print(self, tree: Tree[Any]) -> str:
        """Pretty print a parsed tree.

        Args:
            tree: The Lark tree to print.

        Returns:
            A formatted string representation of the tree.
        """
        return str(tree.pretty())
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Tests for the CFG module."""
|