morphml-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of morphml might be problematic.

Files changed (158)
  1. morphml/__init__.py +14 -0
  2. morphml/api/__init__.py +26 -0
  3. morphml/api/app.py +326 -0
  4. morphml/api/auth.py +193 -0
  5. morphml/api/client.py +338 -0
  6. morphml/api/models.py +132 -0
  7. morphml/api/rate_limit.py +192 -0
  8. morphml/benchmarking/__init__.py +36 -0
  9. morphml/benchmarking/comparison.py +430 -0
  10. morphml/benchmarks/__init__.py +56 -0
  11. morphml/benchmarks/comparator.py +409 -0
  12. morphml/benchmarks/datasets.py +280 -0
  13. morphml/benchmarks/metrics.py +199 -0
  14. morphml/benchmarks/openml_suite.py +201 -0
  15. morphml/benchmarks/problems.py +289 -0
  16. morphml/benchmarks/suite.py +318 -0
  17. morphml/cli/__init__.py +5 -0
  18. morphml/cli/commands/experiment.py +329 -0
  19. morphml/cli/main.py +457 -0
  20. morphml/cli/quickstart.py +312 -0
  21. morphml/config.py +278 -0
  22. morphml/constraints/__init__.py +19 -0
  23. morphml/constraints/handler.py +205 -0
  24. morphml/constraints/predicates.py +285 -0
  25. morphml/core/__init__.py +3 -0
  26. morphml/core/crossover.py +449 -0
  27. morphml/core/dsl/README.md +359 -0
  28. morphml/core/dsl/__init__.py +72 -0
  29. morphml/core/dsl/ast_nodes.py +364 -0
  30. morphml/core/dsl/compiler.py +318 -0
  31. morphml/core/dsl/layers.py +368 -0
  32. morphml/core/dsl/lexer.py +336 -0
  33. morphml/core/dsl/parser.py +455 -0
  34. morphml/core/dsl/search_space.py +386 -0
  35. morphml/core/dsl/syntax.py +199 -0
  36. morphml/core/dsl/type_system.py +361 -0
  37. morphml/core/dsl/validator.py +386 -0
  38. morphml/core/graph/__init__.py +40 -0
  39. morphml/core/graph/edge.py +124 -0
  40. morphml/core/graph/graph.py +507 -0
  41. morphml/core/graph/mutations.py +409 -0
  42. morphml/core/graph/node.py +196 -0
  43. morphml/core/graph/serialization.py +361 -0
  44. morphml/core/graph/visualization.py +431 -0
  45. morphml/core/objectives/__init__.py +20 -0
  46. morphml/core/search/__init__.py +33 -0
  47. morphml/core/search/individual.py +252 -0
  48. morphml/core/search/parameters.py +453 -0
  49. morphml/core/search/population.py +375 -0
  50. morphml/core/search/search_engine.py +340 -0
  51. morphml/distributed/__init__.py +76 -0
  52. morphml/distributed/fault_tolerance.py +497 -0
  53. morphml/distributed/health_monitor.py +348 -0
  54. morphml/distributed/master.py +709 -0
  55. morphml/distributed/proto/README.md +224 -0
  56. morphml/distributed/proto/__init__.py +74 -0
  57. morphml/distributed/proto/worker.proto +170 -0
  58. morphml/distributed/proto/worker_pb2.py +79 -0
  59. morphml/distributed/proto/worker_pb2_grpc.py +423 -0
  60. morphml/distributed/resource_manager.py +416 -0
  61. morphml/distributed/scheduler.py +567 -0
  62. morphml/distributed/storage/__init__.py +33 -0
  63. morphml/distributed/storage/artifacts.py +381 -0
  64. morphml/distributed/storage/cache.py +366 -0
  65. morphml/distributed/storage/checkpointing.py +329 -0
  66. morphml/distributed/storage/database.py +459 -0
  67. morphml/distributed/worker.py +549 -0
  68. morphml/evaluation/__init__.py +5 -0
  69. morphml/evaluation/heuristic.py +237 -0
  70. morphml/exceptions.py +55 -0
  71. morphml/execution/__init__.py +5 -0
  72. morphml/execution/local_executor.py +350 -0
  73. morphml/integrations/__init__.py +28 -0
  74. morphml/integrations/jax_adapter.py +206 -0
  75. morphml/integrations/pytorch_adapter.py +530 -0
  76. morphml/integrations/sklearn_adapter.py +206 -0
  77. morphml/integrations/tensorflow_adapter.py +230 -0
  78. morphml/logging_config.py +93 -0
  79. morphml/meta_learning/__init__.py +66 -0
  80. morphml/meta_learning/architecture_similarity.py +277 -0
  81. morphml/meta_learning/experiment_database.py +240 -0
  82. morphml/meta_learning/knowledge_base/__init__.py +19 -0
  83. morphml/meta_learning/knowledge_base/embedder.py +179 -0
  84. morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
  85. morphml/meta_learning/knowledge_base/meta_features.py +265 -0
  86. morphml/meta_learning/knowledge_base/vector_store.py +271 -0
  87. morphml/meta_learning/predictors/__init__.py +27 -0
  88. morphml/meta_learning/predictors/ensemble.py +221 -0
  89. morphml/meta_learning/predictors/gnn_predictor.py +552 -0
  90. morphml/meta_learning/predictors/learning_curve.py +231 -0
  91. morphml/meta_learning/predictors/proxy_metrics.py +261 -0
  92. morphml/meta_learning/strategy_evolution/__init__.py +27 -0
  93. morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
  94. morphml/meta_learning/strategy_evolution/bandit.py +276 -0
  95. morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
  96. morphml/meta_learning/transfer.py +581 -0
  97. morphml/meta_learning/warm_start.py +286 -0
  98. morphml/optimizers/__init__.py +74 -0
  99. morphml/optimizers/adaptive_operators.py +399 -0
  100. morphml/optimizers/bayesian/__init__.py +52 -0
  101. morphml/optimizers/bayesian/acquisition.py +387 -0
  102. morphml/optimizers/bayesian/base.py +319 -0
  103. morphml/optimizers/bayesian/gaussian_process.py +635 -0
  104. morphml/optimizers/bayesian/smac.py +534 -0
  105. morphml/optimizers/bayesian/tpe.py +411 -0
  106. morphml/optimizers/differential_evolution.py +220 -0
  107. morphml/optimizers/evolutionary/__init__.py +61 -0
  108. morphml/optimizers/evolutionary/cma_es.py +416 -0
  109. morphml/optimizers/evolutionary/differential_evolution.py +556 -0
  110. morphml/optimizers/evolutionary/encoding.py +426 -0
  111. morphml/optimizers/evolutionary/particle_swarm.py +449 -0
  112. morphml/optimizers/genetic_algorithm.py +486 -0
  113. morphml/optimizers/gradient_based/__init__.py +22 -0
  114. morphml/optimizers/gradient_based/darts.py +550 -0
  115. morphml/optimizers/gradient_based/enas.py +585 -0
  116. morphml/optimizers/gradient_based/operations.py +474 -0
  117. morphml/optimizers/gradient_based/utils.py +601 -0
  118. morphml/optimizers/hill_climbing.py +169 -0
  119. morphml/optimizers/multi_objective/__init__.py +56 -0
  120. morphml/optimizers/multi_objective/indicators.py +504 -0
  121. morphml/optimizers/multi_objective/nsga2.py +647 -0
  122. morphml/optimizers/multi_objective/visualization.py +427 -0
  123. morphml/optimizers/nsga2.py +308 -0
  124. morphml/optimizers/random_search.py +172 -0
  125. morphml/optimizers/simulated_annealing.py +181 -0
  126. morphml/plugins/__init__.py +35 -0
  127. morphml/plugins/custom_evaluator_example.py +81 -0
  128. morphml/plugins/custom_optimizer_example.py +63 -0
  129. morphml/plugins/plugin_system.py +454 -0
  130. morphml/reports/__init__.py +30 -0
  131. morphml/reports/generator.py +362 -0
  132. morphml/tracking/__init__.py +7 -0
  133. morphml/tracking/experiment.py +309 -0
  134. morphml/tracking/logger.py +301 -0
  135. morphml/tracking/reporter.py +357 -0
  136. morphml/utils/__init__.py +6 -0
  137. morphml/utils/checkpoint.py +189 -0
  138. morphml/utils/comparison.py +390 -0
  139. morphml/utils/export.py +407 -0
  140. morphml/utils/progress.py +392 -0
  141. morphml/utils/validation.py +392 -0
  142. morphml/version.py +7 -0
  143. morphml/visualization/__init__.py +50 -0
  144. morphml/visualization/analytics.py +423 -0
  145. morphml/visualization/architecture_diagrams.py +353 -0
  146. morphml/visualization/architecture_plot.py +223 -0
  147. morphml/visualization/convergence_plot.py +174 -0
  148. morphml/visualization/crossover_viz.py +386 -0
  149. morphml/visualization/graph_viz.py +338 -0
  150. morphml/visualization/pareto_plot.py +149 -0
  151. morphml/visualization/plotly_dashboards.py +422 -0
  152. morphml/visualization/population.py +309 -0
  153. morphml/visualization/progress.py +260 -0
  154. morphml-1.0.0.dist-info/METADATA +434 -0
  155. morphml-1.0.0.dist-info/RECORD +158 -0
  156. morphml-1.0.0.dist-info/WHEEL +4 -0
  157. morphml-1.0.0.dist-info/entry_points.txt +3 -0
  158. morphml-1.0.0.dist-info/licenses/LICENSE +21 -0

morphml/core/dsl/layers.py
@@ -0,0 +1,368 @@
+"""Layer builders for the MorphML DSL.
+
+This module provides a Pythonic interface for defining neural network layers
+in search spaces using a builder pattern.
+
+Example:
+    >>> from morphml.core.dsl import Layer
+    >>>
+    >>> # Define a conv2d layer with multiple filter options
+    >>> conv = Layer.conv2d(filters=[32, 64, 128], kernel_size=[3, 5])
+    >>>
+    >>> # Define a dense layer
+    >>> dense = Layer.dense(units=[128, 256, 512])
+"""
+
+from typing import Any, Dict, List, Optional, Union
+
+from morphml.core.graph import GraphNode
+
+
+class LayerSpec:
+    """
+    Specification for a layer in the search space.
+
+    A LayerSpec defines a layer type and its parameter ranges.
+    During search, specific parameter values are sampled from these ranges.
+
+    Attributes:
+        operation: Layer operation type (conv2d, dense, etc.)
+        param_ranges: Dictionary of parameter names to possible values
+        metadata: Additional metadata
+
+    Example:
+        >>> spec = LayerSpec("conv2d", {
+        ...     "filters": [32, 64, 128],
+        ...     "kernel_size": [3, 5, 7]
+        ... })
+    """
+
+    def __init__(
+        self,
+        operation: str,
+        param_ranges: Optional[Dict[str, Any]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Initialize layer specification.
+
+        Args:
+            operation: Layer operation type
+            param_ranges: Parameter ranges (param_name -> list of values or single value)
+            metadata: Additional metadata
+        """
+        self.operation = operation
+        self.param_ranges = param_ranges or {}
+        self.metadata = metadata or {}
+
+    def sample(self) -> GraphNode:
+        """
+        Sample a concrete layer from this specification.
+
+        Returns:
+            GraphNode with sampled parameters
+        """
+        import random
+
+        # Sample one value from each parameter range
+        params = {}
+        for param_name, values in self.param_ranges.items():
+            if isinstance(values, list) and values:
+                params[param_name] = random.choice(values)
+            else:
+                params[param_name] = values
+
+        return GraphNode.create(self.operation, params=params, metadata=self.metadata)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to dictionary."""
+        return {
+            "operation": self.operation,
+            "param_ranges": self.param_ranges,
+            "metadata": self.metadata,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LayerSpec":
+        """Deserialize from dictionary."""
+        return cls(
+            operation=data["operation"],
+            param_ranges=data.get("param_ranges", {}),
+            metadata=data.get("metadata", {}),
+        )
+
+    def __repr__(self) -> str:
+        """String representation."""
+        return f"LayerSpec(operation={self.operation}, params={list(self.param_ranges.keys())})"
+
+
+class Layer:
+    """
+    Builder for defining layers in the search space.
+
+    Provides static methods for creating layer specifications with a
+    Pythonic API.
+
+    Example:
+        >>> # Convolutional layers
+        >>> Layer.conv2d(filters=[32, 64], kernel_size=3)
+        >>> Layer.conv2d(filters=64, kernel_size=[3, 5, 7])
+        >>>
+        >>> # Pooling layers
+        >>> Layer.maxpool(pool_size=2)
+        >>> Layer.avgpool(pool_size=[2, 3])
+        >>>
+        >>> # Dense layers
+        >>> Layer.dense(units=[128, 256, 512])
+        >>>
+        >>> # Activation layers
+        >>> Layer.relu()
+        >>> Layer.sigmoid()
+    """
+
+    @staticmethod
+    def conv2d(
+        filters: Union[int, List[int]],
+        kernel_size: Union[int, List[int]] = 3,
+        strides: Union[int, List[int]] = 1,
+        padding: str = "same",
+        activation: Optional[str] = None,
+        **kwargs: Any,
+    ) -> LayerSpec:
+        """
+        Define a 2D convolutional layer.
+
+        Args:
+            filters: Number of filters (can be list for search)
+            kernel_size: Kernel size (can be list for search)
+            strides: Stride size
+            padding: Padding mode ('same' or 'valid')
+            activation: Optional activation function
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for conv2d layer
+
+        Example:
+            >>> Layer.conv2d(filters=[32, 64, 128], kernel_size=[3, 5])
+        """
+        param_ranges = {
+            "filters": filters if isinstance(filters, list) else [filters],
+            "kernel_size": kernel_size if isinstance(kernel_size, list) else [kernel_size],
+            "strides": strides if isinstance(strides, list) else [strides],
+            "padding": [padding],
+        }
+
+        if activation:
+            param_ranges["activation"] = [activation]
+
+        param_ranges.update(kwargs)
+
+        return LayerSpec("conv2d", param_ranges)
+
+    @staticmethod
+    def dense(
+        units: Union[int, List[int]],
+        activation: Optional[str] = None,
+        use_bias: bool = True,
+        **kwargs: Any,
+    ) -> LayerSpec:
+        """
+        Define a fully-connected (dense) layer.
+
+        Args:
+            units: Number of units (can be list for search)
+            activation: Optional activation function
+            use_bias: Whether to use bias
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for dense layer
+
+        Example:
+            >>> Layer.dense(units=[128, 256, 512])
+        """
+        param_ranges = {
+            "units": units if isinstance(units, list) else [units],
+            "use_bias": [use_bias],
+        }
+
+        if activation:
+            param_ranges["activation"] = [activation]
+
+        param_ranges.update(kwargs)
+
+        return LayerSpec("dense", param_ranges)
+
+    @staticmethod
+    def maxpool(
+        pool_size: Union[int, List[int]] = 2,
+        strides: Optional[Union[int, List[int]]] = None,
+        padding: str = "valid",
+        **kwargs: Any,
+    ) -> LayerSpec:
+        """
+        Define a max pooling layer.
+
+        Args:
+            pool_size: Pool size (can be list for search)
+            strides: Stride size (defaults to pool_size)
+            padding: Padding mode
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for maxpool layer
+        """
+        param_ranges = {
+            "pool_size": pool_size if isinstance(pool_size, list) else [pool_size],
+            "padding": [padding],
+        }
+
+        if strides is not None:
+            param_ranges["strides"] = strides if isinstance(strides, list) else [strides]
+
+        param_ranges.update(kwargs)
+
+        return LayerSpec("maxpool", param_ranges)
+
+    @staticmethod
+    def avgpool(
+        pool_size: Union[int, List[int]] = 2,
+        strides: Optional[Union[int, List[int]]] = None,
+        padding: str = "valid",
+        **kwargs: Any,
+    ) -> LayerSpec:
+        """Define an average pooling layer."""
+        param_ranges = {
+            "pool_size": pool_size if isinstance(pool_size, list) else [pool_size],
+            "padding": [padding],
+        }
+
+        if strides is not None:
+            param_ranges["strides"] = strides if isinstance(strides, list) else [strides]
+
+        param_ranges.update(kwargs)
+
+        return LayerSpec("avgpool", param_ranges)
+
+    @staticmethod
+    def dropout(rate: Union[float, List[float]] = 0.5, **kwargs: Any) -> LayerSpec:
+        """
+        Define a dropout layer.
+
+        Args:
+            rate: Dropout rate (can be list for search)
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for dropout layer
+        """
+        param_ranges = {
+            "rate": rate if isinstance(rate, list) else [rate],
+        }
+        param_ranges.update(kwargs)
+
+        return LayerSpec("dropout", param_ranges)
+
+    @staticmethod
+    def batchnorm(**kwargs: Any) -> LayerSpec:
+        """Define a batch normalization layer."""
+        return LayerSpec("batchnorm", param_ranges=kwargs)
+
+    @staticmethod
+    def flatten(**kwargs: Any) -> LayerSpec:
+        """
+        Define a flatten layer.
+
+        Flattens the input tensor to 1D (excluding batch dimension).
+        Commonly used between convolutional and dense layers.
+
+        Args:
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for flatten layer
+        """
+        return LayerSpec("flatten", param_ranges=kwargs)
+
+    @staticmethod
+    def relu(**kwargs: Any) -> LayerSpec:
+        """Define a ReLU activation layer."""
+        return LayerSpec("relu", param_ranges=kwargs)
+
+    @staticmethod
+    def sigmoid(**kwargs: Any) -> LayerSpec:
+        """Define a sigmoid activation layer."""
+        return LayerSpec("sigmoid", param_ranges=kwargs)
+
+    @staticmethod
+    def tanh(**kwargs: Any) -> LayerSpec:
+        """Define a tanh activation layer."""
+        return LayerSpec("tanh", param_ranges=kwargs)
+
+    @staticmethod
+    def softmax(**kwargs: Any) -> LayerSpec:
+        """Define a softmax activation layer."""
+        return LayerSpec("softmax", param_ranges=kwargs)
+
+    @staticmethod
+    def input(shape: tuple, **kwargs: Any) -> LayerSpec:
+        """
+        Define an input layer.
+
+        Args:
+            shape: Input shape (excluding batch dimension)
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for input layer
+        """
+        param_ranges = {"shape": [shape]}
+        param_ranges.update(kwargs)
+
+        return LayerSpec("input", param_ranges)
+
+    @staticmethod
+    def output(units: int, activation: str = "softmax", **kwargs: Any) -> LayerSpec:
+        """
+        Define an output layer.
+
+        Args:
+            units: Number of output units (classes)
+            activation: Activation function
+            **kwargs: Additional parameters
+
+        Returns:
+            LayerSpec for output layer
+        """
+        param_ranges = {
+            "units": [units],
+            "activation": [activation],
+        }
+        param_ranges.update(kwargs)
+
+        return LayerSpec("dense", param_ranges, metadata={"is_output": True})
+
+    @staticmethod
+    def custom(
+        operation: str,
+        param_ranges: Optional[Dict[str, List[Any]]] = None,
+        **kwargs: Any,
+    ) -> LayerSpec:
+        """
+        Define a custom layer.
+
+        Args:
+            operation: Operation type
+            param_ranges: Parameter ranges
+            **kwargs: Additional parameter ranges
+
+        Returns:
+            LayerSpec for custom layer
+
+        Example:
+            >>> Layer.custom("my_op", {"param1": [1, 2, 3]})
+        """
+        ranges = param_ranges or {}
+        ranges.update(kwargs)
+        return LayerSpec(operation, ranges)
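
A minimal usage sketch of the layer builder above, for readers skimming the diff (module path taken from the file list; GraphNode.create is assumed to behave as the import in this file implies):

    from morphml.core.dsl.layers import Layer, LayerSpec

    # Declare a conv2d layer whose filters and kernel size are search dimensions
    conv = Layer.conv2d(filters=[32, 64, 128], kernel_size=[3, 5])
    print(conv)  # LayerSpec(operation=conv2d, params=['filters', 'kernel_size', 'strides', 'padding'])

    # Specs round-trip through plain dictionaries for serialization
    spec = LayerSpec.from_dict(conv.to_dict())

    # sample() picks one value per parameter range and builds a GraphNode
    node = spec.sample()

Note that Layer.output() deliberately returns a "dense" LayerSpec tagged with metadata={"is_output": True}, so downstream code distinguishes output heads by metadata rather than by operation name.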

morphml/core/dsl/lexer.py
@@ -0,0 +1,336 @@
+"""Lexical analyzer for MorphML DSL.
+
+Converts source code into a stream of tokens for parsing.
+
+Author: Eshan Roy <eshanized@proton.me>
+Organization: TONMOY INFRASTRUCTURE & VISION
+"""
+
+from dataclasses import dataclass
+from typing import Any, List, Optional
+
+from morphml.core.dsl.syntax import KEYWORDS, OPERATORS, TokenType
+from morphml.exceptions import DSLError
+from morphml.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class Token:
+    """Represents a single token in the source code."""
+
+    type: TokenType
+    value: Any
+    line: int
+    column: int
+
+    def __repr__(self) -> str:
+        """String representation for debugging."""
+        return f"Token({self.type.name}, {self.value!r}, {self.line}:{self.column})"
+
+
+class Lexer:
+    """
+    Tokenizes MorphML DSL source code.
+
+    Converts a string of source code into a stream of tokens that can be
+    parsed into an abstract syntax tree.
+
+    Example:
+        >>> source = 'Layer.conv2d(filters=[32, 64])'
+        >>> lexer = Lexer(source)
+        >>> tokens = lexer.tokenize()
+        >>> for token in tokens:
+        ...     print(token)
+    """
+
+    def __init__(self, source: str):
+        """
+        Initialize lexer with source code.
+
+        Args:
+            source: Source code string to tokenize
+        """
+        self.source = source
+        self.position = 0
+        self.line = 1
+        self.column = 1
+        self.tokens: List[Token] = []
+
+    def tokenize(self) -> List[Token]:
+        """
+        Main tokenization method.
+
+        Returns:
+            List of tokens representing the source code
+
+        Raises:
+            DSLError: If invalid characters or syntax is encountered
+        """
+        while self.position < len(self.source):
+            # Skip whitespace
+            if self._skip_whitespace():
+                continue
+
+            # Skip comments
+            if self._skip_comment():
+                continue
+
+            # Match tokens
+            if self._match_number():
+                continue
+            elif self._match_string():
+                continue
+            elif self._match_keyword_or_identifier():
+                continue
+            elif self._match_operator():
+                continue
+            else:
+                char = self._current_char()
+                self._error(f"Unexpected character: '{char}' (ASCII {ord(char)})")
+
+        # Add EOF token
+        self.tokens.append(Token(TokenType.EOF, None, self.line, self.column))
+        logger.debug(f"Tokenized {len(self.tokens)} tokens from {self.line} lines")
+        return self.tokens
+
+    def _current_char(self) -> Optional[str]:
+        """Get current character without advancing."""
+        if self.position >= len(self.source):
+            return None
+        return self.source[self.position]
+
+    def _peek_char(self, offset: int = 1) -> Optional[str]:
+        """Look ahead at character at offset from current position."""
+        pos = self.position + offset
+        if pos >= len(self.source):
+            return None
+        return self.source[pos]
+
+    def _advance(self) -> Optional[str]:
+        """Move to next character and return current one."""
+        if self.position >= len(self.source):
+            return None
+
+        char = self.source[self.position]
+        self.position += 1
+
+        # Track line and column for error messages
+        if char == "\n":
+            self.line += 1
+            self.column = 1
+        else:
+            self.column += 1
+
+        return char
+
+    def _skip_whitespace(self) -> bool:
+        """Skip whitespace characters (space, tab, newline)."""
+        char = self._current_char()
+        if char and char in " \t\n\r":
+            self._advance()
+            return True
+        return False
+
+    def _skip_comment(self) -> bool:
+        """Skip comments starting with #."""
+        if self._current_char() == "#":
+            # Skip until end of line
+            while self._current_char() and self._current_char() != "\n":
+                self._advance()
+            return True
+        return False
+
+    def _match_number(self) -> bool:
+        """
+        Match integer, float, or scientific notation.
+
+        Formats supported:
+        - Integer: 42, -123
+        - Float: 3.14, -0.5
+        - Scientific: 1.5e-3, 2E+10
+        """
+        start_pos = self.position
+        start_col = self.column
+
+        # Handle negative sign
+        if self._current_char() == "-":
+            # Check if next char is digit
+            if not (self._peek_char() and self._peek_char().isdigit()):
+                return False
+            self._advance()
+
+        # Must start with digit
+        if not (self._current_char() and self._current_char().isdigit()):
+            return False
+
+        # Match integer part
+        while self._current_char() and self._current_char().isdigit():
+            self._advance()
+
+        # Match decimal point and fractional part
+        if self._current_char() == ".":
+            # Peek ahead to ensure it's a decimal, not a method call
+            if self._peek_char() and self._peek_char().isdigit():
+                self._advance()  # consume '.'
+                while self._current_char() and self._current_char().isdigit():
+                    self._advance()
+
+        # Match scientific notation
+        if self._current_char() and self._current_char() in ("e", "E"):
+            self._advance()
+            # Optional sign
+            if self._current_char() and self._current_char() in ("+", "-"):
+                self._advance()
+            # Exponent digits
+            if not (self._current_char() and self._current_char().isdigit()):
+                self._error("Invalid scientific notation: expected digits after exponent")
+            while self._current_char() and self._current_char().isdigit():
+                self._advance()
+
+        # Extract value and convert
+        value_str = self.source[start_pos : self.position]
+        try:
+            if "." in value_str or "e" in value_str or "E" in value_str:
+                value = float(value_str)
+            else:
+                value = int(value_str)
+        except ValueError:
+            self._error(f"Invalid number format: {value_str}")
+
+        self.tokens.append(Token(TokenType.NUMBER, value, self.line, start_col))
+        return True
+
+    def _match_string(self) -> bool:
+        """
+        Match quoted strings with escape sequences.
+
+        Supports both single and double quotes.
+        Handles escape sequences: \\n, \\t, \\', \\", \\\\
+        """
+        start_col = self.column
+        quote_char = self._current_char()
+
+        if quote_char not in ('"', "'"):
+            return False
+
+        self._advance()  # consume opening quote
+        chars = []
+
+        while self._current_char() and self._current_char() != quote_char:
+            if self._current_char() == "\\":
+                # Handle escape sequences
+                self._advance()
+                escape_char = self._current_char()
+                if escape_char == "n":
+                    chars.append("\n")
+                elif escape_char == "t":
+                    chars.append("\t")
+                elif escape_char == "r":
+                    chars.append("\r")
+                elif escape_char == "\\":
+                    chars.append("\\")
+                elif escape_char == quote_char:
+                    chars.append(quote_char)
+                else:
+                    chars.append(escape_char)
+                self._advance()
+            else:
+                chars.append(self._current_char())
+                self._advance()
+
+        if self._current_char() != quote_char:
+            self._error(f"Unterminated string starting at line {self.line}, column {start_col}")
+
+        self._advance()  # consume closing quote
+
+        value = "".join(chars)
+        self.tokens.append(Token(TokenType.STRING, value, self.line, start_col))
+        return True
+
+    def _match_keyword_or_identifier(self) -> bool:
+        """
+        Match keywords or identifiers.
+
+        Identifiers: [a-zA-Z_][a-zA-Z0-9_]*
+        Keywords: SearchSpace, Layer, Evolution, etc.
+        """
+        start_col = self.column
+        char = self._current_char()
+
+        # Must start with letter or underscore
+        if not (char and (char.isalpha() or char == "_")):
+            return False
+
+        # Collect identifier characters
+        chars = []
+        while self._current_char() and (
+            self._current_char().isalnum() or self._current_char() == "_"
+        ):
+            chars.append(self._current_char())
+            self._advance()
+
+        identifier = "".join(chars)
+
+        # Check if it's a keyword
+        if identifier in KEYWORDS:
+            token_type = KEYWORDS[identifier]
+            # For boolean keywords, store the boolean value
+            if token_type == TokenType.BOOLEAN:
+                value = identifier in ("True", "true")
+            else:
+                value = identifier
+            self.tokens.append(Token(token_type, value, self.line, start_col))
+        else:
+            # Regular identifier
+            self.tokens.append(Token(TokenType.IDENTIFIER, identifier, self.line, start_col))
+
+        return True
+
+    def _match_operator(self) -> bool:
+        """Match operators and delimiters."""
+        start_col = self.column
+        char = self._current_char()
+
+        if char in OPERATORS:
+            token_type = OPERATORS[char]
+            self._advance()
+            self.tokens.append(Token(token_type, char, self.line, start_col))
+            return True
+
+        return False
+
+    def _error(self, message: str) -> None:
+        """
+        Raise DSLError with line and column information.
+
+        Args:
+            message: Error message
+
+        Raises:
+            DSLError: With formatted error message
+        """
+        raise DSLError(
+            f"{message}\n"
+            f" at line {self.line}, column {self.column}\n"
+            f" {self._get_error_context()}",
+            line=self.line,
+            column=self.column,
+        )
+
+    def _get_error_context(self) -> str:
+        """Get source code context around error for display."""
+        # Find start and end of current line
+        line_start = self.position
+        while line_start > 0 and self.source[line_start - 1] != "\n":
+            line_start -= 1
+
+        line_end = self.position
+        while line_end < len(self.source) and self.source[line_end] != "\n":
+            line_end += 1
+
+        line_content = self.source[line_start:line_end]
+        pointer = " " * (self.column - 1) + "^"
+
+        return f"{line_content}\n {pointer}"
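
The lexer is the first stage of the DSL pipeline suggested by the file list above (lexer, parser, compiler). A short sketch of tokenizing the snippet used in its own docstring, assuming KEYWORDS and OPERATORS from morphml/core/dsl/syntax.py cover the characters involved:

    from morphml.core.dsl.lexer import Lexer
    from morphml.exceptions import DSLError

    source = "Layer.conv2d(filters=[32, 64])"
    try:
        for token in Lexer(source).tokenize():
            print(token)  # Token(<TYPE>, <value>, line:column), ending with an EOF token
    except DSLError as exc:
        # Lexing errors carry line/column information plus a caret-annotated context line
        print(exc)

Because _match_number() peeks before consuming a ".", an integer followed by member-access style "." leaves the dot for _match_operator(), while "3.14" and "1.5e-3" still lex as single NUMBER tokens.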