klue-langcraft 0.0.7 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,357 @@
1
+ # Brief for creating the Parser, Tokenizer and Parsing our first DSL
2
+
3
+ # 1. Parsing Libraries in Ruby
4
+
5
+ Here are three Ruby parsing libraries with their pros and cons:
6
+
7
+ ### Parslet
8
+ **Pros:**
9
+ - Pure Ruby library for constructing parsers using parsing expression grammars (PEG).
10
+ - Intuitive and readable grammar definitions embedded in Ruby code.
11
+ - Actively maintained with compatibility for modern Ruby versions.
12
+
13
+ **Cons:**
14
+ - Can be slower for large inputs due to backtracking.
15
+ - Verbose grammars can become complex for intricate DSLs.
16
+
17
+ ### Racc
18
+ **Pros:**
19
+ - LALR(1) parser generator that comes standard with Ruby.
20
+ - Generates fast parsers suitable for complex grammars.
21
+ - Actively maintained as part of the Ruby language.
22
+
23
+ **Cons:**
24
+ - Steeper learning curve with Yacc-like syntax.
25
+ - Less intuitive for those unfamiliar with parser generators.
26
+
27
+ ### Treetop
28
+ **Pros:**
29
+ - Provides a powerful parsing DSL and supports PEG.
30
+ - Clean syntax with grammars defined in separate files.
31
+ - Memoization for improved parsing performance.
32
+
33
+ **Cons:**
34
+ - Less active development; may not be updated for recent Ruby versions.
35
+ - Potential compatibility issues with newer Ruby releases.
36
+
37
+ **Note:** Based on maintenance and compatibility, Parslet and Racc are more suitable for your needs.
38
+
39
+ ---
40
+
41
+ # 2. Converting the DSL Definition into JSON
42
+
43
+ Transforming your DSL definition into JSON will facilitate parsing and validation. Here's how your DSL definition can be represented in JSON:
44
+
45
+ ```json
46
+ {
47
+ "definition": {
48
+ "name": "workflow",
49
+ "params": [
50
+ {
51
+ "name": "name",
52
+ "type": "positional"
53
+ }
54
+ ],
55
+ "nodes": [
56
+ {
57
+ "name": "description",
58
+ "params": [
59
+ {
60
+ "name": "description",
61
+ "type": "positional"
62
+ }
63
+ ]
64
+ },
65
+ {
66
+ "name": "settings",
67
+ "nodes": [
68
+ {
69
+ "name": "setting",
70
+ "repeat": true,
71
+ "params": [
72
+ {
73
+ "name": "key",
74
+ "type": "declarative"
75
+ },
76
+ {
77
+ "name": "value",
78
+ "type": "positional"
79
+ }
80
+ ]
81
+ }
82
+ ]
83
+ },
84
+ {
85
+ "name": "prompts",
86
+ "nodes": [
87
+ {
88
+ "name": "prompt",
89
+ "repeat": true,
90
+ "params": [
91
+ {
92
+ "name": "key",
93
+ "type": "positional"
94
+ },
95
+ {
96
+ "name": "content",
97
+ "type": "named",
98
+ "default": ""
99
+ }
100
+ ]
101
+ }
102
+ ]
103
+ },
104
+ {
105
+ "name": "section",
106
+ "repeat": true,
107
+ "params": [
108
+ {
109
+ "name": "name",
110
+ "type": "positional"
111
+ }
112
+ ],
113
+ "nodes": [
114
+ {
115
+ "name": "step",
116
+ "repeat": true,
117
+ "params": [
118
+ {
119
+ "name": "key",
120
+ "type": "positional"
121
+ }
122
+ ],
123
+ "nodes": [
124
+ {
125
+ "name": "input",
126
+ "repeat": true,
127
+ "params": [
128
+ {
129
+ "name": "key",
130
+ "type": "positional"
131
+ }
132
+ ]
133
+ },
134
+ {
135
+ "name": "prompt",
136
+ "params": [
137
+ {
138
+ "name": "key",
139
+ "type": "positional"
140
+ }
141
+ ]
142
+ },
143
+ {
144
+ "name": "output",
145
+ "repeat": true,
146
+ "params": [
147
+ {
148
+ "name": "key",
149
+ "type": "positional"
150
+ }
151
+ ]
152
+ }
153
+ ]
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "name": "actions",
159
+ "nodes": [
160
+ {
161
+ "name": "save",
162
+ "params": []
163
+ },
164
+ {
165
+ "name": "save_json",
166
+ "params": [
167
+ {
168
+ "name": "path",
169
+ "type": "positional"
170
+ }
171
+ ]
172
+ }
173
+ ]
174
+ }
175
+ ]
176
+ }
177
+ }
178
+ ```
179
+
180
+ # 3. Writing a Parser in Raw Ruby
181
+
182
+ Given the simplicity and hierarchical nature of your DSL, you can write a custom parser in Ruby without external libraries. Below is an outline of how to approach this:
183
+
184
+ ### Step 1: Tokenization
185
+ - Create a tokenizer that reads the DSL code and breaks it down into tokens (keywords, symbols, identifiers, strings, etc.).
186
+
187
+ ```ruby
188
# Splits DSL source text into a flat list of string tokens.
#
# Token text is kept exactly as written in the source: string literals keep
# their surrounding quotes, symbols keep their leading colon and named keys
# keep their trailing colon. Keeping the quotes means a quoted "end" can
# never be mistaken for the `end` keyword by a consumer that compares tokens
# with `==`.
class Tokenizer
  # One alternative per token class, tried in order at each position.
  # Whitespace matches no alternative, so it is skipped between tokens.
  TOKEN_PATTERN = /
    "(?:\\.|[^"\\])*"         # double-quoted string, backslash escapes allowed
    | '(?:\\.|[^'\\])*'       # single-quoted string, backslash escapes allowed
    | \#[^\n]*                # line comment, dropped after scanning
    | :[A-Za-z_]\w*           # symbol such as :intro
    | [A-Za-z_]\w*[?!]?:?     # identifier or keyword, optional trailing colon for named keys
    | -?\d+(?:\.\d+)?         # integer or decimal number
    | \S                      # any other single character, for example a delimiter
  /x.freeze

  # @return [Array<String>] tokens produced by the last call to #tokenize
  #   (empty until #tokenize has been called)
  attr_reader :tokens

  # @param code [String, nil] DSL source text; nil is treated as empty input
  def initialize(code)
    @code = code
    @tokens = []
  end

  # Converts the source text into tokens, replacing any previous result so
  # that repeated calls are idempotent. Comments and whitespace are dropped.
  #
  # NOTE(review): an unterminated string literal is not reported as an error;
  # its opening quote becomes a single-character token.
  #
  # @return [Array<String>] the token list (also available via #tokens)
  def tokenize
    @tokens = @code.to_s.scan(TOKEN_PATTERN).reject { |token| token.start_with?('#') }
  end
end
201
+ ```
202
+
203
+ ### Step 2: Parsing
204
+ - Use recursive descent parsing to process tokens according to the rules defined in your JSON schema.
205
+
206
+ ```ruby
207
# Recursive-descent parser that turns a flat token list into a nested node
# tree, driven by a schema hash.
#
# Tokens are compared with `==` against the strings 'do', 'end' and the
# schema node names, so they are expected to be plain strings.
#
# Every parsed node is a Hash of the form:
#   { 'name' => String, 'params' => Hash, 'children' => Array<Hash> }
class Parser
  # @param tokens [Array<String>] token stream to consume
  # @param schema [Hash] schema whose 'definition' entry describes the root node
  def initialize(tokens, schema)
    @tokens = tokens
    @schema = schema
    @position = 0
  end

  # Parses the token stream starting at the schema's root definition.
  # The stream must begin with the root node's name.
  #
  # @return [Hash] the root node
  # @raise [RuntimeError] on an unexpected token, an unknown node name or a
  #   premature end of input
  def parse
    parse_node(@schema['definition'])
  end

  private

  # Parses one node: its name token, its parameters and, when the schema
  # declares child nodes, a `do ... end` block of children.
  def parse_node(node_schema)
    # Consume the node's own name. Without this the child loop in
    # parse_children would re-read the same token forever.
    expect(node_schema['name'])

    node = {
      'name' => node_schema['name'],
      'params' => parse_params(node_schema['params']),
      'children' => []
    }
    node['children'] = parse_children(node_schema['nodes']) if node_schema['nodes']
    node
  end

  # Parses a `do ... end` block whose entries must match one of nodes_schema.
  def parse_children(nodes_schema)
    expect('do')

    children = []
    until peek == 'end'
      # Report a missing 'end' directly instead of as "Unknown node ''".
      raise "Expected 'end', got end of input" if peek.nil?

      children << parse_node(match_node_schema(nodes_schema))
    end

    expect('end')
    children
  end

  # Builds a name => value hash for the declared parameters. A schema node
  # without a 'params' key is treated as having no parameters.
  def parse_params(params_schema)
    (params_schema || []).each_with_object({}) do |param_schema, params|
      params[param_schema['name']] = extract_param(param_schema)
    end
  end

  # Placeholder: currently consumes no tokens and returns nil.
  def extract_param(param_schema)
    # Implement extraction logic based on param_schema['type']
  end

  # Consumes the next token and raises unless it equals expected_token.
  def expect(expected_token)
    actual_token = next_token
    return if actual_token == expected_token

    actual = actual_token.nil? ? 'end of input' : "'#{actual_token}'"
    raise "Expected '#{expected_token}', got #{actual}"
  end

  # Returns the current token (nil past the end) and advances the cursor.
  def next_token
    token = @tokens[@position]
    @position += 1
    token
  end

  # Returns the current token without consuming it (nil past the end).
  def peek
    @tokens[@position]
  end

  # Finds the child schema whose name equals the current token.
  def match_node_schema(nodes_schema)
    current_token = peek
    nodes_schema.find { |ns| ns['name'] == current_token } || raise("Unknown node '#{current_token}'")
  end
end
278
+ ```
279
+
280
+ ### Step 3: Building the Abstract Syntax Tree (AST)
281
+ - As you parse, construct an AST that captures both the structural elements and their associated data.
282
+
283
+ #### Example Usage:
284
+
285
+ ```ruby
286
# Example: tokenize a DSL file and parse it into an AST using a JSON schema.
require 'json'

# Read DSL code from file
dsl_code = File.read('workflow_dsl.rb')

# Load the schema that drives the parser. JSON.parse yields string keys,
# which is what Parser expects (it reads schema['definition']).
# NOTE(review): 'workflow_schema.json' is a placeholder path - point it at
# the JSON form of the DSL definition.
schema = JSON.parse(File.read('workflow_schema.json'))

# Tokenize the DSL code
tokenizer = Tokenizer.new(dsl_code)
tokenizer.tokenize

# Parse tokens into an AST
parser = Parser.new(tokenizer.tokens, schema)
ast = parser.parse

# Output the AST
puts ast.inspect
299
+ ```
300
+
301
+ ### Considerations:
302
+
303
+ #### Parsing Parameters:
304
+ - Implement parameter extraction (`parse_params` and its `extract_param` helper) to handle different parameter types:
305
+ - **Positional Parameters:** Split `params_str` (the raw parameter text that follows the node name) by commas or spaces, and assign values in order.
306
+ - **Declarative Parameters:** Use the node name as the parameter value.
307
+ - **Named Parameters:** Look for `key: value` pairs.
308
+
309
+ #### Handling Repetition:
310
+ - For nodes with `repeat: true`, allow multiple instances by continuing to parse matching nodes until none are found.
311
+
312
+ ### Advantages:
313
+ - **Simplicity:** Direct control over parsing logic tailored to your DSL.
314
+ - **No Dependencies:** Eliminates issues with outdated libraries.
315
+ - **Customizable:** Easy to modify as your DSL evolves.
316
+
317
+ ### Challenges:
318
+ - **Complexity Management:** As your DSL grows, the parser logic may become more complex.
319
+ - **Testing:** Thorough testing is needed to ensure reliability.
320
+ - **Performance:** May need optimization for large DSL files.
321
+
322
+ ---
323
+
324
+ # 5. Additional Considerations
325
+
326
+ While your immediate focus is on building the engine, keep in mind future integration with tools like IDEs:
327
+
328
+ - **Abstract Syntax Tree (AST):** A well-structured AST can facilitate features like syntax highlighting and code completion.
329
+ - **Language Server Protocol (LSP):** If you decide to provide IDE support, structuring your parser to output data compatible with LSP can be beneficial.
330
+ - **Extensibility:** Designing your parser and data structures with future enhancements in mind can save time later.
331
+
332
+ ---
333
+
334
+ # 6. Conclusion
335
+
336
+ Creating a custom parser in Ruby without external libraries is feasible for your DSL, especially given its hierarchical and relatively simple structure. This approach offers:
337
+
338
+ - **Control and Flexibility:** Tailor the parser to your specific needs without external constraints.
339
+ - **Understanding:** Deepens your knowledge of parsing techniques and the inner workings of your DSL.
340
+ - **Maintainability:** Avoids dependency issues associated with outdated gems.
341
+
342
+ ### Next Steps:
343
+ 1. **Implement the Parser:** Start coding the parser using the outlined approaches.
344
+ 2. **Test with Examples:** Use your existing DSL examples to validate the parser's functionality.
345
+ 3. **Iterate:** Refine the parser based on testing, adding error handling and edge case management as needed.
346
+ - **Parameter Types:** Implement logic for different parameter types (positional, declarative, named, etc.).
347
+ - **Repeating Nodes:** Handle nodes with `repeat: true` by looping until no more matching nodes are found.
348
+ - **Error Handling:** Include meaningful error messages for unexpected tokens or structure violations.
349
+ - **Whitespace and Comments:** Strip out or ignore to simplify tokenization.
350
+
351
+ ---
352
+
353
+ # 4. Writing the Parser Without External Libraries
354
+
355
+ You can implement the parser using Ruby's built-in capabilities, focusing on string manipulation and control structures.
356
+
357
+ ### Simplified Parser Example:
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
module Klue
  module Langcraft
    # Recursive-descent parser that turns a flat token list into a nested
    # node tree, driven by a schema hash.
    #
    # Tokens are compared with `==` against the strings 'do', 'end' and the
    # schema node names, so they are expected to be plain strings.
    #
    # Every parsed node is a Hash of the form:
    #   { 'name' => String, 'params' => Hash, 'children' => Array<Hash> }
    class Parser
      # @param tokens [Array<String>] token stream to consume
      # @param schema [Hash] schema whose 'definition' entry describes the root node
      def initialize(tokens, schema)
        @tokens = tokens
        @schema = schema
        @position = 0
      end

      # Parses the token stream starting at the schema's root definition.
      # The stream must begin with the root node's name.
      #
      # @return [Hash] the root node
      # @raise [RuntimeError] on an unexpected token, an unknown node name or
      #   a premature end of input
      def parse
        parse_node(@schema['definition'])
      end

      private

      # Parses one node: its name token, its parameters and, when the schema
      # declares child nodes, a `do ... end` block of children.
      def parse_node(node_schema)
        # Consume the node's own name. Without this the child loop in
        # parse_children would re-read the same token forever.
        expect(node_schema['name'])

        node = {
          'name' => node_schema['name'],
          'params' => parse_params(node_schema['params']),
          'children' => []
        }
        node['children'] = parse_children(node_schema['nodes']) if node_schema['nodes']
        node
      end

      # Parses a `do ... end` block whose entries must match one of nodes_schema.
      def parse_children(nodes_schema)
        expect('do')

        children = []
        until peek == 'end'
          # Report a missing 'end' directly instead of as "Unknown node ''".
          raise "Expected 'end', got end of input" if peek.nil?

          children << parse_node(match_node_schema(nodes_schema))
        end

        expect('end')
        children
      end

      # Builds a name => value hash for the declared parameters. A schema
      # node without a 'params' key is treated as having no parameters.
      def parse_params(params_schema)
        (params_schema || []).each_with_object({}) do |param_schema, params|
          params[param_schema['name']] = extract_param(param_schema)
        end
      end

      # Placeholder: currently consumes no tokens and returns nil.
      def extract_param(param_schema)
        # Implement extraction logic based on param_schema['type']
      end

      # Consumes the next token and raises unless it equals expected_token.
      def expect(expected_token)
        actual_token = next_token
        return if actual_token == expected_token

        actual = actual_token.nil? ? 'end of input' : "'#{actual_token}'"
        raise "Expected '#{expected_token}', got #{actual}"
      end

      # Returns the current token (nil past the end) and advances the cursor.
      def next_token
        token = @tokens[@position]
        @position += 1
        token
      end

      # Returns the current token without consuming it (nil past the end).
      def peek
        @tokens[@position]
      end

      # Finds the child schema whose name equals the current token.
      def match_node_schema(nodes_schema)
        current_token = peek
        nodes_schema.find { |ns| ns['name'] == current_token } || raise("Unknown node '#{current_token}'")
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
# Sample usage: tokenize a DSL file and parse it into an AST using a JSON schema.
require 'json'
require_relative 'tokenizer'
require_relative 'parser'

# Read DSL code from file
dsl_code = File.read('workflow_dsl.rb')

# Load the schema that drives the parser. JSON.parse yields string keys,
# which is what Parser expects (it reads schema['definition']).
# NOTE(review): 'workflow_schema.json' is a placeholder path - point it at
# the JSON form of the DSL definition.
schema = JSON.parse(File.read('workflow_schema.json'))

# Tokenize the DSL code
tokenizer = Klue::Langcraft::Tokenizer.new(dsl_code)
tokenizer.tokenize

# Parse tokens into an AST
parser = Klue::Langcraft::Parser.new(tokenizer.tokens, schema)
ast = parser.parse

# Output the AST
puts ast.inspect
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Klue
  module Langcraft
    # Splits DSL source text into a flat list of string tokens.
    #
    # Token text is kept exactly as written in the source: string literals
    # keep their surrounding quotes, symbols keep their leading colon and
    # named keys keep their trailing colon. Keeping the quotes means a quoted
    # "end" can never be mistaken for the `end` keyword by a consumer that
    # compares tokens with `==`.
    class Tokenizer
      # One alternative per token class, tried in order at each position.
      # Whitespace matches no alternative, so it is skipped between tokens.
      TOKEN_PATTERN = /
        "(?:\\.|[^"\\])*"         # double-quoted string, backslash escapes allowed
        | '(?:\\.|[^'\\])*'       # single-quoted string, backslash escapes allowed
        | \#[^\n]*                # line comment, dropped after scanning
        | :[A-Za-z_]\w*           # symbol such as :intro
        | [A-Za-z_]\w*[?!]?:?     # identifier or keyword, optional trailing colon for named keys
        | -?\d+(?:\.\d+)?         # integer or decimal number
        | \S                      # any other single character, for example a delimiter
      /x.freeze

      # @return [Array<String>] tokens produced by the last call to #tokenize
      #   (empty until #tokenize has been called)
      attr_reader :tokens

      # @param code [String, nil] DSL source text; nil is treated as empty input
      def initialize(code)
        @code = code
        @tokens = []
      end

      # Converts the source text into tokens, replacing any previous result
      # so that repeated calls are idempotent. Comments and whitespace are
      # dropped.
      #
      # NOTE(review): an unterminated string literal is not reported as an
      # error; its opening quote becomes a single-character token.
      #
      # @return [Array<String>] the token list (also available via #tokens)
      def tokenize
        @tokens = @code.to_s.scan(TOKEN_PATTERN).reject { |token| token.start_with?('#') }
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Klue
4
4
  module Langcraft
5
- VERSION = '0.0.7'
5
+ VERSION = '0.1.0'
6
6
  end
7
7
  end
data/package-lock.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "klue-langcraft",
3
- "version": "0.0.7",
3
+ "version": "0.1.0",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "klue-langcraft",
9
- "version": "0.0.7",
9
+ "version": "0.1.0",
10
10
  "devDependencies": {
11
11
  "@klueless-js/semantic-release-rubygem": "github:klueless-js/semantic-release-rubygem",
12
12
  "@semantic-release/changelog": "^6.0.3",
data/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "klue-langcraft",
3
- "version": "0.0.7",
3
+ "version": "0.1.0",
4
4
  "description": "Domain Specific Language Crafting",
5
5
  "scripts": {
6
6
  "release": "semantic-release"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klue-langcraft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Cruwys
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-09-21 00:00:00.000000000 Z
11
+ date: 2024-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: k_log
@@ -48,8 +48,14 @@ files:
48
48
  - Rakefile
49
49
  - bin/console
50
50
  - bin/setup
51
+ - docs/dsl-examples.md
51
52
  - docs/dsl-rules.md
53
+ - docs/dsl-samples/index.md
54
+ - docs/dsl-samples/youtube-launch-optimizer-old.klue
55
+ - docs/dsl-samples/youtube-launch-optimizer-strawberry.json
56
+ - docs/dsl-samples/youtube-launch-optimizer-strawberry.klue
52
57
  - docs/dsl-samples/youtube-launch-optimizer.defn.klue
58
+ - docs/dsl-samples/youtube-launch-optimizer.json
53
59
  - docs/dsl-samples/youtube-launch-optimizer.klue
54
60
  - docs/project-plan/project-plan.md
55
61
  - docs/project-plan/project.drawio
@@ -57,6 +63,10 @@ files:
57
63
  - docs/project-plan/project_in_progress.svg
58
64
  - docs/project-plan/project_todo.svg
59
65
  - lib/klue/langcraft.rb
66
+ - lib/klue/langcraft/-brief.md
67
+ - lib/klue/langcraft/parser.rb
68
+ - lib/klue/langcraft/sample_usage.rb
69
+ - lib/klue/langcraft/tokenizer.rb
60
70
  - lib/klue/langcraft/version.rb
61
71
  - package-lock.json
62
72
  - package.json