personify 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. data/.gitignore +1 -0
  2. data/LICENSE +20 -0
  3. data/README.md +172 -0
  4. data/Rakefile +53 -0
  5. data/VERSION +1 -0
  6. data/doc/syntax_ideas.md +141 -0
  7. data/lib/personify/context.rb +55 -0
  8. data/lib/personify/parser/personify.rb +1071 -0
  9. data/lib/personify/parser/personify.treetop +107 -0
  10. data/lib/personify/parser/personify_node_classes.rb +121 -0
  11. data/lib/personify/template.rb +17 -0
  12. data/lib/personify.rb +8 -0
  13. data/script/generate_parser.rb +6 -0
  14. data/test/context_test.rb +122 -0
  15. data/test/fixtures/multiple_tags.txt +8 -0
  16. data/test/parse_runner.rb +60 -0
  17. data/test/parser_test.rb +291 -0
  18. data/test/test_helper.rb +16 -0
  19. data/vendor/treetop/.gitignore +5 -0
  20. data/vendor/treetop/History.txt +9 -0
  21. data/vendor/treetop/README +164 -0
  22. data/vendor/treetop/Rakefile +20 -0
  23. data/vendor/treetop/Treetop.tmbundle/Snippets/grammar ___ end.tmSnippet +20 -0
  24. data/vendor/treetop/Treetop.tmbundle/Snippets/rule ___ end.tmSnippet +18 -0
  25. data/vendor/treetop/Treetop.tmbundle/Syntaxes/Treetop Grammar.tmLanguage +251 -0
  26. data/vendor/treetop/Treetop.tmbundle/info.plist +10 -0
  27. data/vendor/treetop/bin/tt +28 -0
  28. data/vendor/treetop/doc/contributing_and_planned_features.markdown +103 -0
  29. data/vendor/treetop/doc/grammar_composition.markdown +65 -0
  30. data/vendor/treetop/doc/index.markdown +90 -0
  31. data/vendor/treetop/doc/pitfalls_and_advanced_techniques.markdown +51 -0
  32. data/vendor/treetop/doc/semantic_interpretation.markdown +189 -0
  33. data/vendor/treetop/doc/site.rb +110 -0
  34. data/vendor/treetop/doc/sitegen.rb +60 -0
  35. data/vendor/treetop/doc/syntactic_recognition.markdown +100 -0
  36. data/vendor/treetop/doc/using_in_ruby.markdown +21 -0
  37. data/vendor/treetop/examples/lambda_calculus/arithmetic.rb +551 -0
  38. data/vendor/treetop/examples/lambda_calculus/arithmetic.treetop +97 -0
  39. data/vendor/treetop/examples/lambda_calculus/arithmetic_node_classes.rb +7 -0
  40. data/vendor/treetop/examples/lambda_calculus/arithmetic_test.rb +54 -0
  41. data/vendor/treetop/examples/lambda_calculus/lambda_calculus +0 -0
  42. data/vendor/treetop/examples/lambda_calculus/lambda_calculus.rb +718 -0
  43. data/vendor/treetop/examples/lambda_calculus/lambda_calculus.treetop +132 -0
  44. data/vendor/treetop/examples/lambda_calculus/lambda_calculus_node_classes.rb +5 -0
  45. data/vendor/treetop/examples/lambda_calculus/lambda_calculus_test.rb +89 -0
  46. data/vendor/treetop/examples/lambda_calculus/test_helper.rb +18 -0
  47. data/vendor/treetop/lib/treetop/bootstrap_gen_1_metagrammar.rb +45 -0
  48. data/vendor/treetop/lib/treetop/compiler/grammar_compiler.rb +40 -0
  49. data/vendor/treetop/lib/treetop/compiler/lexical_address_space.rb +17 -0
  50. data/vendor/treetop/lib/treetop/compiler/metagrammar.rb +2955 -0
  51. data/vendor/treetop/lib/treetop/compiler/metagrammar.treetop +404 -0
  52. data/vendor/treetop/lib/treetop/compiler/node_classes/anything_symbol.rb +20 -0
  53. data/vendor/treetop/lib/treetop/compiler/node_classes/atomic_expression.rb +14 -0
  54. data/vendor/treetop/lib/treetop/compiler/node_classes/character_class.rb +22 -0
  55. data/vendor/treetop/lib/treetop/compiler/node_classes/choice.rb +31 -0
  56. data/vendor/treetop/lib/treetop/compiler/node_classes/declaration_sequence.rb +24 -0
  57. data/vendor/treetop/lib/treetop/compiler/node_classes/grammar.rb +28 -0
  58. data/vendor/treetop/lib/treetop/compiler/node_classes/inline_module.rb +27 -0
  59. data/vendor/treetop/lib/treetop/compiler/node_classes/nonterminal.rb +13 -0
  60. data/vendor/treetop/lib/treetop/compiler/node_classes/optional.rb +19 -0
  61. data/vendor/treetop/lib/treetop/compiler/node_classes/parenthesized_expression.rb +9 -0
  62. data/vendor/treetop/lib/treetop/compiler/node_classes/parsing_expression.rb +138 -0
  63. data/vendor/treetop/lib/treetop/compiler/node_classes/parsing_rule.rb +55 -0
  64. data/vendor/treetop/lib/treetop/compiler/node_classes/predicate.rb +45 -0
  65. data/vendor/treetop/lib/treetop/compiler/node_classes/repetition.rb +55 -0
  66. data/vendor/treetop/lib/treetop/compiler/node_classes/sequence.rb +68 -0
  67. data/vendor/treetop/lib/treetop/compiler/node_classes/terminal.rb +20 -0
  68. data/vendor/treetop/lib/treetop/compiler/node_classes/transient_prefix.rb +9 -0
  69. data/vendor/treetop/lib/treetop/compiler/node_classes/treetop_file.rb +9 -0
  70. data/vendor/treetop/lib/treetop/compiler/node_classes.rb +19 -0
  71. data/vendor/treetop/lib/treetop/compiler/ruby_builder.rb +113 -0
  72. data/vendor/treetop/lib/treetop/compiler.rb +6 -0
  73. data/vendor/treetop/lib/treetop/ruby_extensions/string.rb +42 -0
  74. data/vendor/treetop/lib/treetop/ruby_extensions.rb +2 -0
  75. data/vendor/treetop/lib/treetop/runtime/compiled_parser.rb +95 -0
  76. data/vendor/treetop/lib/treetop/runtime/interval_skip_list/head_node.rb +15 -0
  77. data/vendor/treetop/lib/treetop/runtime/interval_skip_list/interval_skip_list.rb +200 -0
  78. data/vendor/treetop/lib/treetop/runtime/interval_skip_list/node.rb +164 -0
  79. data/vendor/treetop/lib/treetop/runtime/interval_skip_list.rb +4 -0
  80. data/vendor/treetop/lib/treetop/runtime/syntax_node.rb +72 -0
  81. data/vendor/treetop/lib/treetop/runtime/terminal_parse_failure.rb +16 -0
  82. data/vendor/treetop/lib/treetop/runtime/terminal_syntax_node.rb +17 -0
  83. data/vendor/treetop/lib/treetop/runtime.rb +5 -0
  84. data/vendor/treetop/lib/treetop/version.rb +9 -0
  85. data/vendor/treetop/lib/treetop.rb +11 -0
  86. data/vendor/treetop/script/generate_metagrammar.rb +14 -0
  87. data/vendor/treetop/script/svnadd +11 -0
  88. data/vendor/treetop/script/svnrm +11 -0
  89. data/vendor/treetop/spec/compiler/and_predicate_spec.rb +36 -0
  90. data/vendor/treetop/spec/compiler/anything_symbol_spec.rb +52 -0
  91. data/vendor/treetop/spec/compiler/character_class_spec.rb +188 -0
  92. data/vendor/treetop/spec/compiler/choice_spec.rb +80 -0
  93. data/vendor/treetop/spec/compiler/circular_compilation_spec.rb +28 -0
  94. data/vendor/treetop/spec/compiler/failure_propagation_functional_spec.rb +21 -0
  95. data/vendor/treetop/spec/compiler/grammar_compiler_spec.rb +84 -0
  96. data/vendor/treetop/spec/compiler/grammar_spec.rb +41 -0
  97. data/vendor/treetop/spec/compiler/nonterminal_symbol_spec.rb +40 -0
  98. data/vendor/treetop/spec/compiler/not_predicate_spec.rb +38 -0
  99. data/vendor/treetop/spec/compiler/one_or_more_spec.rb +35 -0
  100. data/vendor/treetop/spec/compiler/optional_spec.rb +37 -0
  101. data/vendor/treetop/spec/compiler/parenthesized_expression_spec.rb +19 -0
  102. data/vendor/treetop/spec/compiler/parsing_rule_spec.rb +32 -0
  103. data/vendor/treetop/spec/compiler/sequence_spec.rb +115 -0
  104. data/vendor/treetop/spec/compiler/terminal_spec.rb +81 -0
  105. data/vendor/treetop/spec/compiler/terminal_symbol_spec.rb +37 -0
  106. data/vendor/treetop/spec/compiler/test_grammar.treetop +7 -0
  107. data/vendor/treetop/spec/compiler/test_grammar.tt +7 -0
  108. data/vendor/treetop/spec/compiler/test_grammar_do.treetop +7 -0
  109. data/vendor/treetop/spec/compiler/zero_or_more_spec.rb +56 -0
  110. data/vendor/treetop/spec/composition/a.treetop +11 -0
  111. data/vendor/treetop/spec/composition/b.treetop +11 -0
  112. data/vendor/treetop/spec/composition/c.treetop +10 -0
  113. data/vendor/treetop/spec/composition/d.treetop +10 -0
  114. data/vendor/treetop/spec/composition/grammar_composition_spec.rb +26 -0
  115. data/vendor/treetop/spec/ruby_extensions/string_spec.rb +32 -0
  116. data/vendor/treetop/spec/runtime/compiled_parser_spec.rb +101 -0
  117. data/vendor/treetop/spec/runtime/interval_skip_list/delete_spec.rb +147 -0
  118. data/vendor/treetop/spec/runtime/interval_skip_list/expire_range_spec.rb +349 -0
  119. data/vendor/treetop/spec/runtime/interval_skip_list/insert_and_delete_node.rb +385 -0
  120. data/vendor/treetop/spec/runtime/interval_skip_list/insert_spec.rb +660 -0
  121. data/vendor/treetop/spec/runtime/interval_skip_list/interval_skip_list_spec.graffle +6175 -0
  122. data/vendor/treetop/spec/runtime/interval_skip_list/interval_skip_list_spec.rb +58 -0
  123. data/vendor/treetop/spec/runtime/interval_skip_list/palindromic_fixture.rb +23 -0
  124. data/vendor/treetop/spec/runtime/interval_skip_list/palindromic_fixture_spec.rb +164 -0
  125. data/vendor/treetop/spec/runtime/interval_skip_list/spec_helper.rb +84 -0
  126. data/vendor/treetop/spec/runtime/syntax_node_spec.rb +53 -0
  127. data/vendor/treetop/spec/spec_helper.rb +106 -0
  128. data/vendor/treetop/spec/spec_suite.rb +4 -0
  129. data/vendor/treetop/treetop.gemspec +18 -0
  130. metadata +196 -0
@@ -0,0 +1,291 @@
1
+ require 'test_helper'
2
+ require File.dirname(__FILE__) + "/../vendor/treetop/lib/treetop"
3
+ Treetop.load "../lib/personify/parser/personify"
4
+ # require File.dirname(__FILE__) + "/../personify"
5
+
6
+ class ParserTest < Test::Unit::TestCase
7
+ include ParserTestHelper
8
+ context "The parser" do
9
+ setup do
10
+ @parser = PersonifyLanguageParser.new
11
+ end
12
+
13
+ context "parsing keys" do
14
+ should "eval [A] as key" do
15
+ assert_equal "test", parse("[A]").eval({"a" => "test"})
16
+ end
17
+
18
+ should "eval [A_B] as key" do
19
+ assert_equal "test", parse("[A_B]").eval({"a_b" => "test"})
20
+ end
21
+
22
+ should "eval [A.B] as nested key" do
23
+ assert_equal "test", parse("[A.B]").eval({"a.b" => "fail", "a" => {"b" => "test"}})
24
+ end
25
+
26
+ should "eval [AB_C.D] as nested key" do
27
+ assert_equal "test", parse("[AB_C.D]").eval({"ab_c.d" => "fail", "ab_c" => {"d" => "test"}})
28
+ end
29
+
30
+ should "eval [L1.L2.L3.L4.L5.L6.L7.L8] as nested key" do
31
+ assert_equal "test", parse("[L1.L2.L3.L4.L5.L6.L7.L8]").eval({"l1" => {"l2" => {"l3" => {"l4" => {"l5" => {"l6" => {"l7" => {"l8" => "test"}}}}}}}})
32
+ end
33
+
34
+ should "eval [1.2] as nested key" do
35
+ assert_equal "test", parse("[1.2]").eval({"1" => {"2" => "test"}})
36
+ end
37
+ end
38
+
39
+ context "parsing strings" do
40
+ should "eval [\"str\"] as string" do
41
+ assert_equal "str", parse("[\"str\"]").eval({})
42
+ end
43
+ end
44
+
45
+ context "parsing text" do
46
+ should "eval text" do
47
+ assert_equal "text", parse("text").eval()
48
+ assert_equal "t", parse("t").eval()
49
+ assert_equal "t\n1\n2", parse("t\n1\n2").eval()
50
+ end
51
+
52
+ should "eval UTF8 text" do
53
+ assert_equal "financiële", parse("financiële").eval()
54
+ end
55
+
56
+ should "eval empty text" do
57
+ assert_equal "", parse("").eval()
58
+ end
59
+
60
+ should "eval '[bla]' as text" do
61
+ assert_equal "[bla]", parse("[bla]").eval()
62
+ end
63
+
64
+ should "eval '[KE bla' as text" do
65
+ assert_equal "[KE bla", parse("[KE bla").eval()
66
+ end
67
+
68
+ should "eval nested brackets as text" do
69
+ assert_equal "[[BLA]]", parse("[[BLA]]").eval
70
+ end
71
+
72
+ should "eval '[BLA]' as text on nil 'bla' " do
73
+ assert_equal "[BLA]", parse("[BLA]").eval()
74
+ end
75
+ end
76
+
77
+ context "parsing expressions" do
78
+ should "eval simple expression" do
79
+ assert_equal "var", parse("[VAR]").eval("var" => "var")
80
+ assert_equal "a var b", parse("a [VAR] b").eval("var" => "var")
81
+ assert_equal "var\nvar", parse("[VAR]\n[VAR]").eval("var" => "var")
82
+ end
83
+
84
+ should "eval simple expression with empty substitution" do
85
+ assert_equal "", parse("[VAR]").eval("var" => "")
86
+ end
87
+
88
+ should "eval expressions careless of whitespace" do
89
+ assert_equal "k1",parse("[ K1]").eval("k1" => "k1")
90
+ assert_equal "k1",parse("[ K1 ]").eval("k1" => "k1")
91
+ assert_equal "k1",parse("[K1 | K2]").eval("k1" => "k1")
92
+ end
93
+
94
+ context "with alternatives" do
95
+ should "eval alternative expression on first empty" do
96
+ assert_equal "k2", parse("[K1|K2]").eval("k2" => "k2")
97
+ assert_equal "k2", parse("[K1|K2]").eval("k2" => "k2")
98
+ assert_equal "k3", parse("[K1|K2|K3]").eval("k1" => nil,"k2" => nil, "k3" => "k3")
99
+ end
100
+
101
+ should "eval first expression on first nonempty" do
102
+ assert_equal "k2", parse("[K1|K2]").eval("k2" => "k2")
103
+ assert_equal "k3", parse("[K1|K2|K3]").eval("k2" => nil, "k3" => "k3")
104
+ end
105
+
106
+ should "eval strings in alternative expression" do
107
+ assert_equal "str", parse("[K1|str]").eval
108
+ assert_equal "str", parse("[K1|\"str\"]").eval
109
+ assert_equal "str", parse("[K1|\"str\"]").eval
110
+ assert_equal " str", parse("[K1|\" str\"]").eval
111
+ end
112
+
113
+ should "eval implicit strings in alternative expression" do
114
+ assert_equal "str", parse("[K1|str]").eval
115
+ end
116
+ end
117
+
118
+ context "with nested context substitutions" do
119
+ should "not eval [A.B] if it isn't in the context" do
120
+ assert_equal "[A.B]", parse("[A.B]").eval({})
121
+ assert_equal "[A.B]", parse("[A.B]").eval({"a" => {"c" => "v"}})
122
+ end
123
+ should "eval functions with one or more levels" do
124
+ assert_equal "v", parse("[LEVEL1.FUNC()]").eval({"level1" => {"func" => Proc.new{"v"}}})
125
+ end
126
+ should "eval substitution with missing key" do
127
+ assert_equal "[L1.L2]", parse("[L1.L2]").eval({"L1" => nil})
128
+ assert_equal "[L1.L2]", parse("[L1.L2]").eval({"L1" => {}})
129
+ assert_equal "[L1.L2]", parse("[L1.L2]").eval({"L1" => {"L2" => nil}})
130
+ end
131
+ should "eval substitution with non endpoint key" do
132
+ # Will just call to_s
133
+ assert_equal "l3v", parse("[L1.L2]").eval({"l1" => {"l2" => {"l3" => "v"}}})
134
+ end
135
+ end
136
+
137
+ context "with function" do
138
+ should "eval with single parameter" do
139
+ assert_equal "v", parse("[FUNC(K1)]").eval("func" => Proc.new{|v| v }, "k1" => "v")
140
+ end
141
+ should "eval with multiple parameters" do
142
+ assert_equal "v1v2", parse("[FUNC(K1,K2)]").eval("func" => Proc.new{|*v| v.join }, "k1" => "v1", "k2" => "v2")
143
+ end
144
+ should "eval with string parameters" do
145
+ assert_equal "str", parse("[FUNC(\"str\")]").eval("func" => Proc.new{|v| v }, "k1" => "v1", "k2" => "v2")
146
+ end
147
+ should "eval with implicit string parameters" do
148
+ assert_equal "str", parse("[FUNC(str)]").eval("func" => Proc.new{|v| v }, "k1" => "v1", "k2" => "v2")
149
+ end
150
+ should "eval with splat parameters" do
151
+ assert_equal "v1+v2v3", parse("[FUNC(\"v1\",\"v2\",\"v3\")]").eval("func" => Proc.new{|v1,*v2| v1 + "+" + v2.join })
152
+ end
153
+ should "eval with too much parameters" do
154
+ assert_equal "p1p2", parse("[FUNC(\"p1\",\"p2\")]").eval("func" => Proc.new{|v1| v1 })
155
+ end
156
+ should "eval with no parameters" do
157
+ assert_equal "val", parse("[FUNC()]").eval("func" => Proc.new{ "val" })
158
+ end
159
+ should "eval with alternative expression" do
160
+ assert_equal "fb", parse("[FUNC()|\"fb\"]").eval("func" => Proc.new{ false })
161
+ assert_equal "fb", parse("[FUNC()|\"fb\"]").eval("func" => Proc.new{ nil })
162
+ end
163
+ should "eval with broken function call" do
164
+ assert_equal "[FUN(\"s\"]", parse("[FUN(\"s\"]").eval("func" => Proc.new{})
165
+ end
166
+ end
167
+
168
+ context "with block function" do
169
+ setup do
170
+ @context = {
171
+ "test" => Proc.new{|block| block },
172
+ "test_param" => Proc.new{|param,block| param ? block : nil },
173
+ "test_return" => Proc.new{ "return" },
174
+ "true" => true,
175
+ "false" => nil,
176
+ "key" => "value",
177
+ "DO" => "dooo?"
178
+ }
179
+ end
180
+
181
+ should "not accept [END] or [DO] as keys" do
182
+ assert_equal "[END]", parse("[END]").eval({"end" => "??"})
183
+ assert_equal "[DO]", parse("[DO]").eval({"do" => "??"})
184
+ end
185
+
186
+ should "eval" do
187
+ assert_equal "value", parse("[TEST() DO]value[END]").eval(@context)
188
+ end
189
+
190
+ should "eval with parameter" do
191
+ assert_equal "value", parse("[TEST_PARAM(TRUE) DO]value[END]").eval(@context)
192
+ end
193
+
194
+ should "replace with return value" do
195
+ assert_equal "return", parse("[TEST_RETURN() DO]value[END]").eval(@context)
196
+ end
197
+
198
+ should "eval as alternative with block parameter" do
199
+ assert_equal "value", parse("[UNKNOWN_KEY | TEST() DO]value[END]").eval(@context)
200
+ end
201
+
202
+ should "eval with block parameter and alternative" do
203
+ assert_equal "value", parse("[TEST_PARAM(TRUE) DO]value[END | \"alt\"]").eval(@context)
204
+ assert_equal "alt", parse("[TEST_PARAM(FALSE) DO]value[END | \"alt\"]").eval(@context)
205
+ end
206
+
207
+ should "eval substitution within block parameter" do
208
+ assert_equal "value", parse("[TEST() DO][KEY][END]").eval(@context)
209
+ assert_equal "bla value bla", parse("[TEST() DO]bla [KEY] bla[END]").eval(@context)
210
+ end
211
+
212
+ should "strip off \\s*\\n around DO] and [END]" do
213
+ assert_equal "value", parse("[TEST() DO]\s\s\nvalue[END]\n").eval(@context)
214
+ assert_equal "value ", parse("[TEST() DO]\s\s\nvalue [END]\n").eval(@context)
215
+ assert_equal "value ", parse("[TEST() DO]\s\s\nvalue [END] ").eval(@context)
216
+ assert_equal " value ", parse("[TEST() DO]\s\s\n value [END]").eval(@context)
217
+ assert_equal "\nvalue\n", parse("[TEST() DO]\s\s\n\nvalue\n[END]").eval(@context)
218
+ assert_equal "\nvalue\n", parse("[TEST() DO]\s\s\n\nvalue\n[END]\n").eval(@context)
219
+ end
220
+
221
+ end
222
+
223
+ context "with logical operators" do
224
+ should "eval AND operator" do
225
+ t = "[A && B]"
226
+ assert_equal "b", parse(t).eval("a" => "a", "b" => "b")
227
+ assert_equal t, parse(t).eval("a" => "a", "b" => nil)
228
+ assert_equal t, parse(t).eval("a" => nil, "b" => "b")
229
+ assert_equal t, parse(t).eval("a" => nil, "b" => nil)
230
+ end
231
+
232
+ should "eval OR operator" do
233
+ t = "[A || B]"
234
+ assert_equal "a", parse(t).eval("a" => "a", "b" => "b")
235
+ assert_equal "a", parse(t).eval("a" => "a", "b" => nil)
236
+ assert_equal "b", parse(t).eval("a" => nil, "b" => "b")
237
+ assert_equal t, parse(t).eval("a" => nil, "b" => nil)
238
+ end
239
+
240
+ should "eval multiple operators (a || b && c) without precedence" do
241
+ t = "[A || B && C]"
242
+ assert_equal "a", parse(t).eval("a" => "a", "b" => "b", "c" => "c")
243
+ assert_equal "a", parse(t).eval("a" => "a", "b" => nil, "c" => "c")
244
+ assert_equal "a", parse(t).eval("a" => "a", "b" => "b", "c" => nil)
245
+
246
+ assert_equal "c", parse(t).eval("a" => nil, "b" => "b", "c" => "c")
247
+ assert_equal t, parse(t).eval("a" => nil, "b" => nil, "c" => "c")
248
+ assert_equal t, parse(t).eval("a" => nil, "b" => "b", "c" => nil)
249
+ end
250
+
251
+ # Attention we don't have support for precedence!
252
+ should "eval multiple operators (a && b || c) without precedence" do
253
+ t = "[A && B || C]"
254
+ assert_equal "b", parse(t).eval("a" => "a", "b" => "b", "c" => "c")
255
+ assert_equal "c", parse(t).eval("a" => "a", "b" => nil, "c" => "c")
256
+ assert_equal "b", parse(t).eval("a" => "a", "b" => "b", "c" => nil)
257
+
258
+ assert_equal t, parse(t).eval("a" => nil, "b" => "b", "c" => "c")
259
+ assert_equal t, parse(t).eval("a" => nil, "b" => nil, "c" => "c")
260
+ assert_equal t, parse(t).eval("a" => nil, "b" => "b", "c" => nil)
261
+ end
262
+
263
+ should "eval within function parameter" do
264
+ t = "[FUN(A && B)]"
265
+ c = {"fun" => Proc.new{|v| v }}
266
+
267
+ assert_equal "b", parse(t).eval(c.update("a" => "a", "b" => "b"))
268
+ assert_equal t, parse(t).eval(c.update("a" => "a", "b" => nil))
269
+ assert_equal t, parse(t).eval(c.update("a" => nil, "b" => "b"))
270
+ assert_equal t, parse(t).eval(c.update("a" => nil, "b" => nil))
271
+ end
272
+
273
+ should "eval with alternatives" do
274
+ t = "[A && B | \"no\"]"
275
+
276
+ assert_equal "b", parse(t).eval("a" => "a", "b" => "b")
277
+ assert_equal "no", parse(t).eval("a" => "a", "b" => nil)
278
+ assert_equal "no", parse(t).eval("a" => nil, "b" => "b")
279
+ assert_equal "no", parse(t).eval("a" => nil, "b" => nil)
280
+ end
281
+
282
+ should "eval with broken syntax" do
283
+ assert_equal "[A &&]", parse("[A &&]").eval("a" => "1")
284
+ assert_equal "[A ||]", parse("[A ||]").eval("a" => "1")
285
+ assert_equal "[|| A]", parse("[|| A]").eval("a" => "1")
286
+ assert_equal "[&& A]", parse("[&& A]").eval("a" => "1")
287
+ end
288
+ end
289
+ end
290
+ end
291
+ end
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'shoulda'
3
+ require File.dirname(__FILE__) + "/../lib/personify"
4
+
5
+ require 'test/unit'
6
+
7
+ module ParserTestHelper
8
+ def parse(input)
9
+ result = @parser.parse(input)
10
+ unless result
11
+ puts @parser.terminal_failures.join("\n")
12
+ end
13
+ assert !result.nil?
14
+ result
15
+ end
16
+ end
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ *.iml
3
+ *.iws
4
+ *.ipr
5
+ doc/site/*.*
@@ -0,0 +1,9 @@
1
+ == 1.2.5 2009-03-10
2
+
3
+ * 1 minor enhancement:
4
+ * Extensions may always be modules, instead of sometimes modules, sometimes classes
5
+
6
+ == 0.0.1 2008-05-21
7
+
8
+ * 1 major enhancement:
9
+ * Initial release
@@ -0,0 +1,164 @@
1
+ Tutorial
2
+ ========
3
+ Languages can be split into two components, their *syntax* and their *semantics*. It's your understanding of English syntax that tells you the stream of words "Sleep furiously green ideas colorless" is not a valid sentence. Semantics is deeper. Even if we rearrange the above sentence to be "Colorless green ideas sleep furiously", which is syntactically correct, it remains nonsensical on a semantic level. With Treetop, you'll be dealing with languages that are much simpler than English, but these basic concepts apply. Your programs will need to address both the syntax and the semantics of the languages they interpret.
4
+
5
+ Treetop equips you with powerful tools for each of these two aspects of interpreter writing. You'll describe the syntax of your language with a *parsing expression grammar*. From this description, Treetop will generate a Ruby parser that transforms streams of characters written in your language into *abstract syntax trees* representing their structure. You'll then describe the semantics of your language in Ruby by defining methods on the syntax trees the parser generates.
6
+
7
+ Parsing Expression Grammars, The Basics
8
+ =======================================
9
+ The first step in using Treetop is defining a grammar in a file with the `.treetop` extension. Here's a grammar that's useless because it's empty:
10
+
11
+ # my_grammar.treetop
12
+ grammar MyGrammar
13
+ end
14
+
15
+ Next, you start filling your grammar with rules. Each rule associates a name with a parsing expression, like the following:
16
+
17
+ # my_grammar.treetop
18
+ # You can use a .tt extension instead if you wish
19
+ grammar MyGrammar
20
+ rule hello
21
+ 'hello chomsky'
22
+ end
23
+ end
24
+
25
+ The first rule becomes the *root* of the grammar, causing its expression to be matched when a parser for the grammar is fed a string. The above grammar can now be used in a Ruby program. Notice how a string matching the first rule parses successfully, but a second nonmatching string does not.
26
+
27
+ # use_grammar.rb
28
+ require 'rubygems'
29
+ require 'treetop'
30
+ Treetop.load 'my_grammar'
31
+ # or just:
32
+ # require 'my_grammar' # This works because Polyglot hooks "require" to find and load Treetop files
33
+
34
+ parser = MyGrammarParser.new
35
+ puts parser.parse('hello chomsky') # => Treetop::Runtime::SyntaxNode
36
+ puts parser.parse('silly generativists!') # => nil
37
+
38
+ Users of *regular expressions* will find parsing expressions familiar. They share the same basic purpose, matching strings against patterns. However, parsing expressions can recognize a broader category of languages than their less expressive brethren. Before we get into demonstrating that, let's cover some basics. At first parsing expressions won't seem much different. Trust that they are.
39
+
40
+ Terminal Symbols
41
+ ----------------
42
+ The expression in the grammar above is a terminal symbol. It will only match a string that matches it exactly. There are two other kinds of terminal symbols, which we'll revisit later. Terminals are called *atomic expressions* because they aren't composed of smaller expressions.
43
+
44
+ Ordered Choices
45
+ ---------------
46
+ Ordered choices are *composite expressions*, which allow for any of several subexpressions to be matched. These should be familiar from regular expressions, but in parsing expressions, they are delimited by the `/` character. It's important to note that the choices are prioritized in the order they appear. If an earlier expression is matched, no subsequent expressions are tried. Here's an example:
47
+
48
+ # my_grammar.treetop
49
+ grammar MyGrammar
50
+ rule hello
51
+ 'hello chomsky' / 'hello lambek'
52
+ end
53
+ end
54
+
55
+ # fragment of use_grammar.rb
56
+ puts parser.parse('hello chomsky') # => Treetop::Runtime::SyntaxNode
57
+ puts parser.parse('hello lambek') # => Treetop::Runtime::SyntaxNode
58
+ puts parser.parse('silly generativists!') # => nil
59
+
60
+ Note that once a choice rule has matched the text using a particular alternative at a particular location in the input and hence has succeeded, that choice will never be reconsidered, even if the chosen alternative causes another rule to fail where a later alternative wouldn't have. It's always a later alternative, since the first to succeed is final - why keep looking when you've found what you wanted? This is a feature of PEG parsers that you need to understand if you're going to succeed in using Treetop. In order to memoize success and failures, such decisions cannot be reversed. Luckily Treetop provides a variety of clever ways you can tell it to avoid making the wrong decisions. But more on that later.
61
+
62
+ Sequences
63
+ ---------
64
+ Sequences are composed of other parsing expressions separated by spaces. Using sequences, we can tighten up the above grammar.
65
+
66
+ # my_grammar.treetop
67
+ grammar MyGrammar
68
+ rule hello
69
+ 'hello ' ('chomsky' / 'lambek')
70
+ end
71
+ end
72
+
73
+ Note the use of parentheses to override the default precedence rules, which bind sequences more tightly than choices.
74
+
75
+ Once the whole sequence has been matched, the result is memoized and the details of the match will not be reconsidered for that location in the input.
76
+
77
+ Nonterminal Symbols
78
+ -------------------
79
+ Here we leave regular expressions behind. Nonterminals allow expressions to refer to other expressions by name. A trivial use of this facility would allow us to make the above grammar more readable should the list of names grow longer.
80
+
81
+ # my_grammar.treetop
82
+ grammar MyGrammar
83
+ rule hello
84
+ 'hello ' linguist
85
+ end
86
+
87
+ rule linguist
88
+ 'chomsky' / 'lambek' / 'jacobsen' / 'frege'
89
+ end
90
+ end
91
+
92
+ The true power of this facility, however, is unleashed when writing *recursive expressions*. Here is a self-referential expression that can match any number of open parentheses followed by the same number of closing parentheses. This is theoretically impossible with regular expressions due to the *pumping lemma*.
93
+
94
+ # parentheses.treetop
95
+ grammar Parentheses
96
+ rule parens
97
+ '(' parens ')' / ''
98
+ end
99
+ end
100
+
101
+
102
+ The `parens` expression simply states that a `parens` is a set of parentheses surrounding another `parens` expression or, if that doesn't match, the empty string. If you are uncomfortable with recursion, it's time to get comfortable, because it is the basis of language. Here's a tip: Don't try and imagine the parser circling round and round through the same rule. Instead, imagine the rule is *already* defined while you are defining it. If you imagine that `parens` already matches a string of matching parentheses, then it's easy to think of `parens` as an opening and a closing parenthesis around another set of matching parentheses, which conveniently, you happen to be defining. You know that `parens` is supposed to represent a string of matched parentheses, so trust in that meaning, even if you haven't fully implemented it yet.
103
+
104
+ Repetition
105
+ ----------
106
+ Any item in a rule may be followed by a '+' or a '*' character, signifying one-or-more and zero-or-more occurrences of that item. Beware though; the match is greedy, and if it matches too many items and causes subsequent items in the sequence to fail, the number matched will never be reconsidered. Here's a simple example of a rule that will never succeed:
107
+
108
+ # toogreedy.treetop
109
+ grammar TooGreedy
110
+ rule a_s
111
+ 'a'* 'a'
112
+ end
113
+ end
114
+
115
+ The 'a'* will always eat up any 'a's that follow, and the subsequent 'a' will find none there, so the whole rule will fail. You might need to use lookahead to avoid matching too much.
116
+
117
+ Negative Lookahead
118
+ ------------------
119
+
120
+ When you need to ensure that the following item *doesn't* match in some case where it might otherwise, you can use negat!ve lookahead, which is an item preceded by a ! - here's an example:
121
+
122
+ # postcondition.treetop
123
+ grammar PostCondition
124
+ rule conditional_sentence
125
+ ( !conditional_keyword word )+ conditional_keyword [ \t]+ word*
126
+ end
127
+
128
+ rule word
129
+ ([a-zA-Z]+ [ \t]+)
130
+ end
131
+
132
+ rule conditional_keyword
133
+ 'if' / 'while' / 'until'
134
+ end
135
+ end
136
+
137
+ Even though the rule `word` would match any of the conditional keywords, the first words of a conditional_sentence must not be conditional_keywords. The negative lookahead prevents that matching, and prevents the repetition from matching too much input. Note that the lookahead may be a grammar rule of any complexity, including one that isn't used elsewhere in your grammar.
138
+
139
+ Positive lookahead
140
+ ------------------
141
+
142
+ Sometimes you want an item to match, but only if the *following* text would match some pattern. You don't want to consume that following text, but if it's not there, you want this rule to fail. You can append a positive lookahead like this to a rule by appending the lookahead rule preceded by an & character.
143
+
144
+
145
+
146
+ Features to cover in the talk
147
+ =============================
148
+
149
+ * Treetop files
150
+ * Grammar definition
151
+ * Rules
152
+ * Loading a grammar
153
+ * Compiling a grammar with the `tt` command
154
+ * Accessing a parser for the grammar from Ruby
155
+ * Parsing Expressions of all kinds
156
+ ? Left recursion and factorization
157
+ - Here I can talk about function application, discussing how the operator
158
+ could be an arbitrary expression
159
+ * Inline node class eval blocks
160
+ * Node class declarations
161
+ * Labels
162
+ * Use of super within labels
163
+ * Grammar composition with include
164
+ * Use of super with grammar composition
@@ -0,0 +1,20 @@
1
+ dir = File.dirname(__FILE__)
2
+ require 'rubygems'
3
+ require 'rake'
4
+ $LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
5
+ require 'spec/rake/spectask'
6
+
7
+ Gem::manage_gems
8
+ require 'rake/gempackagetask'
9
+
10
+ task :default => :spec
11
+
12
+ Spec::Rake::SpecTask.new do |t|
13
+ t.pattern = 'spec/**/*spec.rb'
14
+ end
15
+
16
+ load "./treetop.gemspec"
17
+
18
+ Rake::GemPackageTask.new($gemspec) do |pkg|
19
+ pkg.need_tar = true
20
+ end
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>content</key>
6
+ <string>grammar ${1:GrammarName}
7
+ rule ${2:rule_name}
8
+ $0
9
+ end
10
+ end</string>
11
+ <key>name</key>
12
+ <string>grammar ... end</string>
13
+ <key>scope</key>
14
+ <string>source.treetop</string>
15
+ <key>tabTrigger</key>
16
+ <string>g</string>
17
+ <key>uuid</key>
18
+ <string>2AE73FBB-88B9-4049-B208-D2440A146164</string>
19
+ </dict>
20
+ </plist>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>content</key>
6
+ <string>rule ${1:rule_name}
7
+ $0
8
+ end</string>
9
+ <key>name</key>
10
+ <string>rule ... end</string>
11
+ <key>scope</key>
12
+ <string>meta.grammar.treetop</string>
13
+ <key>tabTrigger</key>
14
+ <string>r</string>
15
+ <key>uuid</key>
16
+ <string>498A1881-498E-4BDA-9303-7E42B425C5EE</string>
17
+ </dict>
18
+ </plist>