parsanol 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of parsanol might be problematic. Click here for more details.

Files changed (336) hide show
  1. checksums.yaml +7 -0
  2. data/HISTORY.txt +25 -0
  3. data/LICENSE +23 -0
  4. data/README.adoc +643 -0
  5. data/Rakefile +189 -0
  6. data/example/balanced-parens/basic.rb +42 -0
  7. data/example/balanced-parens/basic.rb.md +86 -0
  8. data/example/balanced-parens/parens.rb +42 -0
  9. data/example/balanced-parens/ruby_transform.rb +162 -0
  10. data/example/big.erb +73 -0
  11. data/example/boolean-algebra/basic.rb +70 -0
  12. data/example/boolean-algebra/basic.rb.md +108 -0
  13. data/example/boolean-algebra/ruby_transform.rb +263 -0
  14. data/example/calculator/basic.rb +153 -0
  15. data/example/calculator/basic.rb.md +120 -0
  16. data/example/calculator/pattern.rb +153 -0
  17. data/example/calculator/ruby_transform.rb +156 -0
  18. data/example/calculator/ruby_transform.rb.md +32 -0
  19. data/example/calculator/serialized.rb +257 -0
  20. data/example/calculator/serialized.rb.md +32 -0
  21. data/example/calculator/transform.rb +153 -0
  22. data/example/calculator/zero_copy.rb +269 -0
  23. data/example/calculator/zero_copy.rb.md +36 -0
  24. data/example/capture/basic.rb +49 -0
  25. data/example/capture/basic.rb.md +106 -0
  26. data/example/capture/example.json +39 -0
  27. data/example/comments/basic.rb +35 -0
  28. data/example/comments/basic.rb.md +110 -0
  29. data/example/csv/ruby_transform.rb +148 -0
  30. data/example/csv/ruby_transform.rb.md +131 -0
  31. data/example/csv/serialized.rb +201 -0
  32. data/example/csv/serialized.rb.md +31 -0
  33. data/example/csv/zero_copy.rb +276 -0
  34. data/example/csv/zero_copy.rb.md +36 -0
  35. data/example/custom_atoms/indent_atom.rb +79 -0
  36. data/example/deepest-errors/basic.rb +131 -0
  37. data/example/deepest-errors/basic.rb.md +152 -0
  38. data/example/documentation/basic.rb +18 -0
  39. data/example/documentation/basic.rb.md +97 -0
  40. data/example/email/basic.rb +55 -0
  41. data/example/email/basic.rb.md +102 -0
  42. data/example/email/ruby_transform.rb +106 -0
  43. data/example/empty/basic.rb +13 -0
  44. data/example/empty/basic.rb.md +73 -0
  45. data/example/empty/example.json +38 -0
  46. data/example/erb/basic.rb +47 -0
  47. data/example/erb/basic.rb.md +103 -0
  48. data/example/erb/optimized.rb +42 -0
  49. data/example/error-reporting/basic.rb +132 -0
  50. data/example/error-reporting/basic.rb.md +122 -0
  51. data/example/expression-evaluator/basic.rb +284 -0
  52. data/example/expression-evaluator/basic.rb.md +138 -0
  53. data/example/ini/basic.rb +154 -0
  54. data/example/ini/basic.rb.md +129 -0
  55. data/example/ini/ruby_transform.rb +154 -0
  56. data/example/ip-address/basic.rb +125 -0
  57. data/example/ip-address/basic.rb.md +139 -0
  58. data/example/iso-6709/basic.rb +231 -0
  59. data/example/iso-6709/basic.rb.md +143 -0
  60. data/example/iso-8601/basic.rb +275 -0
  61. data/example/iso-8601/basic.rb.md +149 -0
  62. data/example/json/basic.rb +128 -0
  63. data/example/json/basic.rb.md +121 -0
  64. data/example/json/pattern.rb +128 -0
  65. data/example/json/ruby_transform.rb +200 -0
  66. data/example/json/ruby_transform.rb.md +32 -0
  67. data/example/json/serialized.rb +233 -0
  68. data/example/json/serialized.rb.md +31 -0
  69. data/example/json/transform.rb +128 -0
  70. data/example/json/zero_copy.rb +316 -0
  71. data/example/json/zero_copy.rb.md +36 -0
  72. data/example/local/basic.rb +34 -0
  73. data/example/local/basic.rb.md +91 -0
  74. data/example/local/example.json +38 -0
  75. data/example/markdown/basic.rb +287 -0
  76. data/example/markdown/basic.rb.md +160 -0
  77. data/example/markup/basic.rb +173 -0
  78. data/example/markup/basic.rb.md +118 -0
  79. data/example/mathn/basic.rb +47 -0
  80. data/example/mathn/basic.rb.md +96 -0
  81. data/example/mathn/example.json +39 -0
  82. data/example/minilisp/basic.rb +94 -0
  83. data/example/minilisp/basic.rb.md +133 -0
  84. data/example/modularity/basic.rb +47 -0
  85. data/example/modularity/basic.rb.md +152 -0
  86. data/example/nested-errors/basic.rb +132 -0
  87. data/example/nested-errors/basic.rb.md +157 -0
  88. data/example/output/boolean_algebra.out +4 -0
  89. data/example/output/calc.out +1 -0
  90. data/example/output/capture.out +3 -0
  91. data/example/output/comments.out +8 -0
  92. data/example/output/deepest_errors.out +54 -0
  93. data/example/output/documentation.err +4 -0
  94. data/example/output/documentation.out +1 -0
  95. data/example/output/email_parser.out +2 -0
  96. data/example/output/empty.err +1 -0
  97. data/example/output/erb.out +7 -0
  98. data/example/output/ignore.out +1 -0
  99. data/example/output/ignore_whitespace.out +1 -0
  100. data/example/output/ip_address.out +9 -0
  101. data/example/output/json.out +5 -0
  102. data/example/output/local.out +3 -0
  103. data/example/output/mathn.out +4 -0
  104. data/example/output/minilisp.out +5 -0
  105. data/example/output/modularity.out +0 -0
  106. data/example/output/nested_errors.out +54 -0
  107. data/example/output/optimized_erb.out +1 -0
  108. data/example/output/parens.out +8 -0
  109. data/example/output/prec_calc.out +5 -0
  110. data/example/output/readme.out +1 -0
  111. data/example/output/scopes.out +1 -0
  112. data/example/output/seasons.out +28 -0
  113. data/example/output/sentence.out +1 -0
  114. data/example/output/simple_xml.out +2 -0
  115. data/example/output/string_parser.out +3 -0
  116. data/example/prec-calc/basic.rb +71 -0
  117. data/example/prec-calc/basic.rb.md +114 -0
  118. data/example/readme/basic.rb +30 -0
  119. data/example/readme/basic.rb.md +80 -0
  120. data/example/scopes/basic.rb +15 -0
  121. data/example/scopes/basic.rb.md +73 -0
  122. data/example/scopes/example.json +38 -0
  123. data/example/seasons/basic.rb +46 -0
  124. data/example/seasons/basic.rb.md +117 -0
  125. data/example/seasons/example.json +40 -0
  126. data/example/sentence/basic.rb +36 -0
  127. data/example/sentence/basic.rb.md +81 -0
  128. data/example/sexp/ruby_transform.rb +180 -0
  129. data/example/sexp/ruby_transform.rb.md +143 -0
  130. data/example/simple-xml/basic.rb +54 -0
  131. data/example/simple-xml/basic.rb.md +125 -0
  132. data/example/simple.lit +3 -0
  133. data/example/string-literal/basic.rb +77 -0
  134. data/example/string-literal/basic.rb.md +128 -0
  135. data/example/test.lit +4 -0
  136. data/example/toml/basic.rb +226 -0
  137. data/example/toml/basic.rb.md +173 -0
  138. data/example/url/basic.rb +219 -0
  139. data/example/url/basic.rb.md +142 -0
  140. data/example/url/ruby_transform.rb +219 -0
  141. data/example/yaml/basic.rb +216 -0
  142. data/example/yaml/basic.rb.md +148 -0
  143. data/ext/parsanol_native/extconf.rb +4 -0
  144. data/lib/parsanol/accelerator/application.rb +62 -0
  145. data/lib/parsanol/accelerator/engine.rb +112 -0
  146. data/lib/parsanol/accelerator.rb +162 -0
  147. data/lib/parsanol/ast_visitor.rb +122 -0
  148. data/lib/parsanol/atoms/alternative.rb +97 -0
  149. data/lib/parsanol/atoms/base.rb +214 -0
  150. data/lib/parsanol/atoms/can_flatten.rb +192 -0
  151. data/lib/parsanol/atoms/capture.rb +41 -0
  152. data/lib/parsanol/atoms/context.rb +351 -0
  153. data/lib/parsanol/atoms/context_optimized.rb +42 -0
  154. data/lib/parsanol/atoms/custom.rb +110 -0
  155. data/lib/parsanol/atoms/cut.rb +62 -0
  156. data/lib/parsanol/atoms/dsl.rb +130 -0
  157. data/lib/parsanol/atoms/dynamic.rb +33 -0
  158. data/lib/parsanol/atoms/entity.rb +55 -0
  159. data/lib/parsanol/atoms/ignored.rb +28 -0
  160. data/lib/parsanol/atoms/infix.rb +121 -0
  161. data/lib/parsanol/atoms/lookahead.rb +64 -0
  162. data/lib/parsanol/atoms/named.rb +50 -0
  163. data/lib/parsanol/atoms/re.rb +61 -0
  164. data/lib/parsanol/atoms/repetition.rb +241 -0
  165. data/lib/parsanol/atoms/scope.rb +28 -0
  166. data/lib/parsanol/atoms/sequence.rb +157 -0
  167. data/lib/parsanol/atoms/str.rb +90 -0
  168. data/lib/parsanol/atoms/visitor.rb +91 -0
  169. data/lib/parsanol/atoms.rb +36 -0
  170. data/lib/parsanol/buffer.rb +130 -0
  171. data/lib/parsanol/builder_callbacks.rb +353 -0
  172. data/lib/parsanol/cause.rb +101 -0
  173. data/lib/parsanol/context.rb +23 -0
  174. data/lib/parsanol/convenience.rb +35 -0
  175. data/lib/parsanol/edit_tracker.rb +107 -0
  176. data/lib/parsanol/error_reporter/contextual.rb +122 -0
  177. data/lib/parsanol/error_reporter/deepest.rb +106 -0
  178. data/lib/parsanol/error_reporter/tree.rb +68 -0
  179. data/lib/parsanol/error_reporter.rb +98 -0
  180. data/lib/parsanol/export.rb +163 -0
  181. data/lib/parsanol/expression/treetop.rb +94 -0
  182. data/lib/parsanol/expression.rb +51 -0
  183. data/lib/parsanol/fast_mode.rb +145 -0
  184. data/lib/parsanol/first_set.rb +75 -0
  185. data/lib/parsanol/grammar_builder.rb +177 -0
  186. data/lib/parsanol/graphviz.rb +97 -0
  187. data/lib/parsanol/incremental_parser.rb +179 -0
  188. data/lib/parsanol/interval_tree.rb +215 -0
  189. data/lib/parsanol/lazy_result.rb +178 -0
  190. data/lib/parsanol/lexer.rb +146 -0
  191. data/lib/parsanol/native/parser.rb +630 -0
  192. data/lib/parsanol/native/serializer.rb +245 -0
  193. data/lib/parsanol/native/transformer.rb +438 -0
  194. data/lib/parsanol/native/types.rb +41 -0
  195. data/lib/parsanol/native.rb +217 -0
  196. data/lib/parsanol/optimizer.rb +86 -0
  197. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  198. data/lib/parsanol/optimizers/cut_inserter.rb +175 -0
  199. data/lib/parsanol/optimizers/lookahead_optimizer.rb +58 -0
  200. data/lib/parsanol/optimizers/quantifier_optimizer.rb +62 -0
  201. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  202. data/lib/parsanol/options/ruby_transform.rb +109 -0
  203. data/lib/parsanol/options/serialized.rb +94 -0
  204. data/lib/parsanol/options/zero_copy.rb +130 -0
  205. data/lib/parsanol/options.rb +20 -0
  206. data/lib/parsanol/parallel.rb +133 -0
  207. data/lib/parsanol/parsanol_native.bundle +0 -0
  208. data/lib/parsanol/parser.rb +151 -0
  209. data/lib/parsanol/parslet.rb +148 -0
  210. data/lib/parsanol/parslet_native.bundle +0 -0
  211. data/lib/parsanol/pattern/binding.rb +49 -0
  212. data/lib/parsanol/pattern.rb +115 -0
  213. data/lib/parsanol/pool.rb +220 -0
  214. data/lib/parsanol/pools/array_pool.rb +75 -0
  215. data/lib/parsanol/pools/buffer_pool.rb +173 -0
  216. data/lib/parsanol/pools/position_pool.rb +92 -0
  217. data/lib/parsanol/pools/slice_pool.rb +64 -0
  218. data/lib/parsanol/position.rb +89 -0
  219. data/lib/parsanol/result.rb +44 -0
  220. data/lib/parsanol/result_builder.rb +208 -0
  221. data/lib/parsanol/result_stream.rb +262 -0
  222. data/lib/parsanol/rig/rspec.rb +52 -0
  223. data/lib/parsanol/rope.rb +78 -0
  224. data/lib/parsanol/scope.rb +42 -0
  225. data/lib/parsanol/slice.rb +172 -0
  226. data/lib/parsanol/source/line_cache.rb +99 -0
  227. data/lib/parsanol/source.rb +171 -0
  228. data/lib/parsanol/source_location.rb +164 -0
  229. data/lib/parsanol/streaming_parser.rb +124 -0
  230. data/lib/parsanol/string_view.rb +192 -0
  231. data/lib/parsanol/transform.rb +267 -0
  232. data/lib/parsanol/version.rb +5 -0
  233. data/lib/parsanol/wasm/README.md +80 -0
  234. data/lib/parsanol/wasm/package.json +51 -0
  235. data/lib/parsanol/wasm/parsanol.js +252 -0
  236. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  237. data/lib/parsanol/wasm_parser.rb +239 -0
  238. data/lib/parsanol.rb +408 -0
  239. data/parsanol-ruby.gemspec +56 -0
  240. data/spec/acceptance/examples_spec.rb +96 -0
  241. data/spec/acceptance/infix_parser_spec.rb +145 -0
  242. data/spec/acceptance/mixing_parsers_spec.rb +74 -0
  243. data/spec/acceptance/regression_spec.rb +329 -0
  244. data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
  245. data/spec/acceptance/unconsumed_input_spec.rb +21 -0
  246. data/spec/benchmark/comparative/runner_spec.rb +105 -0
  247. data/spec/integration/array_pooling_spec.rb +193 -0
  248. data/spec/integration/buffer_allocation_spec.rb +324 -0
  249. data/spec/integration/position_pooling_spec.rb +184 -0
  250. data/spec/integration/result_builder_spec.rb +282 -0
  251. data/spec/integration/rope_stringview_integration_spec.rb +188 -0
  252. data/spec/integration/slice_pooling_spec.rb +63 -0
  253. data/spec/integration/string_view_integration_spec.rb +125 -0
  254. data/spec/lexer_spec.rb +231 -0
  255. data/spec/parsanol/atom_results_spec.rb +39 -0
  256. data/spec/parsanol/atoms/alternative_spec.rb +26 -0
  257. data/spec/parsanol/atoms/base_spec.rb +127 -0
  258. data/spec/parsanol/atoms/capture_spec.rb +21 -0
  259. data/spec/parsanol/atoms/combinations_spec.rb +5 -0
  260. data/spec/parsanol/atoms/custom_spec.rb +79 -0
  261. data/spec/parsanol/atoms/dsl_spec.rb +7 -0
  262. data/spec/parsanol/atoms/entity_spec.rb +77 -0
  263. data/spec/parsanol/atoms/ignored_spec.rb +15 -0
  264. data/spec/parsanol/atoms/infix_spec.rb +5 -0
  265. data/spec/parsanol/atoms/lookahead_spec.rb +22 -0
  266. data/spec/parsanol/atoms/named_spec.rb +4 -0
  267. data/spec/parsanol/atoms/re_spec.rb +14 -0
  268. data/spec/parsanol/atoms/repetition_spec.rb +24 -0
  269. data/spec/parsanol/atoms/scope_spec.rb +26 -0
  270. data/spec/parsanol/atoms/sequence_spec.rb +28 -0
  271. data/spec/parsanol/atoms/str_spec.rb +15 -0
  272. data/spec/parsanol/atoms/visitor_spec.rb +101 -0
  273. data/spec/parsanol/atoms_spec.rb +488 -0
  274. data/spec/parsanol/auto_optimize_spec.rb +334 -0
  275. data/spec/parsanol/buffer_spec.rb +219 -0
  276. data/spec/parsanol/builder_callbacks_spec.rb +377 -0
  277. data/spec/parsanol/choice_optimizer_spec.rb +231 -0
  278. data/spec/parsanol/convenience_spec.rb +54 -0
  279. data/spec/parsanol/cut_inserter_spec.rb +248 -0
  280. data/spec/parsanol/cut_spec.rb +66 -0
  281. data/spec/parsanol/edit_tracker_spec.rb +218 -0
  282. data/spec/parsanol/error_reporter/contextual_spec.rb +122 -0
  283. data/spec/parsanol/error_reporter/deepest_spec.rb +82 -0
  284. data/spec/parsanol/error_reporter/tree_spec.rb +7 -0
  285. data/spec/parsanol/export_spec.rb +67 -0
  286. data/spec/parsanol/expression/treetop_spec.rb +75 -0
  287. data/spec/parsanol/first_set_spec.rb +298 -0
  288. data/spec/parsanol/interval_tree_spec.rb +205 -0
  289. data/spec/parsanol/lazy_result_spec.rb +288 -0
  290. data/spec/parsanol/lookahead_optimizer_spec.rb +252 -0
  291. data/spec/parsanol/minilisp.citrus +29 -0
  292. data/spec/parsanol/minilisp.tt +29 -0
  293. data/spec/parsanol/optimizer_spec.rb +459 -0
  294. data/spec/parsanol/options/parslet_compat_spec.rb +166 -0
  295. data/spec/parsanol/options/ruby_transform_spec.rb +70 -0
  296. data/spec/parsanol/options/serialized_spec.rb +69 -0
  297. data/spec/parsanol/options/zero_copy_spec.rb +230 -0
  298. data/spec/parsanol/parser_spec.rb +36 -0
  299. data/spec/parsanol/parslet_spec.rb +38 -0
  300. data/spec/parsanol/pattern_spec.rb +272 -0
  301. data/spec/parsanol/pool_spec.rb +392 -0
  302. data/spec/parsanol/pools/array_pool_spec.rb +356 -0
  303. data/spec/parsanol/pools/buffer_pool_spec.rb +365 -0
  304. data/spec/parsanol/pools/position_pool_spec.rb +118 -0
  305. data/spec/parsanol/pools/slice_pool_spec.rb +262 -0
  306. data/spec/parsanol/position_spec.rb +14 -0
  307. data/spec/parsanol/result_builder_spec.rb +391 -0
  308. data/spec/parsanol/rig/rspec_spec.rb +54 -0
  309. data/spec/parsanol/rope_spec.rb +207 -0
  310. data/spec/parsanol/scope_spec.rb +45 -0
  311. data/spec/parsanol/slice_spec.rb +249 -0
  312. data/spec/parsanol/source/line_cache_spec.rb +74 -0
  313. data/spec/parsanol/source_spec.rb +207 -0
  314. data/spec/parsanol/string_view_spec.rb +345 -0
  315. data/spec/parsanol/transform/context_spec.rb +56 -0
  316. data/spec/parsanol/transform_spec.rb +183 -0
  317. data/spec/parsanol/tree_memoization_spec.rb +149 -0
  318. data/spec/parslet_compatibility/expressir_edge_cases_spec.rb +153 -0
  319. data/spec/parslet_compatibility/minimal_reproduction.rb +199 -0
  320. data/spec/parslet_compatibility_spec.rb +399 -0
  321. data/spec/parslet_imported/atom_spec.rb +93 -0
  322. data/spec/parslet_imported/combinator_spec.rb +161 -0
  323. data/spec/parslet_imported/spec_helper.rb +73 -0
  324. data/spec/performance/batch_parsing_benchmark.rb +129 -0
  325. data/spec/performance/complete_optimization_summary.rb +143 -0
  326. data/spec/performance/grammar_caching_analysis.rb +121 -0
  327. data/spec/performance/grammar_caching_benchmark.rb +80 -0
  328. data/spec/performance/native_benchmark_spec.rb +230 -0
  329. data/spec/performance/phase5_benchmark.rb +144 -0
  330. data/spec/performance/profiling_benchmark.rb +131 -0
  331. data/spec/performance/ruby_improvements_benchmark.rb +171 -0
  332. data/spec/performance_spec.rb +374 -0
  333. data/spec/spec_helper.rb +79 -0
  334. data/spec/support/opal.rb +8 -0
  335. data/spec/support/opal.rb.erb +14 -0
  336. metadata +485 -0
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
+ # Demonstrates working with Unicode text and natural language.
4
+ # Originally contributed to Parslet, ported to Parsanol as an example.
5
+
6
# This example lives in example/sentence/, so the gem's lib directory is two
# levels up ("/../lib" would resolve to example/lib).
$:.unshift File.dirname(__FILE__) + "/../../lib"

require 'parsanol/parslet'
9
+
10
# Grammar for text made of sentences that each end in the CJK full stop "。".
class MyParser < Parsanol::Parser
  # One or more characters other than "。", then the terminating "。".
  # The whole sequence (terminator included) is tagged :sentence.
  rule(:sentence) do
    body = match('[^。]').repeat(1)
    (body >> str("。")).as(:sentence)
  end

  # Zero or more sentences back to back.
  rule(:sentences) do
    sentence.repeat
  end

  root(:sentences)
end
15
+
16
# Replaces every {sentence: <value>} node of the parse tree with the value's
# string form.
class Transformer < Parsanol::Transform
  rule(sentence: simple(:sen)) do
    sen.to_s
  end
end
19
+
20
# Sample Japanese text, assembled from its source-line fragments.
string = [
  "RubyKaigi2009のテーマは、「変わる/変える」です。 前回の",
  "RubyKaigi2008のテーマであった「多様性」の言葉の通り、 ",
  "2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 ",
  "ますます多様化が進みつつあります。RubyKaigi2008は、そのような ",
  "Rubyの生態系をあらためて認識する場となりました。 しかし、",
  "こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、",
  "その違いを認識したところであまり意味がありません。 異なる実装、",
  "異なる思想、異なる背景といった、様々な多様性を理解しつつ、 ",
  "すり合わせるべきものをすり合わせ、変えていくべきところを ",
  "変えていくことが、豊かな未来へとつながる道に違いありません。"
].join

sentence_parser = MyParser.new
to_strings = Transformer.new

# Parse the text, convert the tree, and print the result.
p to_strings.apply(sentence_parser.parse(string))
@@ -0,0 +1,81 @@
1
+ # Sentence Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/sentence
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Japanese Sentence Rule
13
+
14
+ Sentences end with the Japanese period character:
15
+
16
+ ```ruby
17
+ rule(:sentence) { (match('[^。]').repeat(1) >> str("。")).as(:sentence) }
18
+ ```
19
+
20
+ The character `。` (U+3002) is the CJK full stop, used as sentence delimiter.
21
+
22
+ ### Multiple Sentences Rule
23
+
24
+ Text is a sequence of sentences:
25
+
26
+ ```ruby
27
+ rule(:sentences) { sentence.repeat }
28
+ root(:sentences)
29
+ ```
30
+
31
+ Repetition handles arbitrary-length text.
32
+
33
+ ### Transform Rule
34
+
35
+ The transform extracts sentence content:
36
+
37
+ ```ruby
38
+ class Transformer < Parsanol::Transform
39
+ rule(:sentence => simple(:sen)) { sen.to_s }
40
+ end
41
+ ```
42
+
43
+ Pattern matching extracts the captured string value.
44
+
45
+ ### Unicode Handling
46
+
47
+ Ruby 1.9+ handles Unicode natively:
48
+
49
+ ```ruby
50
+ # encoding: UTF-8
51
+ ```
52
+
53
+ The encoding pragma ensures proper interpretation of multibyte characters.
54
+
55
+ ## Output Types
56
+
57
+ ```ruby
58
+ # Parse tree:
59
+ [
60
+ {:sentence=>"RubyKaigi2009のテーマは、「変わる/変える」です。"@s},
61
+ {:sentence=>" 前回のRubyKaigi2008のテーマであった..."@s}
62
+ ]
63
+
64
+ # After transform:
65
+ ["RubyKaigi2009のテーマは、「変わる/変える」です。",
66
+ " 前回のRubyKaigi2008のテーマであった..."]
67
+ ```
68
+
69
+ ## Design Decisions
70
+
71
+ ### Why Use Japanese Period Character?
72
+
73
+ Natural language parsing must respect language-specific punctuation. Japanese uses `。` not `.`.
74
+
75
+ ### Why Simple Character Class?
76
+
77
+ `[^。]` excludes only the delimiter. This is simpler than defining valid Japanese character ranges.
78
+
79
+ ### Why Transform to Strings?
80
+
81
+ Extracting plain strings makes the result easy to process further (count words, analyze sentiment, etc.).
@@ -0,0 +1,180 @@
1
+ # S-Expression Parser Example - RubyTransform
2
+ #
3
+ # This example demonstrates parsing Lisp-style S-expressions.
4
+ # Shows nested lists, atoms, and quoted strings.
5
+ #
6
+ # Run with: ruby -Ilib example/sexp/ruby_transform.rb
7
+
8
# This example lives in example/sexp/, so the gem's lib directory is two
# levels up ("/../lib" would resolve to example/lib).
$:.unshift File.dirname(__FILE__) + "/../../lib"

require 'parsanol'
11
+
12
+ # Step 1: Define the S-expression grammar
13
# Grammar for Lisp-style S-expressions: parenthesised, arbitrarily nested
# lists whose leaves are numbers or symbols.
#
# NOTE(review): no rule here is tagged with `.as(...)`, while build_ast looks
# for :sexp/:list/:elements/:atom keys - confirm the parse tree really has the
# shape build_ast expects.
class SexpParser < Parsanol::Parser
  root :sexp

  # An S-expression is either a list or an atom.
  rule(:sexp) { list | atom }

  # List: "(" elements ")"; the elements are S-expressions themselves.
  rule(:list) { str('(') >> elements >> str(')') }

  # Elements: zero or more S-expressions, each optionally followed by whitespace.
  rule(:elements) { (sexp >> space?).repeat }

  # Atom: number or symbol; number is tried first.
  rule(:atom) { number | symbol }

  # Symbol: one or more characters that are neither whitespace nor parentheses.
  # In Parslet-style grammars `match` describes a single character, so the
  # "one or more" is expressed with `repeat(1)` instead of a `+` quantifier
  # inside the character pattern.
  rule(:symbol) { match('[^\s\(\)]').repeat(1) }

  # Number: optional minus sign, integer digits, optional fractional part.
  rule(:number) do
    str('-').maybe >>
      match('[0-9]').repeat(1) >>
      (str('.') >> match('[0-9]').repeat).maybe
  end

  # Optional run of whitespace.
  rule(:space?) { match('\s').repeat }
end
52
+
53
+ # Step 2: S-expression node classes
54
# Common ancestor of every S-expression AST node.
class Sexp
end

# A parenthesised list of child nodes.
class SexpList < Sexp
  attr_reader :elements

  # elements - Array of child nodes; stored as given.
  def initialize(elements)
    @elements = elements
  end

  # Renders the children, space-separated, inside parentheses.
  def to_s
    rendered = @elements.map { |element| element.to_s }
    "(" + rendered.join(' ') + ")"
  end
end

# A symbol leaf; the name is kept as a String.
class SexpSymbol < Sexp
  attr_reader :name
  alias to_s name

  def initialize(name)
    @name = name.to_s
  end
end

# A numeric leaf; the value is kept in its textual form.
class SexpNumber < Sexp
  attr_reader :value
  alias to_s value

  def initialize(value)
    @value = value.to_s
  end
end
91
+
92
+ # Step 3: Parse and transform
93
# Parses +input+ with SexpParser, prints the raw parse tree and the AST built
# from it, and returns that AST.
#
# A Parsanol::ParseFailed raised along the way is reported on stdout and
# turned into a nil return value.
def parse_sexp(input)
  tree = SexpParser.new.parse(input)
  puts "Parse tree: #{tree.inspect}"

  # Build the AST, print it, and hand it back.
  build_ast(tree).tap { |ast| puts "AST: #{ast}" }
rescue Parsanol::ParseFailed => failure
  puts "Parse failed: #{failure.message}"
  nil
end
108
+
109
# Recursively converts a parse tree into Sexp AST nodes.
#
# tree - nil, a Parsanol::Slice, an Array of sub-trees, a Hash, or any other
#        object that responds to to_s.
#
# Returns nil for nil and a SexpList for an Array (children that convert to
# nil are dropped). A Hash is unwrapped through the first truthy value among
# :sexp, :list, :elements and :atom; a Hash with none of them, a Slice, and
# every other object are classified by their string form as SexpNumber or
# SexpSymbol.
def build_ast(tree)
  return nil if tree.nil?

  case tree
  when Parsanol::Slice
    sexp_leaf(tree.to_s)
  when Array
    SexpList.new(tree.map { |child| build_ast(child) }.compact)
  when Hash
    wrapper = %i[sexp list elements atom].find { |key| tree[key] }
    wrapper ? build_ast(tree[wrapper]) : sexp_leaf(tree.to_s)
  else
    sexp_leaf(tree.to_s)
  end
end

# Builds the leaf node for +text+: a SexpNumber when the whole string is an
# optionally negative integer or decimal, a SexpSymbol otherwise.
def sexp_leaf(text)
  # \A and \z anchor the entire string; ^ and $ would accept any single
  # matching line inside multi-line text.
  if text.match?(/\A-?\d+(\.\d+)?\z/)
    SexpNumber.new(text)
  else
    SexpSymbol.new(text)
  end
end
153
+
154
+ # Example usage
155
# Demo driver: runs the sample inputs only when this file is executed directly.
if __FILE__ == $PROGRAM_NAME
  banner = "=" * 60
  puts banner
  puts "S-Expression Parser - RubyTransform"
  puts banner
  puts

  test_cases = [
    "42",
    "hello",
    "(+ 1 2)",
    "(+ 1 (* 2 3))",
    "(list 1 2 3)",
    "()"
  ]

  test_cases.each do |input|
    puts "-" * 40
    puts "Input: #{input}"
    begin
      # parse_sexp prints the parse tree and the AST itself, so its return
      # value is not needed here.
      parse_sexp(input)
    rescue => e
      puts "Error: #{e.message}"
    end
    puts
  end
end
@@ -0,0 +1,143 @@
1
+ # S-Expression Parser - Ruby Implementation (Transform)
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/sexp
7
+ ruby ruby_transform.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### S-Expression Structure
13
+
14
+ An S-expression is either a list or an atom:
15
+
16
+ ```ruby
17
+ rule(:sexp) {
18
+ list | atom
19
+ }
20
+ ```
21
+
22
+ This recursive definition allows arbitrarily nested structures.
23
+
24
+ ### List Rule
25
+
26
+ Lists are parenthesized elements:
27
+
28
+ ```ruby
29
+ rule(:list) {
30
+ str('(') >> elements >> str(')')
31
+ }
32
+
33
+ rule(:elements) {
34
+ (sexp >> space?).repeat
35
+ }
36
+ ```
37
+
38
+ Elements are separated by optional whitespace, so both `(a b c)` and `(a   b   c)` are accepted.
39
+
40
+ ### Atom Rule
41
+
42
+ Atoms are numbers or symbols:
43
+
44
+ ```ruby
45
+ rule(:atom) {
46
+ number | symbol
47
+ }
48
+
49
+ rule(:symbol) {
50
+ match('[^\s\(\)]+')
51
+ }
52
+
53
+ rule(:number) {
54
+ (
55
+ str('-').maybe >>
56
+ match('[0-9]').repeat(1) >>
57
+ (str('.') >> match('[0-9]').repeat).maybe
58
+ )
59
+ }
60
+ ```
61
+
62
+ Symbols exclude whitespace and parentheses; numbers support negative and decimal.
63
+
64
+ ### AST Node Classes
65
+
66
+ Typed classes represent the parsed structure:
67
+
68
+ ```ruby
69
+ class SexpList < Sexp
70
+ attr_reader :elements
71
+ def to_s
72
+ "(#{@elements.map(&:to_s).join(' ')})"
73
+ end
74
+ end
75
+
76
+ class SexpSymbol < Sexp
77
+ attr_reader :name
78
+ end
79
+
80
+ class SexpNumber < Sexp
81
+ attr_reader :value
82
+ end
83
+ ```
84
+
85
+ Each class has a meaningful `to_s` for debugging.
86
+
87
+ ### AST Builder Function
88
+
89
+ A recursive function builds the AST:
90
+
91
+ ```ruby
92
+ def build_ast(tree)
93
+ return nil if tree.nil?
94
+
95
+ if tree.is_a?(Parsanol::Slice)
96
+ s = tree.to_s
97
+ if s.match?(/^-?\d+(\.\d+)?$/)
98
+ SexpNumber.new(s)
99
+ else
100
+ SexpSymbol.new(s)
101
+ end
102
+ elsif tree.is_a?(Array)
103
+ elements = tree.map { |t| build_ast(t) }.compact
104
+ SexpList.new(elements)
105
+ # ...
106
+ end
107
+ end
108
+ ```
109
+
110
+ Type checks (`is_a?`) on each tree node determine which AST node is constructed.
111
+
112
+ ## Output Types
113
+
114
+ ```ruby
115
+ # Input: "(+ 1 (* 2 3))"
116
+ # AST:
117
+ #<SexpList @elements=[
118
+ #<SexpSymbol @name="+">,
119
+ #<SexpNumber @value="1">,
120
+ #<SexpList @elements=[
121
+ #<SexpSymbol @name="*">,
122
+ #<SexpNumber @value="2">,
123
+ #<SexpNumber @value="3">
124
+ ]>
125
+ ]>
126
+
127
+ # to_s output:
128
+ "(+ 1 (* 2 3))"
129
+ ```
130
+
131
+ ## Design Decisions
132
+
133
+ ### Why Recursive AST Builder?
134
+
135
+ The parse tree has varied structure (Hash, Array, Slice). A recursive function handles all cases uniformly.
136
+
137
+ ### Why Separate Number and Symbol Classes?
138
+
139
+ Different AST node types allow type-specific behavior (evaluation, formatting, analysis).
140
+
141
+ ### Why Not Use Transform?
142
+
143
+ The tree structure varies; manual recursion provides more control than pattern-based transforms for this case.
@@ -0,0 +1,54 @@
1
+ # A simple XML parser. It is simple in that it doesn't address
2
+ # any of the complexities of XML. Requires Ruby 1.9 or later.
3
+
4
# This example lives in example/simple-xml/, so the gem's lib directory is two
# levels up ("/../lib" would resolve to example/lib).
$:.unshift File.dirname(__FILE__) + "/../../lib"

require 'pp'
require 'parsanol/parslet'
8
+
9
# Toy XML grammar: a document is either a pair of tags wrapped around a nested
# document, or plain text.
class XML < Parsanol::Parser
  root :document

  # Opening tag (:o), inner document (:i) and closing tag (:c); falls back to
  # plain text when that sequence does not match.
  rule(:document) do
    element = tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c)
    element | text
  end

  # Perhaps we could have some syntax sugar to make this easier?
  #
  # Builds the atom for an opening tag (`<name>`), or for a closing tag
  # (`</name>`) when opts[:close] is truthy. The letters of the tag name are
  # tagged :name.
  def tag(opts={})
    closing = opts[:close] || false

    name = (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
    opener = closing ? (str('<') >> str('/')) : str('<')

    opener >> name >> str('>')
  end

  # Any run (possibly empty) of characters other than angle brackets.
  rule(:text) do
    match('[^<>]').repeat(0)
  end
end
34
+
35
# Parses +xml+ and validates the resulting tree.
#
# Validation works by reducing every element whose opening and closing tags
# carry the same name to the string 'verified'. If the transformation ends on
# a string, the document was 'valid'.
#
# Returns whatever the transform produces for the parsed tree.
def check(xml)
  parsed = XML.new.parse(xml)

  verifier = Parsanol::Transform.new do
    # Both :o and :c bind the same variable (:tag), so the names must agree.
    rule(o: { name: simple(:tag) }, c: { name: simple(:tag) }, i: simple(:t)) { 'verified' }
  end

  verifier.apply(parsed)
end
52
+
53
# A document with matching tags first, then one whose outer tags differ.
[
  "<a><b>some text in the tags</b></a>",
  "<b><b>some text in the tags</b></a>"
].each { |document| pp check(document) }
@@ -0,0 +1,125 @@
1
+ # Simple XML Parser - Ruby Implementation
2
+
3
+ ## How to Run
4
+
5
+ ```bash
6
+ cd parsanol-ruby/example/simple-xml
7
+ ruby basic.rb
8
+ ```
9
+
10
+ ## Code Walkthrough
11
+
12
+ ### Document Rule
13
+
14
+ Documents are nested tag pairs with content:
15
+
16
+ ```ruby
17
+ rule(:document) {
18
+ tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) |
19
+ text
20
+ }
21
+ ```
22
+
23
+ Recursive structure allows arbitrary nesting depth.
24
+
25
+ ### Tag Rule with Parameter
26
+
27
+ Tags are generated dynamically based on open/close state:
28
+
29
+ ```ruby
30
+ def tag(opts={})
31
+ close = opts[:close] || false
32
+
33
+ parslet = str('<')
34
+ parslet = parslet >> str('/') if close
35
+ parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
36
+ parslet = parslet >> str('>')
37
+
38
+ parslet
39
+ end
40
+ ```
41
+
42
+ Method generates opening `<tag>` or closing `</tag>` patterns.
43
+
44
+ ### Text Rule
45
+
46
+ Text content excludes angle brackets:
47
+
48
+ ```ruby
49
+ rule(:text) {
50
+ match('[^<>]').repeat(0)
51
+ }
52
+ ```
53
+
54
+ Simple character class prevents tag confusion.
55
+
56
+ ### Tag Validation via Transform
57
+
58
+ A transform validates matching open/close tags:
59
+
60
+ ```ruby
61
+ t = Parsanol::Transform.new do
62
+ rule(
63
+ o: {name: simple(:tag)},
64
+ c: {name: simple(:tag)},
65
+ i: simple(:t)
66
+ ) { 'verified' }
67
+ end
68
+ ```
69
+
70
+ Pattern matching ensures both tags have the same name.
71
+
72
+ ### Validation Logic
73
+
74
+ If tags don't match, the rule does not apply and the tree is left unreduced:
75
+
76
+ ```ruby
77
+ def check(xml)
78
+ r = XML.new.parse(xml)
79
+ t = Parsanol::Transform.new do
80
+ rule(
81
+ o: {name: simple(:tag)},
82
+ c: {name: simple(:tag)},
83
+ i: simple(:t)
84
+ ) { 'verified' }
85
+ end
86
+ t.apply(r)
87
+ end
88
+ ```
89
+
90
+ Returns 'verified' for valid XML; otherwise the result is the (partially reduced) tree rather than a string.
91
+
92
+ ## Output Types
93
+
94
+ ```ruby
95
+ # Valid XML:
96
+ {
97
+ o: {name: "a"},
98
+ i: {
99
+ o: {name: "b"},
100
+ i: "some text in the tags",
101
+ c: {name: "b"}
102
+ },
103
+ c: {name: "a"}
104
+ }
105
+
106
+ # After validation:
107
+ "verified"
108
+
109
+ # Mismatched tags:
110
+ # The rule does not apply (pattern doesn't match), so a Hash is returned instead of "verified"
111
+ ```
112
+
113
+ ## Design Decisions
114
+
115
+ ### Why Validate via Transform?
116
+
117
+ Transforms can express constraints that are hard to encode in the grammar itself.
118
+
119
+ ### Why Method Instead of Rule for Tags?
120
+
121
+ `tag(close: true/false)` demonstrates dynamic rule generation, useful for related patterns.
122
+
123
+ ### Why Not Full XML?
124
+
125
+ This is a teaching example. Real XML requires handling attributes, namespaces, CDATA, etc.
@@ -0,0 +1,3 @@
1
+ 123
2
+ 12345
3
+ " Some String with \"escapes\""
@@ -0,0 +1,77 @@
1
+ # A more complex parser that illustrates how a compiler might be constructed.
2
+ # The parser recognizes strings and integer literals and constructs an
3
+ # almost useful AST from the file contents.
4
+
5
require 'pp'

# This example lives in example/string-literal/, so the gem's lib directory is
# two levels up ("/../lib" would resolve to example/lib).
$:.unshift File.dirname(__FILE__) + "/../../lib"
require 'parsanol/parslet'

include Parsanol::Parslet
11
+
12
# Grammar for a file of literals: integers or double-quoted strings, each
# followed by at least one line break.
class LiteralsParser < Parsanol::Parser
  root :literals

  # Zero or more literals, each terminated by an end-of-line run.
  rule(:literals) { (literal >> eol).repeat }

  # One integer or string, tagged :literal, with optional trailing spaces.
  rule(:literal) { (integer | string).as(:literal) >> space.maybe }

  # One or more digits, tagged :integer.
  rule(:integer) { match('[0-9]').repeat(1).as(:integer) }

  # Double-quoted string. A backslash takes the following character with it,
  # which is how an escaped quote stays inside the string. The text between
  # the quotes is tagged :string.
  rule(:string) do
    escaped = str('\\') >> any
    plain = str('"').absent? >> any

    str('"') >> (escaped | plain).repeat.as(:string) >> str('"')
  end

  # One or more space characters.
  rule(:space) { match('[ ]').repeat(1) }

  # One or more line endings.
  rule(:eol) { line_end.repeat(1) }

  # A line break run, optionally followed by spaces.
  rule(:line_end) { crlf >> space.maybe }

  # One or more CR/LF characters.
  rule(:crlf) { match('[\r\n]').repeat(1) }
end
52
+
53
# simple.lit ships in example/, one directory above this script's own
# directory (example/string-literal/), so resolve it relative to the parent.
input_name = File.expand_path('../simple.lit', File.dirname(__FILE__))
file = File.read(input_name)

parsetree = LiteralsParser.new.parse(file)
57
+
58
# Base AST node for a literal; wraps the matched source text.
Lit = Struct.new(:text) do
  # Shows the text in its quoted, escaped (inspect) form.
  def to_s
    text.inspect
  end
end

# A string literal; rendered quoted via Lit#to_s.
class StringLit < Lit
end

# An integer literal; rendered as its bare text.
class IntLit < Lit
  # Always returns a String, even when text is some other object, so that
  # string interpolation and puts show the digits.
  def to_s
    text.to_s
  end
end
70
+
71
# Maps each tagged literal of the parse tree to its AST node class.
transform = Parsanol::Transform.new do
  rule(literal: { integer: simple(:x) }) do
    IntLit.new(x)
  end

  rule(literal: { string: simple(:s) }) do
    StringLit.new(s)
  end
end

# Convert the parse tree and pretty-print the resulting AST.
ast = transform.apply(parsetree)
pp(ast)