lernen 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +18 -0
  3. data/README.md +531 -28
  4. data/Rakefile +29 -7
  5. data/Steepfile +14 -0
  6. data/examples/ripper_prism.rb +63 -0
  7. data/examples/uri_parse_regexp.rb +73 -0
  8. data/lib/lernen/algorithm/cex_processor/acex.rb +43 -0
  9. data/lib/lernen/algorithm/cex_processor/prefix_transformer_acex.rb +43 -0
  10. data/lib/lernen/algorithm/cex_processor.rb +115 -0
  11. data/lib/lernen/algorithm/kearns_vazirani/discrimination_tree.rb +207 -0
  12. data/lib/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rb +100 -0
  13. data/lib/lernen/algorithm/kearns_vazirani.rb +44 -0
  14. data/lib/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rb +246 -0
  15. data/lib/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rb +89 -0
  16. data/lib/lernen/algorithm/kearns_vazirani_vpa.rb +35 -0
  17. data/lib/lernen/algorithm/learner.rb +82 -0
  18. data/lib/lernen/algorithm/lsharp/lsharp_learner.rb +367 -0
  19. data/lib/lernen/algorithm/lsharp/observation_tree.rb +115 -0
  20. data/lib/lernen/algorithm/lsharp.rb +43 -0
  21. data/lib/lernen/algorithm/lstar/lstar_learner.rb +49 -0
  22. data/lib/lernen/algorithm/lstar/observation_table.rb +214 -0
  23. data/lib/lernen/algorithm/lstar.rb +49 -0
  24. data/lib/lernen/algorithm/procedural/atr_manager.rb +200 -0
  25. data/lib/lernen/algorithm/procedural/procedural_learner.rb +223 -0
  26. data/lib/lernen/algorithm/procedural/procedural_sul.rb +47 -0
  27. data/lib/lernen/algorithm/procedural/return_indices_acex.rb +58 -0
  28. data/lib/lernen/algorithm/procedural.rb +57 -0
  29. data/lib/lernen/algorithm.rb +19 -0
  30. data/lib/lernen/automaton/dfa.rb +204 -0
  31. data/lib/lernen/automaton/mealy.rb +108 -0
  32. data/lib/lernen/automaton/moore.rb +122 -0
  33. data/lib/lernen/automaton/moore_like.rb +83 -0
  34. data/lib/lernen/automaton/proc_util.rb +93 -0
  35. data/lib/lernen/automaton/spa.rb +368 -0
  36. data/lib/lernen/automaton/transition_system.rb +209 -0
  37. data/lib/lernen/automaton/vpa.rb +300 -0
  38. data/lib/lernen/automaton.rb +19 -92
  39. data/lib/lernen/equiv/combined_oracle.rb +57 -0
  40. data/lib/lernen/equiv/exhaustive_search_oracle.rb +60 -0
  41. data/lib/lernen/equiv/moore_like_simulator_oracle.rb +36 -0
  42. data/lib/lernen/equiv/oracle.rb +109 -0
  43. data/lib/lernen/equiv/random_walk_oracle.rb +69 -0
  44. data/lib/lernen/equiv/random_well_matched_word_oracle.rb +139 -0
  45. data/lib/lernen/equiv/random_word_oracle.rb +71 -0
  46. data/lib/lernen/equiv/spa_simulator_oracle.rb +39 -0
  47. data/lib/lernen/equiv/test_words_oracle.rb +42 -0
  48. data/lib/lernen/equiv/transition_system_simulator_oracle.rb +36 -0
  49. data/lib/lernen/equiv/vpa_simulator_oracle.rb +48 -0
  50. data/lib/lernen/equiv.rb +25 -0
  51. data/lib/lernen/graph.rb +215 -0
  52. data/lib/lernen/system/block_sul.rb +41 -0
  53. data/lib/lernen/system/moore_like_simulator.rb +45 -0
  54. data/lib/lernen/system/moore_like_sul.rb +33 -0
  55. data/lib/lernen/system/sul.rb +126 -0
  56. data/lib/lernen/system/transition_system_simulator.rb +40 -0
  57. data/lib/lernen/system.rb +72 -0
  58. data/lib/lernen/version.rb +2 -1
  59. data/lib/lernen.rb +322 -13
  60. data/rbs_collection.lock.yaml +16 -0
  61. data/rbs_collection.yaml +14 -0
  62. data/renovate.json +6 -0
  63. data/sig/generated/lernen/algorithm/cex_processor/acex.rbs +30 -0
  64. data/sig/generated/lernen/algorithm/cex_processor/prefix_transformer_acex.rbs +27 -0
  65. data/sig/generated/lernen/algorithm/cex_processor.rbs +59 -0
  66. data/sig/generated/lernen/algorithm/kearns_vazirani/discrimination_tree.rbs +68 -0
  67. data/sig/generated/lernen/algorithm/kearns_vazirani/kearns_vazirani_learner.rbs +51 -0
  68. data/sig/generated/lernen/algorithm/kearns_vazirani.rbs +32 -0
  69. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/discrimination_tree_vpa.rbs +73 -0
  70. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa/kearns_vazirani_vpa_learner.rbs +51 -0
  71. data/sig/generated/lernen/algorithm/kearns_vazirani_vpa.rbs +20 -0
  72. data/sig/generated/lernen/algorithm/learner.rbs +53 -0
  73. data/sig/generated/lernen/algorithm/lsharp/lsharp_learner.rbs +103 -0
  74. data/sig/generated/lernen/algorithm/lsharp/observation_tree.rbs +53 -0
  75. data/sig/generated/lernen/algorithm/lsharp.rbs +38 -0
  76. data/sig/generated/lernen/algorithm/lstar/lstar_learner.rbs +38 -0
  77. data/sig/generated/lernen/algorithm/lstar/observation_table.rbs +79 -0
  78. data/sig/generated/lernen/algorithm/lstar.rbs +37 -0
  79. data/sig/generated/lernen/algorithm/procedural/atr_manager.rbs +80 -0
  80. data/sig/generated/lernen/algorithm/procedural/procedural_learner.rbs +79 -0
  81. data/sig/generated/lernen/algorithm/procedural/procedural_sul.rbs +36 -0
  82. data/sig/generated/lernen/algorithm/procedural/return_indices_acex.rbs +33 -0
  83. data/sig/generated/lernen/algorithm/procedural.rbs +27 -0
  84. data/sig/generated/lernen/algorithm.rbs +10 -0
  85. data/sig/generated/lernen/automaton/dfa.rbs +93 -0
  86. data/sig/generated/lernen/automaton/mealy.rbs +61 -0
  87. data/sig/generated/lernen/automaton/moore.rbs +69 -0
  88. data/sig/generated/lernen/automaton/moore_like.rbs +63 -0
  89. data/sig/generated/lernen/automaton/proc_util.rbs +38 -0
  90. data/sig/generated/lernen/automaton/spa.rbs +125 -0
  91. data/sig/generated/lernen/automaton/transition_system.rbs +108 -0
  92. data/sig/generated/lernen/automaton/vpa.rbs +109 -0
  93. data/sig/generated/lernen/automaton.rbs +15 -0
  94. data/sig/generated/lernen/equiv/combined_oracle.rbs +27 -0
  95. data/sig/generated/lernen/equiv/exhaustive_search_oracle.rbs +38 -0
  96. data/sig/generated/lernen/equiv/moore_like_simulator_oracle.rbs +27 -0
  97. data/sig/generated/lernen/equiv/oracle.rbs +75 -0
  98. data/sig/generated/lernen/equiv/random_walk_oracle.rbs +41 -0
  99. data/sig/generated/lernen/equiv/random_well_matched_word_oracle.rbs +70 -0
  100. data/sig/generated/lernen/equiv/random_word_oracle.rbs +45 -0
  101. data/sig/generated/lernen/equiv/spa_simulator_oracle.rbs +30 -0
  102. data/sig/generated/lernen/equiv/test_words_oracle.rbs +20 -0
  103. data/sig/generated/lernen/equiv/transition_system_simulator_oracle.rbs +27 -0
  104. data/sig/generated/lernen/equiv/vpa_simulator_oracle.rbs +33 -0
  105. data/sig/generated/lernen/equiv.rbs +11 -0
  106. data/sig/generated/lernen/graph.rbs +80 -0
  107. data/sig/generated/lernen/system/block_sul.rbs +29 -0
  108. data/sig/generated/lernen/system/moore_like_simulator.rbs +31 -0
  109. data/sig/generated/lernen/system/moore_like_sul.rbs +28 -0
  110. data/sig/generated/lernen/system/sul.rbs +87 -0
  111. data/sig/generated/lernen/system/transition_system_simulator.rbs +28 -0
  112. data/sig/generated/lernen/system.rbs +62 -0
  113. data/sig/generated/lernen/version.rbs +6 -0
  114. data/sig/generated/lernen.rbs +214 -0
  115. data/sig-test/generated/test/example_test.rbs +14 -0
  116. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_test.rbs +16 -0
  117. data/sig-test/generated/test/lernen/algorithm/kearns_vazirani_vpa_test.rbs +10 -0
  118. data/sig-test/generated/test/lernen/algorithm/lsharp_test.rbs +16 -0
  119. data/sig-test/generated/test/lernen/algorithm/lstar_test.rbs +16 -0
  120. data/sig-test/generated/test/lernen/algorithm/procedural_test.rbs +10 -0
  121. data/sig-test/generated/test/lernen/automaton/dfa_test.rbs +19 -0
  122. data/sig-test/generated/test/lernen/automaton/mealy_test.rbs +19 -0
  123. data/sig-test/generated/test/lernen/automaton/moore_test.rbs +19 -0
  124. data/sig-test/generated/test/lernen/automaton/proc_util_test.rbs +19 -0
  125. data/sig-test/generated/test/lernen/automaton/spa_test.rbs +19 -0
  126. data/sig-test/generated/test/lernen/automaton/vpa_test.rbs +19 -0
  127. data/sig-test/generated/test/lernen/equiv/exhaustive_search_oracle_test.rbs +10 -0
  128. data/sig-test/generated/test/lernen/equiv/random_walk_oracle_test.rbs +10 -0
  129. data/sig-test/generated/test/lernen/equiv/random_word_oracle_test.rbs +10 -0
  130. data/sig-test/generated/test/lernen/system/block_sul_test.rbs +16 -0
  131. data/sig-test/generated/test/lernen/system/moore_like_simulator_test.rbs +16 -0
  132. data/sig-test/generated/test/lernen/system/transition_system_simulator_test.rbs +13 -0
  133. data/sig-test/generated/test/lernen/system_test.rbs +11 -0
  134. data/sig-test/generated/test/lernen_test.rbs +13 -0
  135. metadata +131 -11
  136. data/.yardopts +0 -3
  137. data/lib/lernen/cex_processor.rb +0 -61
  138. data/lib/lernen/kearns_vazirani.rb +0 -199
  139. data/lib/lernen/lsharp.rb +0 -335
  140. data/lib/lernen/lstar.rb +0 -169
  141. data/lib/lernen/oracle.rb +0 -116
  142. data/lib/lernen/sul.rb +0 -134
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c925ae55bb57b20dc2ef637e751882ec493d565b7f0b7a8348c858593ef0d5f4
4
- data.tar.gz: dbfae4d012e582aeb909460eea27c30945a62e6586cd8ae7f9be48ac2c6dac6c
3
+ metadata.gz: '08702d90ac4daf26b0d8a402a97f742017a4b84f9f6eaa6da32c3f54cb22272f'
4
+ data.tar.gz: d76d51a8c96da2c63d8102d4b0fe938b39e632b7477c18bf7e1d6fd2174bf57f
5
5
  SHA512:
6
- metadata.gz: c0621a919ee2cebdc932f9aec31aac52ca7cfc528cd8f8f24ff4519e2d39769c4a6c2c5f9eeecad1394339afbe1625a89fb3acb418aa02e452eef2b78eb2a111
7
- data.tar.gz: e88d2feb2c44e766e348c62c05ea15cc5eddcb816077e33db40071957047f31b7eccfeb2f9d8b1679c1907f6a57e116042dac009180b5cef2a4453da1c05f7d1
6
+ metadata.gz: 12164e56ddcf1879aa1b7a38fb61e8604115ff72de711db9e0377425d953bca45ee0e1a3b0e9f9bcdeee1c90de3107a61bc97cb7cd6ee55a730901b793869a0a
7
+ data.tar.gz: e7400acdf6a5c2837054e4992947d8e9000406712931e04e75ef63e386e52359fa2eac0794f335f6eb0903ce2942ec1b1ff743787bcd36466cf0a532c8ebac67
data/.rubocop.yml CHANGED
@@ -32,3 +32,21 @@ Metrics/ParameterLists:
32
32
 
33
33
  Metrics/PerceivedComplexity:
34
34
  Enabled: false
35
+
36
+ Lint/ShadowingOuterLocalVariable:
37
+ Enabled: false
38
+
39
+ Style/NumericPredicate:
40
+ Enabled: false
41
+
42
+ Style/AccessorGrouping:
43
+ Enabled: false
44
+
45
+ Style/CombinableLoops:
46
+ Enabled: false
47
+
48
+ Style/CommentedKeyword:
49
+ Enabled: false
50
+
51
+ Minitest/MultipleAssertions:
52
+ Enabled: false
data/README.md CHANGED
@@ -1,42 +1,545 @@
1
1
  # Lernen
2
2
 
3
- > a simple automata learning library.
3
+ > an automata learning library written in Ruby.
4
4
 
5
- ## Usage
5
+ ## Short Introduction to Automata Learning and Lernen
6
+
7
+ **Automata learning** is a technique to *infer an [automaton](https://en.wikipedia.org/wiki/Automata_theory) from a program*.
8
+
9
+ Once we obtain an automaton from a program, we gain some benefits:
10
+
11
+ - **Visualization**: An automaton is a state-transition system, i.e., a labelled graph.
12
+ Therefore, graph visualization tools such as [GraphViz](https://graphviz.org) and [Mermaid](https://mermaid.js.org) allow you to see the structure of the system at the ready.
13
+ - **Model checking**: An automaton is a model of the system.
14
+ Model checking ensures some good properties about the system, e.g., that two different implementations of the system behave exactly the same.
15
+
16
+ **Lernen** is an automata learning library written in Ruby.
17
+ This library includes implementations of not only eminent automata learning algorithms such as Angluin's $L^\ast$ and $\textrm{Kearns-Vazirani}$ ($\textrm{KV}$), but also a modern algorithm such as $L^\\#$.
18
+ Also, this library supports inferring an automaton accepting a non-regular language, namely VPA (visibly pushdown automaton).
19
+
20
+ As case studies of the real-world applications of automata learning, we introduce two examples with Lernen.
21
+
22
+ ### Case Study 1: `URI.parse` and `URI` Regexp ([`examples/uri_parse_regexp.rb`](./examples/uri_parse_regexp.rb))
23
+
24
+ URL validation is a common task in a Web application, and we can achieve this task by using `URI.parse`.
25
+ For example, the following method `valid_and_http_url?` checks whether or not a given string is valid as URI and its scheme is `http` or `https`.
26
+
27
+ ```ruby
28
+ def valid_and_http_url?(string)
29
+ uri = URI.parse(string)
30
+ uri.scheme == "http" || uri.scheme == "https"
31
+ rescue URI::Error
32
+ false
33
+ end
34
+ ```
35
+
36
+ However, this method is *a bit inefficient* because it allocates a `URI` object for each call.
37
+
38
+ In Ruby's `uri` library, fortunately, we have the `URI::DEFAULT_PARSER.make_regexp` method to build a regexp pattern that matches valid URIs with given schemes.
39
+ Thus, we can rewrite the `valid_and_http_url?` method using this.
40
+
41
+ ```ruby
42
+ VALID_AND_HTTP_URL_REGEXP = /\A#{URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/
43
+ def new_valid_and_http_url?(string)
44
+ string.match?(VALID_AND_HTTP_URL_REGEXP)
45
+ end
46
+ ```
47
+
48
+ Then, `new_valid_and_http_url?` avoids allocations.
49
+ The performance of the new one is better.
50
+
51
+ Now, we have a question: *Do these implementations behave exactly the same?*
52
+
53
+ It is a typical question that arises when refactoring a program, and Lernen and automata learning can give an answer to it.
54
+
55
+ First, we need to infer two automata from two validation methods.
56
+ That can be done by the following code.
57
+
58
+ ```ruby
59
+ # `alphabet` is an array of pieces of words.
60
+ # Learning algorithm infers an automaton on this alphabet, so in this case,
61
+ # we specify some possible subwords in URLs to `alphabet`.
62
+ alphabet = %w[http https ftp example com foo 80 12 : / . ? = & # @ %]
63
+
64
+ # `oracle` specifies the kind of equivalence oracle used in learning,
65
+ # and `oracle_params` is a parameter object for it.
66
+ oracle = :random_word
67
+ oracle_params = { max_words: 2000 }.freeze
68
+
69
+ # Infer an automaton by calling the `Lernen.learn` method with the target program.
70
+
71
+ # `URI.parse` DFA:
72
+ uri_parse_dfa = Lernen.learn(alphabet:, oracle:, oracle_params:) do |word|
73
+ # `word.join` is necessary because `word` is an array of `alphabet` elements.
74
+ valid_and_http_url?(word.join)
75
+ end
76
+
77
+ # `URI` regexp DFA:
78
+ uri_regexp_dfa = Lernen.learn(alphabet:, oracle:, oracle_params:) do |word|
79
+ new_valid_and_http_url?(word.join)
80
+ end
81
+ ```
82
+
83
+ `uri_parse_dfa` and `uri_regexp_dfa` are `Lernen::Automaton::DFA` objects.
84
+ `DFA#to_mermaid` returns a [Mermaid](https://mermaid.js.org) diagram representation of the obtained DFA.
85
+
86
+ ```ruby
87
+ uri_parse_dfa.to_mermaid
88
+ # => "flowchart TD\n" ...
89
+ uri_regexp_dfa.to_mermaid
90
+ # => "flowchart TD\n" ...
91
+ ```
92
+
93
+ <details>
94
+ <summary>Mermaid diagrams for <code>URI.parse</code> and <code>URI</code> regexp DFAs</summary>
95
+
96
+ #### `URI.parse` DFA
97
+
98
+ ```mermaid
99
+ flowchart TD
100
+ 0((0))
101
+ 1((1))
102
+ 2((2))
103
+ 3(((3)))
104
+ 4(((4)))
105
+ 5(((5)))
106
+ 6((6))
107
+ 7(((7)))
108
+
109
+ 0 -- "'http'" --> 1
110
+ 0 -- "'https'" --> 1
111
+ 0 -- "'ftp'" --> 2
112
+ 0 -- "'example'" --> 2
113
+ 0 -- "'com'" --> 2
114
+ 0 -- "'foo'" --> 2
115
+ 0 -- "'80'" --> 2
116
+ 0 -- "'12'" --> 2
117
+ 0 -- "':'" --> 2
118
+ 0 -- "'/'" --> 2
119
+ 0 -- "'.'" --> 2
120
+ 0 -- "'?'" --> 2
121
+ 0 -- "'='" --> 2
122
+ 0 -- "'&'" --> 2
123
+ 0 -- "'#'" --> 2
124
+ 0 -- "'@'" --> 2
125
+ 0 -- "'%'" --> 2
126
+ 1 -- "'http'" --> 2
127
+ 1 -- "'https'" --> 2
128
+ 1 -- "'ftp'" --> 2
129
+ 1 -- "'example'" --> 2
130
+ 1 -- "'com'" --> 2
131
+ 1 -- "'foo'" --> 2
132
+ 1 -- "'80'" --> 2
133
+ 1 -- "'12'" --> 2
134
+ 1 -- "':'" --> 3
135
+ 1 -- "'/'" --> 2
136
+ 1 -- "'.'" --> 2
137
+ 1 -- "'?'" --> 2
138
+ 1 -- "'='" --> 2
139
+ 1 -- "'&'" --> 2
140
+ 1 -- "'#'" --> 2
141
+ 1 -- "'@'" --> 2
142
+ 1 -- "'%'" --> 2
143
+ 2 -- "'http'" --> 2
144
+ 2 -- "'https'" --> 2
145
+ 2 -- "'ftp'" --> 2
146
+ 2 -- "'example'" --> 2
147
+ 2 -- "'com'" --> 2
148
+ 2 -- "'foo'" --> 2
149
+ 2 -- "'80'" --> 2
150
+ 2 -- "'12'" --> 2
151
+ 2 -- "':'" --> 2
152
+ 2 -- "'/'" --> 2
153
+ 2 -- "'.'" --> 2
154
+ 2 -- "'?'" --> 2
155
+ 2 -- "'='" --> 2
156
+ 2 -- "'&'" --> 2
157
+ 2 -- "'#'" --> 2
158
+ 2 -- "'@'" --> 2
159
+ 2 -- "'%'" --> 2
160
+ 3 -- "'http'" --> 3
161
+ 3 -- "'https'" --> 3
162
+ 3 -- "'ftp'" --> 3
163
+ 3 -- "'example'" --> 3
164
+ 3 -- "'com'" --> 3
165
+ 3 -- "'foo'" --> 3
166
+ 3 -- "'80'" --> 3
167
+ 3 -- "'12'" --> 3
168
+ 3 -- "':'" --> 3
169
+ 3 -- "'/'" --> 3
170
+ 3 -- "'.'" --> 3
171
+ 3 -- "'?'" --> 4
172
+ 3 -- "'='" --> 3
173
+ 3 -- "'&'" --> 3
174
+ 3 -- "'#'" --> 5
175
+ 3 -- "'@'" --> 3
176
+ 3 -- "'%'" --> 6
177
+ 4 -- "'http'" --> 4
178
+ 4 -- "'https'" --> 4
179
+ 4 -- "'ftp'" --> 4
180
+ 4 -- "'example'" --> 4
181
+ 4 -- "'com'" --> 4
182
+ 4 -- "'foo'" --> 4
183
+ 4 -- "'80'" --> 4
184
+ 4 -- "'12'" --> 4
185
+ 4 -- "':'" --> 4
186
+ 4 -- "'/'" --> 4
187
+ 4 -- "'.'" --> 4
188
+ 4 -- "'?'" --> 4
189
+ 4 -- "'='" --> 4
190
+ 4 -- "'&'" --> 4
191
+ 4 -- "'#'" --> 5
192
+ 4 -- "'@'" --> 4
193
+ 4 -- "'%'" --> 7
194
+ 5 -- "'http'" --> 5
195
+ 5 -- "'https'" --> 5
196
+ 5 -- "'ftp'" --> 5
197
+ 5 -- "'example'" --> 5
198
+ 5 -- "'com'" --> 5
199
+ 5 -- "'foo'" --> 5
200
+ 5 -- "'80'" --> 5
201
+ 5 -- "'12'" --> 5
202
+ 5 -- "':'" --> 5
203
+ 5 -- "'/'" --> 5
204
+ 5 -- "'.'" --> 5
205
+ 5 -- "'?'" --> 5
206
+ 5 -- "'='" --> 5
207
+ 5 -- "'&'" --> 5
208
+ 5 -- "'#'" --> 2
209
+ 5 -- "'@'" --> 5
210
+ 5 -- "'%'" --> 6
211
+ 6 -- "'http'" --> 2
212
+ 6 -- "'https'" --> 2
213
+ 6 -- "'ftp'" --> 2
214
+ 6 -- "'example'" --> 2
215
+ 6 -- "'com'" --> 2
216
+ 6 -- "'foo'" --> 2
217
+ 6 -- "'80'" --> 3
218
+ 6 -- "'12'" --> 3
219
+ 6 -- "':'" --> 2
220
+ 6 -- "'/'" --> 2
221
+ 6 -- "'.'" --> 2
222
+ 6 -- "'?'" --> 2
223
+ 6 -- "'='" --> 2
224
+ 6 -- "'&'" --> 2
225
+ 6 -- "'#'" --> 2
226
+ 6 -- "'@'" --> 2
227
+ 6 -- "'%'" --> 2
228
+ 7 -- "'http'" --> 2
229
+ 7 -- "'https'" --> 2
230
+ 7 -- "'ftp'" --> 4
231
+ 7 -- "'example'" --> 4
232
+ 7 -- "'com'" --> 4
233
+ 7 -- "'foo'" --> 4
234
+ 7 -- "'80'" --> 4
235
+ 7 -- "'12'" --> 4
236
+ 7 -- "':'" --> 3
237
+ 7 -- "'/'" --> 3
238
+ 7 -- "'.'" --> 3
239
+ 7 -- "'?'" --> 3
240
+ 7 -- "'='" --> 3
241
+ 7 -- "'&'" --> 3
242
+ 7 -- "'#'" --> 5
243
+ 7 -- "'@'" --> 3
244
+ 7 -- "'%'" --> 3
245
+ ```
246
+
247
+ #### `URI` regexp DFA
248
+
249
+ ```mermaid
250
+ flowchart TD
251
+ 0((0))
252
+ 1((1))
253
+ 2((2))
254
+ 3(((3)))
255
+ 4(((4)))
256
+
257
+ 0 -- "'http'" --> 1
258
+ 0 -- "'https'" --> 1
259
+ 0 -- "'ftp'" --> 2
260
+ 0 -- "'example'" --> 2
261
+ 0 -- "'com'" --> 2
262
+ 0 -- "'foo'" --> 2
263
+ 0 -- "'80'" --> 2
264
+ 0 -- "'12'" --> 2
265
+ 0 -- "':'" --> 2
266
+ 0 -- "'/'" --> 2
267
+ 0 -- "'.'" --> 2
268
+ 0 -- "'?'" --> 2
269
+ 0 -- "'='" --> 2
270
+ 0 -- "'&'" --> 2
271
+ 0 -- "'#'" --> 2
272
+ 0 -- "'@'" --> 2
273
+ 0 -- "'%'" --> 2
274
+ 1 -- "'http'" --> 2
275
+ 1 -- "'https'" --> 2
276
+ 1 -- "'ftp'" --> 2
277
+ 1 -- "'example'" --> 2
278
+ 1 -- "'com'" --> 2
279
+ 1 -- "'foo'" --> 2
280
+ 1 -- "'80'" --> 2
281
+ 1 -- "'12'" --> 2
282
+ 1 -- "':'" --> 3
283
+ 1 -- "'/'" --> 2
284
+ 1 -- "'.'" --> 2
285
+ 1 -- "'?'" --> 2
286
+ 1 -- "'='" --> 2
287
+ 1 -- "'&'" --> 2
288
+ 1 -- "'#'" --> 2
289
+ 1 -- "'@'" --> 2
290
+ 1 -- "'%'" --> 2
291
+ 2 -- "'http'" --> 2
292
+ 2 -- "'https'" --> 2
293
+ 2 -- "'ftp'" --> 2
294
+ 2 -- "'example'" --> 2
295
+ 2 -- "'com'" --> 2
296
+ 2 -- "'foo'" --> 2
297
+ 2 -- "'80'" --> 2
298
+ 2 -- "'12'" --> 2
299
+ 2 -- "':'" --> 2
300
+ 2 -- "'/'" --> 2
301
+ 2 -- "'.'" --> 2
302
+ 2 -- "'?'" --> 2
303
+ 2 -- "'='" --> 2
304
+ 2 -- "'&'" --> 2
305
+ 2 -- "'#'" --> 2
306
+ 2 -- "'@'" --> 2
307
+ 2 -- "'%'" --> 2
308
+ 3 -- "'http'" --> 3
309
+ 3 -- "'https'" --> 3
310
+ 3 -- "'ftp'" --> 3
311
+ 3 -- "'example'" --> 3
312
+ 3 -- "'com'" --> 3
313
+ 3 -- "'foo'" --> 3
314
+ 3 -- "'80'" --> 3
315
+ 3 -- "'12'" --> 3
316
+ 3 -- "':'" --> 3
317
+ 3 -- "'/'" --> 3
318
+ 3 -- "'.'" --> 3
319
+ 3 -- "'?'" --> 3
320
+ 3 -- "'='" --> 3
321
+ 3 -- "'&'" --> 3
322
+ 3 -- "'#'" --> 4
323
+ 3 -- "'@'" --> 3
324
+ 3 -- "'%'" --> 2
325
+ 4 -- "'http'" --> 4
326
+ 4 -- "'https'" --> 4
327
+ 4 -- "'ftp'" --> 4
328
+ 4 -- "'example'" --> 4
329
+ 4 -- "'com'" --> 4
330
+ 4 -- "'foo'" --> 4
331
+ 4 -- "'80'" --> 4
332
+ 4 -- "'12'" --> 4
333
+ 4 -- "':'" --> 4
334
+ 4 -- "'/'" --> 4
335
+ 4 -- "'.'" --> 4
336
+ 4 -- "'?'" --> 4
337
+ 4 -- "'='" --> 4
338
+ 4 -- "'&'" --> 4
339
+ 4 -- "'#'" --> 2
340
+ 4 -- "'@'" --> 4
341
+ 4 -- "'%'" --> 2
342
+ ```
343
+
344
+ </details>
345
+
346
+ Next, we use `DFA.find_separating_word` to check equivalence between two automata.
347
+ This method finds a separating word between two automata, that is, a word accepted by one automaton and rejected by the other.
348
+ This method returns `nil` if a separating word is not found, that is, two automata are equivalent.
349
+
350
+ ```ruby
351
+ sep_word = Lernen::Automaton::DFA.find_separating_word(alphabet, uri_parse_dfa, uri_regexp_dfa)
352
+ sep_word&.join
353
+ # => "http:?%"
354
+ ```
355
+
356
+ Then, we got `"http:?%"` as the separating word between the two automata.
357
+ It means that the two DFAs of `URI.parse` and `URI` regexp *obtained by `Lernen.learn`* are not equivalent.
358
+ Finally, we need to ensure that the separating word distinguishes the actual implementations: `valid_and_http_url?` and `new_valid_and_http_url?`.
359
+
360
+ ```ruby
361
+ valid_and_http_url?(sep_word.join)
362
+ # => true
363
+ new_valid_and_http_url?(sep_word.join)
364
+ # => false
365
+ ```
366
+
367
+ Because of `valid_and_http_url?("http:?%") != new_valid_and_http_url?("http:?%")`, we can answer the first question:
368
+ *Validations with `URI.parse` and `URI` regexp are not the same because they behave differently with `"http:?%"`.*
369
+
370
+ ### Case Study 2: Two Parsers for Ruby ([`examples/ripper_prism.rb`](./examples/ripper_prism.rb))
371
+
372
+ Since 3.2, Ruby has two parser implementations: [`parse.y`](https://github.com/ruby/ruby/blob/master/parse.y) and [Prism](https://github.com/ruby/prism).
373
+ `parse.y` is a traditional LALR parser and Prism is a hand-written recursive descent parser.
374
+ Although Prism claims high compatibility with `parse.y`, we are unsure whether they behave exactly the same or not.
375
+
376
+ This situation seems similar to the first case study, but there is a big difference here; that is, Ruby's grammar is not regular.
377
+ In other words, no DFA can recognize Ruby's grammar and learning a DFA for Ruby's grammar is impossible.
378
+
379
+ In this case, VPA (visibly pushdown automaton) works well.
380
+ VPA is a finite-state automaton extended with explicit nesting characters.
381
+ It can be thought of as pushdown automata, where characters that push or pop onto a stack are limited.
382
+ Although VPA is less powerful than pushdown automata, it can handle non-regular language such as nested parentheses.
383
+
384
+ VPA does not represent all Ruby's grammar, but it is good enough to find a bug in the parsers.
385
+ We infer automata of the Ripper (`parse.y`) and Prism parsers on an alphabet with only parentheses (`(` and `)`), a string literal (`"a"`), and a colon (`:`).
6
386
 
7
387
  ```ruby
8
- require "lernen"
388
+ # `alphabet`, `call_alphabet`, and `return_alphabet` are arrays of pieces of words.
389
+ # The `alphabet` characters cause neither push nor pop,
390
+ # the `call_alphabet` characters cause push onto a stack, and
391
+ # the `return_alphabet` characters cause pop from a stack.
392
+ alphabet = %w["a" :]
393
+ call_alphabet = %w[(]
394
+ return_alphabet = %w[)]
9
395
 
10
- alphabet = %w[0 1]
11
- sul = Lernen::SUL.from_block { |inputs| inputs.count { _1 == "1" } % 4 == 3 }
12
- oracle = Lernen::BreadthFirstExplorationOracle.new(alphabet, sul)
396
+ # `oracle` specifies the kind of equivalence oracle used in learning,
397
+ # and `oracle_params` is a parameter object for it.
398
+ oracle = :random_well_matched_word
399
+ oracle_params = { max_words: 2000 }.freeze
13
400
 
14
- dfa = Lernen::LStar.learn(alphabet, sul, oracle, automaton_type: :dfa)
15
- # => Lernen::DFA.new(
16
- # 0,
17
- # Set[3],
18
- # {
19
- # [0, "0"] => 0,
20
- # [0, "1"] => 1,
21
- # [1, "0"] => 1,
22
- # [1, "1"] => 2,
23
- # [2, "0"] => 2,
24
- # [2, "1"] => 3,
25
- # [3, "0"] => 3,
26
- # [3, "1"] => 0
27
- # }
28
- # )
401
+ # When `call_alphabet` and `return_alphabet` are specified to `Lernen.learn`,
402
+ # it infers a VPA instead of an automaton.
403
+
404
+ # Ripper (parse.y) VPA:
405
+ ripper_vpa = Lernen.learn(alphabet:, call_alphabet:, return_alphabet:, oracle:, oracle_params:, random:) do |word|
406
+ !Ripper.sexp(word.join).nil?
407
+ end
408
+
409
+ # Prism VPA:
410
+ prism_vpa = Lernen.learn(alphabet:, call_alphabet:, return_alphabet:, oracle:, oracle_params:, random:) do |word|
411
+ Prism.parse(word.join).success?
412
+ end
29
413
  ```
30
414
 
31
- ## Algorithms
415
+ <details>
416
+ <summary>Mermaid diagrams for Ripper and Prism VPAs</summary>
417
+
418
+ #### Ripper VPA
419
+
420
+ ```mermaid
421
+ flowchart TD
422
+ 0(((0)))
423
+ 1(((1)))
424
+ 2(((2)))
425
+ 4((4))
426
+ 5((5))
427
+ 6((6))
428
+ 7((7))
429
+
430
+ 0 -- "'#34;a#34;'" --> 1
431
+ 0 -- "':'" --> 4
432
+ 1 -- "'#34;a#34;'" --> 1
433
+ 1 -- "':'" --> 5
434
+ 2 -- "':'" --> 5
435
+ 4 -- "'#34;a#34;'" --> 2
436
+ 5 -- "'#34;a#34;'" --> 6
437
+ 5 -- "':'" --> 7
438
+ 6 -- "'#34;a#34;'" --> 6
439
+
440
+ 0 -- "')'/(0,'(')" --> 2
441
+ 0 -- "')'/(5,'(')" --> 6
442
+ 0 -- "')'/(7,'(')" --> 2
443
+ 1 -- "')'/(0,'(')" --> 2
444
+ 1 -- "')'/(5,'(')" --> 6
445
+ 1 -- "')'/(7,'(')" --> 2
446
+ 2 -- "')'/(0,'(')" --> 2
447
+ 2 -- "')'/(5,'(')" --> 6
448
+ 2 -- "')'/(7,'(')" --> 2
449
+ 6 -- "')'/(7,'(')" --> 2
450
+ ```
451
+
452
+ #### Prism VPA
453
+
454
+ ```mermaid
455
+ flowchart TD
456
+ 0(((0)))
457
+ 1(((1)))
458
+ 2(((2)))
459
+ 4((4))
460
+ 5(((5)))
461
+ 6((6))
462
+ 7((7))
463
+ 8((8))
464
+
465
+ 0 -- "'#34;a#34;'" --> 1
466
+ 0 -- "':'" --> 4
467
+ 1 -- "'#34;a#34;'" --> 5
468
+ 1 -- "':'" --> 6
469
+ 2 -- "':'" --> 7
470
+ 4 -- "'#34;a#34;'" --> 2
471
+ 5 -- "'#34;a#34;'" --> 5
472
+ 5 -- "':'" --> 7
473
+ 6 -- "':'" --> 8
474
+ 7 -- "':'" --> 8
475
+
476
+ 0 -- "')'/(0,'(')" --> 2
477
+ 0 -- "')'/(8,'(')" --> 2
478
+ 1 -- "')'/(0,'(')" --> 2
479
+ 1 -- "')'/(8,'(')" --> 2
480
+ 2 -- "')'/(0,'(')" --> 2
481
+ 2 -- "')'/(8,'(')" --> 2
482
+ 5 -- "')'/(0,'(')" --> 2
483
+ 5 -- "')'/(8,'(')" --> 2
484
+ 6 -- "')'/(0,'(')" --> 2
485
+ ```
486
+
487
+ </details>
488
+
489
+ As with DFAs, we can check whether two VPAs are equal by calling `find_separating_word`.
490
+
491
+ ```ruby
492
+ sep_word = Lernen::Automaton::VPA.find_separating_word(alphabet, call_alphabet, return_alphabet, ripper_vpa, prism_vpa)
493
+ puts sep_word&.join
494
+ # => "(\"a\":)"
495
+ ```
496
+
497
+ Then, we got `"(\"a\":)"` as the separating word between Ripper and Prism VPAs.
498
+ As of 2024/09/08 (Prism 1.0.0 and Ruby 3.3.5), this is indeed a counterexample of a string that behaves differently in Prism and `parse.y`.
499
+
500
+ ```ruby
501
+ !Ripper.sexp("(\"a\":)").nil?
502
+ # => false
503
+ Prism.parse("(\"a\":)").success?
504
+ # => true
505
+ ```
506
+
507
+ This seems like a bug, since Prism parses this as a symbol literal surrounded by parentheses.
508
+
509
+ ## Contributing
510
+
511
+ This library is under active development, and the API is subject to breaking changes.
512
+
513
+ If you find a bug or problem with the library, please create an issue or a pull request.
514
+
515
+ We can use `rake` during development.
516
+ These are tasks defined for this project.
517
+
518
+ - Run tests.
519
+
520
+ ```console
521
+ $ bundle exec rake test
522
+ ```
523
+
524
+ - Run type checking using Steep.
525
+
526
+ ```console
527
+ $ bundle exec rake steep
528
+ ```
529
+
530
+ - Run code formatting using Rubocop and `syntax_tree`.
531
+
532
+ ```console
533
+ $ bundle exec rake format
534
+ ```
32
535
 
33
- Learnen supports these automata learning algorithms.
536
+ - Check code formatting.
537
+
538
+ ```console
539
+ $ bundle exec rake lint
540
+ ```
34
541
 
35
- | Algorithm | Supported `automaton_type` |
36
- |:----------------:|:--------------------------:|
37
- | `LStar` | `:dfa`, `:moore`, `:mealy` |
38
- | `KearnsVazirani` | `:dfa`, `:moore`, `:mealy` |
39
- | `LSharp` | `:dfa`, `:moore`, `:mealy` |
542
+ When you make a pull request, please make sure it passes `rake test && rake steep && rake lint`.
40
543
 
41
544
  ## License
42
545
 
data/Rakefile CHANGED
@@ -1,16 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "bundler/gem_tasks"
4
- require "minitest/test_task"
5
- require "yard"
4
+ require "rake/testtask"
6
5
  require "rubocop/rake_task"
7
6
  require "syntax_tree/rake_tasks"
8
7
 
9
- Minitest::TestTask.create
10
-
11
- YARD::Rake::YardocTask.new do |t|
12
- t.files = ["lib/**/*.rb"]
13
- t.stats_options = ["--list-undoc"]
8
+ Rake::TestTask.new(:test) do |t|
9
+ t.verbose = false
10
+ t.pattern = "test/**/*_test.rb"
14
11
  end
15
12
 
16
13
  RuboCop::RakeTask.new { |t| t.options = %w[--fail-level W] }
@@ -27,3 +24,28 @@ end
27
24
 
28
25
  task format: %w[rubocop:autocorrect_all stree:write]
29
26
  task lint: %w[rubocop stree:check]
27
+
28
+ namespace :rbs_inline do
29
+ desc "Generate RBS signatures for `lib` files"
30
+ task :lib do
31
+ sh "bin/rbs-inline", "lib", "--output=sig/generated"
32
+ end
33
+
34
+ desc "Generate RBS signatures for `test` files"
35
+ task :test do
36
+ sh "bin/rbs-inline", "test", "--output=sig-test/generated"
37
+ end
38
+
39
+ task default: %i[lib test]
40
+ end
41
+
42
+ task rbs_inline: %i[rbs_inline:lib rbs_inline:test]
43
+
44
+ namespace :steep do
45
+ desc "Run `steep check`"
46
+ task :check do
47
+ sh "bin/steep", "check"
48
+ end
49
+ end
50
+
51
+ task steep: %i[rbs_inline steep:check]
data/Steepfile ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ target :lib do
4
+ signature "sig"
5
+ check "lib"
6
+ end
7
+
8
+ target :test do
9
+ signature "sig"
10
+ signature "sig-test"
11
+ library "minitest"
12
+ library "ripper"
13
+ check "test"
14
+ end