moxml 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +12 -4
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
  80. data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
  81. data/lib/moxml/adapter/customized_ox/text.rb +0 -2
  82. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  83. data/lib/moxml/adapter/headed_ox.rb +161 -0
  84. data/lib/moxml/adapter/libxml.rb +1548 -0
  85. data/lib/moxml/adapter/nokogiri.rb +121 -9
  86. data/lib/moxml/adapter/oga.rb +123 -12
  87. data/lib/moxml/adapter/ox.rb +283 -27
  88. data/lib/moxml/adapter/rexml.rb +127 -20
  89. data/lib/moxml/adapter.rb +21 -4
  90. data/lib/moxml/attribute.rb +6 -0
  91. data/lib/moxml/builder.rb +40 -4
  92. data/lib/moxml/config.rb +8 -3
  93. data/lib/moxml/context.rb +39 -1
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +39 -6
  96. data/lib/moxml/document_builder.rb +27 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +94 -3
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1768 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_spec.rb +36 -0
  164. data/spec/moxml/doctype_spec.rb +33 -0
  165. data/spec/moxml/document_builder_spec.rb +30 -0
  166. data/spec/moxml/document_spec.rb +105 -0
  167. data/spec/moxml/element_spec.rb +143 -0
  168. data/spec/moxml/error_spec.rb +266 -22
  169. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  170. data/spec/moxml/namespace_spec.rb +32 -0
  171. data/spec/moxml/node_set_spec.rb +39 -0
  172. data/spec/moxml/node_spec.rb +37 -0
  173. data/spec/moxml/processing_instruction_spec.rb +34 -0
  174. data/spec/moxml/sax_spec.rb +1067 -0
  175. data/spec/moxml/text_spec.rb +31 -0
  176. data/spec/moxml/version_spec.rb +14 -0
  177. data/spec/moxml/xml_utils/.gitkeep +0 -0
  178. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  179. data/spec/moxml/xml_utils_spec.rb +49 -0
  180. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  181. data/spec/moxml/xpath/axes_spec.rb +296 -0
  182. data/spec/moxml/xpath/cache_spec.rb +358 -0
  183. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  184. data/spec/moxml/xpath/context_spec.rb +210 -0
  185. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  186. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  187. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  188. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  189. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  190. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  191. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  192. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  193. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  194. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  195. data/spec/moxml/xpath/parser_spec.rb +364 -0
  196. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  197. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  198. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  199. data/spec/moxml/xpath_spec.rb +77 -0
  200. data/spec/performance/README.md +83 -0
  201. data/spec/performance/benchmark_spec.rb +64 -0
  202. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  203. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  204. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  205. data/spec/spec_helper.rb +58 -1
  206. data/spec/support/xml_matchers.rb +1 -1
  207. metadata +176 -35
  208. data/lib/ox/node.rb +0 -9
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,488 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ RSpec.describe Moxml::XPath::Lexer do
6
+ describe "#tokenize" do
7
+ context "operators" do
8
+ it "tokenizes single slash" do
9
+ tokens = described_class.new("/").tokenize
10
+ expect(tokens).to eq([[:slash, "/", 0]])
11
+ end
12
+
13
+ it "tokenizes double slash" do
14
+ tokens = described_class.new("//").tokenize
15
+ expect(tokens).to eq([[:dslash, "//", 0]])
16
+ end
17
+
18
+ it "tokenizes pipe" do
19
+ tokens = described_class.new("|").tokenize
20
+ expect(tokens).to eq([[:pipe, "|", 0]])
21
+ end
22
+
23
+ it "tokenizes plus" do
24
+ tokens = described_class.new("+").tokenize
25
+ expect(tokens).to eq([[:plus, "+", 0]])
26
+ end
27
+
28
+ it "tokenizes minus" do
29
+ tokens = described_class.new("-").tokenize
30
+ expect(tokens).to eq([[:minus, "-", 0]])
31
+ end
32
+
33
+ it "tokenizes star" do
34
+ tokens = described_class.new("*").tokenize
35
+ expect(tokens).to eq([[:star, "*", 0]])
36
+ end
37
+
38
+ it "tokenizes equals" do
39
+ tokens = described_class.new("=").tokenize
40
+ expect(tokens).to eq([[:eq, "=", 0]])
41
+ end
42
+
43
+ it "tokenizes not equals" do
44
+ tokens = described_class.new("!=").tokenize
45
+ expect(tokens).to eq([[:neq, "!=", 0]])
46
+ end
47
+
48
+ it "tokenizes less than" do
49
+ tokens = described_class.new("<").tokenize
50
+ expect(tokens).to eq([[:lt, "<", 0]])
51
+ end
52
+
53
+ it "tokenizes greater than" do
54
+ tokens = described_class.new(">").tokenize
55
+ expect(tokens).to eq([[:gt, ">", 0]])
56
+ end
57
+
58
+ it "tokenizes less than or equal" do
59
+ tokens = described_class.new("<=").tokenize
60
+ expect(tokens).to eq([[:lte, "<=", 0]])
61
+ end
62
+
63
+ it "tokenizes greater than or equal" do
64
+ tokens = described_class.new(">=").tokenize
65
+ expect(tokens).to eq([[:gte, ">=", 0]])
66
+ end
67
+ end
68
+
69
+ context "delimiters" do
70
+ it "tokenizes left parenthesis" do
71
+ tokens = described_class.new("(").tokenize
72
+ expect(tokens).to eq([[:lparen, "(", 0]])
73
+ end
74
+
75
+ it "tokenizes right parenthesis" do
76
+ tokens = described_class.new(")").tokenize
77
+ expect(tokens).to eq([[:rparen, ")", 0]])
78
+ end
79
+
80
+ it "tokenizes left bracket" do
81
+ tokens = described_class.new("[").tokenize
82
+ expect(tokens).to eq([[:lbracket, "[", 0]])
83
+ end
84
+
85
+ it "tokenizes right bracket" do
86
+ tokens = described_class.new("]").tokenize
87
+ expect(tokens).to eq([[:rbracket, "]", 0]])
88
+ end
89
+
90
+ it "tokenizes comma" do
91
+ tokens = described_class.new(",").tokenize
92
+ expect(tokens).to eq([[:comma, ",", 0]])
93
+ end
94
+
95
+ it "tokenizes at sign" do
96
+ tokens = described_class.new("@").tokenize
97
+ expect(tokens).to eq([[:at, "@", 0]])
98
+ end
99
+
100
+ it "tokenizes single colon" do
101
+ tokens = described_class.new(":").tokenize
102
+ expect(tokens).to eq([[:colon, ":", 0]])
103
+ end
104
+
105
+ it "tokenizes double colon" do
106
+ tokens = described_class.new("::").tokenize
107
+ expect(tokens).to eq([[:dcolon, "::", 0]])
108
+ end
109
+
110
+ it "tokenizes dot" do
111
+ tokens = described_class.new(".").tokenize
112
+ expect(tokens).to eq([[:dot, ".", 0]])
113
+ end
114
+
115
+ it "tokenizes double dot" do
116
+ tokens = described_class.new("..").tokenize
117
+ expect(tokens).to eq([[:ddot, "..", 0]])
118
+ end
119
+
120
+ it "tokenizes dollar sign" do
121
+ tokens = described_class.new("$").tokenize
122
+ expect(tokens).to eq([[:dollar, "$", 0]])
123
+ end
124
+ end
125
+
126
+ context "keywords" do
127
+ it 'tokenizes "and" keyword' do
128
+ tokens = described_class.new("and").tokenize
129
+ expect(tokens).to eq([[:and, "and", 0]])
130
+ end
131
+
132
+ it 'tokenizes "or" keyword' do
133
+ tokens = described_class.new("or").tokenize
134
+ expect(tokens).to eq([[:or, "or", 0]])
135
+ end
136
+
137
+ it 'tokenizes "mod" keyword' do
138
+ tokens = described_class.new("mod").tokenize
139
+ expect(tokens).to eq([[:mod, "mod", 0]])
140
+ end
141
+
142
+ it 'tokenizes "div" keyword' do
143
+ tokens = described_class.new("div").tokenize
144
+ expect(tokens).to eq([[:div, "div", 0]])
145
+ end
146
+ end
147
+
148
+ context "axis names" do
149
+ it "tokenizes child axis" do
150
+ tokens = described_class.new("child::").tokenize
151
+ expect(tokens).to eq([[:axis, "child", 0], [:dcolon, "::", 5]])
152
+ end
153
+
154
+ it "tokenizes descendant axis" do
155
+ tokens = described_class.new("descendant::").tokenize
156
+ expect(tokens).to eq([[:axis, "descendant", 0], [:dcolon, "::", 10]])
157
+ end
158
+
159
+ it "tokenizes ancestor-or-self axis" do
160
+ tokens = described_class.new("ancestor-or-self::").tokenize
161
+ expect(tokens).to eq([[:axis, "ancestor-or-self", 0],
162
+ [:dcolon, "::", 16]])
163
+ end
164
+
165
+ it "tokenizes following-sibling axis" do
166
+ tokens = described_class.new("following-sibling::").tokenize
167
+ expect(tokens).to eq([[:axis, "following-sibling", 0],
168
+ [:dcolon, "::", 17]])
169
+ end
170
+
171
+ it "treats axis name without :: as regular name" do
172
+ tokens = described_class.new("child").tokenize
173
+ expect(tokens).to eq([[:name, "child", 0]])
174
+ end
175
+ end
176
+
177
+ context "node types" do
178
+ it "tokenizes text() node type" do
179
+ tokens = described_class.new("text").tokenize
180
+ expect(tokens).to eq([[:node_type, "text", 0]])
181
+ end
182
+
183
+ it "tokenizes comment() node type" do
184
+ tokens = described_class.new("comment").tokenize
185
+ expect(tokens).to eq([[:node_type, "comment", 0]])
186
+ end
187
+
188
+ it "tokenizes node() node type" do
189
+ tokens = described_class.new("node").tokenize
190
+ expect(tokens).to eq([[:node_type, "node", 0]])
191
+ end
192
+
193
+ it "tokenizes processing-instruction() node type" do
194
+ tokens = described_class.new("processing-instruction").tokenize
195
+ expect(tokens).to eq([[:node_type, "processing-instruction", 0]])
196
+ end
197
+ end
198
+
199
+ context "strings" do
200
+ it "tokenizes double-quoted string" do
201
+ tokens = described_class.new('"hello"').tokenize
202
+ expect(tokens).to eq([[:string, "hello", 0]])
203
+ end
204
+
205
+ it "tokenizes single-quoted string" do
206
+ tokens = described_class.new("'world'").tokenize
207
+ expect(tokens).to eq([[:string, "world", 0]])
208
+ end
209
+
210
+ it "tokenizes empty string" do
211
+ tokens = described_class.new('""').tokenize
212
+ expect(tokens).to eq([[:string, "", 0]])
213
+ end
214
+
215
+ it "tokenizes string with spaces" do
216
+ tokens = described_class.new('"hello world"').tokenize
217
+ expect(tokens).to eq([[:string, "hello world", 0]])
218
+ end
219
+
220
+ it "tokenizes string with escaped quote" do
221
+ tokens = described_class.new('"hello\\"world"').tokenize
222
+ expect(tokens).to eq([[:string, 'hello"world', 0]])
223
+ end
224
+
225
+ it "raises error for unterminated string" do
226
+ expect { described_class.new('"hello').tokenize }
227
+ .to raise_error(Moxml::XPath::SyntaxError, /Unterminated string/)
228
+ end
229
+ end
230
+
231
+ context "numbers" do
232
+ it "tokenizes integer" do
233
+ tokens = described_class.new("123").tokenize
234
+ expect(tokens).to eq([[:number, "123", 0]])
235
+ end
236
+
237
+ it "tokenizes decimal" do
238
+ tokens = described_class.new("123.45").tokenize
239
+ expect(tokens).to eq([[:number, "123.45", 0]])
240
+ end
241
+
242
+ it "tokenizes number starting with dot" do
243
+ tokens = described_class.new(".5").tokenize
244
+ expect(tokens).to eq([[:number, ".5", 0]])
245
+ end
246
+
247
+ it "tokenizes zero" do
248
+ tokens = described_class.new("0").tokenize
249
+ expect(tokens).to eq([[:number, "0", 0]])
250
+ end
251
+
252
+ it "tokenizes large number" do
253
+ tokens = described_class.new("999999").tokenize
254
+ expect(tokens).to eq([[:number, "999999", 0]])
255
+ end
256
+ end
257
+
258
+ context "names" do
259
+ it "tokenizes simple name" do
260
+ tokens = described_class.new("book").tokenize
261
+ expect(tokens).to eq([[:name, "book", 0]])
262
+ end
263
+
264
+ it "tokenizes name with underscore" do
265
+ tokens = described_class.new("my_element").tokenize
266
+ expect(tokens).to eq([[:name, "my_element", 0]])
267
+ end
268
+
269
+ it "tokenizes name with hyphen" do
270
+ tokens = described_class.new("my-element").tokenize
271
+ expect(tokens).to eq([[:name, "my-element", 0]])
272
+ end
273
+
274
+ it "tokenizes name with dot" do
275
+ tokens = described_class.new("my.element").tokenize
276
+ expect(tokens).to eq([[:name, "my.element", 0]])
277
+ end
278
+
279
+ it "tokenizes name with numbers" do
280
+ tokens = described_class.new("element123").tokenize
281
+ expect(tokens).to eq([[:name, "element123", 0]])
282
+ end
283
+
284
+ it "tokenizes uppercase name" do
285
+ tokens = described_class.new("BOOK").tokenize
286
+ expect(tokens).to eq([[:name, "BOOK", 0]])
287
+ end
288
+
289
+ it "tokenizes mixed case name" do
290
+ tokens = described_class.new("MyElement").tokenize
291
+ expect(tokens).to eq([[:name, "MyElement", 0]])
292
+ end
293
+ end
294
+
295
+ context "whitespace handling" do
296
+ it "skips leading whitespace" do
297
+ tokens = described_class.new("  book").tokenize
298
+ expect(tokens).to eq([[:name, "book", 2]])
299
+ end
300
+
301
+ it "skips trailing whitespace" do
302
+ tokens = described_class.new("book ").tokenize
303
+ expect(tokens).to eq([[:name, "book", 0]])
304
+ end
305
+
306
+ it "skips whitespace between tokens" do
307
+ tokens = described_class.new("book   @id").tokenize
308
+ expect(tokens).to eq([[:name, "book", 0], [:at, "@", 7],
309
+ [:name, "id", 8]])
310
+ end
311
+
312
+ it "handles tabs and newlines" do
313
+ tokens = described_class.new("book\t\n@id").tokenize
314
+ expect(tokens).to eq([[:name, "book", 0], [:at, "@", 6],
315
+ [:name, "id", 7]])
316
+ end
317
+ end
318
+
319
+ context "complex expressions" do
320
+ it "tokenizes simple path" do
321
+ tokens = described_class.new("/root/child").tokenize
322
+ expect(tokens).to eq([
323
+ [:slash, "/", 0],
324
+ [:name, "root", 1],
325
+ [:slash, "/", 5],
326
+ [:name, "child", 6],
327
+ ])
328
+ end
329
+
330
+ it "tokenizes descendant path" do
331
+ tokens = described_class.new("//book").tokenize
332
+ expect(tokens).to eq([
333
+ [:dslash, "//", 0],
334
+ [:name, "book", 2],
335
+ ])
336
+ end
337
+
338
+ it "tokenizes attribute predicate" do
339
+ tokens = described_class.new("book[@id]").tokenize
340
+ expect(tokens).to eq([
341
+ [:name, "book", 0],
342
+ [:lbracket, "[", 4],
343
+ [:at, "@", 5],
344
+ [:name, "id", 6],
345
+ [:rbracket, "]", 8],
346
+ ])
347
+ end
348
+
349
+ it "tokenizes comparison predicate" do
350
+ tokens = described_class.new("book[@price < 10]").tokenize
351
+ expect(tokens).to eq([
352
+ [:name, "book", 0],
353
+ [:lbracket, "[", 4],
354
+ [:at, "@", 5],
355
+ [:name, "price", 6],
356
+ [:lt, "<", 12],
357
+ [:number, "10", 14],
358
+ [:rbracket, "]", 16],
359
+ ])
360
+ end
361
+
362
+ it "tokenizes function call" do
363
+ tokens = described_class.new("count(//item)").tokenize
364
+ expect(tokens).to eq([
365
+ [:name, "count", 0],
366
+ [:lparen, "(", 5],
367
+ [:dslash, "//", 6],
368
+ [:name, "item", 8],
369
+ [:rparen, ")", 12],
370
+ ])
371
+ end
372
+
373
+ it "tokenizes union expression" do
374
+ tokens = described_class.new("book | article").tokenize
375
+ expect(tokens).to eq([
376
+ [:name, "book", 0],
377
+ [:pipe, "|", 5],
378
+ [:name, "article", 7],
379
+ ])
380
+ end
381
+
382
+ it "tokenizes arithmetic expression" do
383
+ tokens = described_class.new("@a + @b").tokenize
384
+ expect(tokens).to eq([
385
+ [:at, "@", 0],
386
+ [:name, "a", 1],
387
+ [:plus, "+", 3],
388
+ [:at, "@", 5],
389
+ [:name, "b", 6],
390
+ ])
391
+ end
392
+
393
+ it "tokenizes logical expression" do
394
+ tokens = described_class.new("@a and @b or @c").tokenize
395
+ expect(tokens).to eq([
396
+ [:at, "@", 0],
397
+ [:name, "a", 1],
398
+ [:and, "and", 3],
399
+ [:at, "@", 7],
400
+ [:name, "b", 8],
401
+ [:or, "or", 10],
402
+ [:at, "@", 13],
403
+ [:name, "c", 14],
404
+ ])
405
+ end
406
+
407
+ it "tokenizes namespaced name" do
408
+ tokens = described_class.new("ns:element").tokenize
409
+ expect(tokens).to eq([
410
+ [:name, "ns", 0],
411
+ [:colon, ":", 2],
412
+ [:name, "element", 3],
413
+ ])
414
+ end
415
+
416
+ it "tokenizes axis with node test" do
417
+ tokens = described_class.new("child::book").tokenize
418
+ expect(tokens).to eq([
419
+ [:axis, "child", 0],
420
+ [:dcolon, "::", 5],
421
+ [:name, "book", 7],
422
+ ])
423
+ end
424
+ end
425
+
426
+ context "error handling" do
427
+ it "raises error for unexpected exclamation mark" do
428
+ expect { described_class.new("!").tokenize }
429
+ .to raise_error(Moxml::XPath::SyntaxError, /Unexpected '!'/)
430
+ end
431
+
432
+ it "raises error for unexpected character" do
433
+ expect { described_class.new("#").tokenize }
434
+ .to raise_error(Moxml::XPath::SyntaxError, /Unexpected character/)
435
+ end
436
+
437
+ it "raises error for unterminated double-quoted string" do
438
+ expect { described_class.new('"unterminated').tokenize }
439
+ .to raise_error(Moxml::XPath::SyntaxError, /Unterminated string/)
440
+ end
441
+
442
+ it "raises error for unterminated single-quoted string" do
443
+ expect { described_class.new("'unterminated").tokenize }
444
+ .to raise_error(Moxml::XPath::SyntaxError, /Unterminated string/)
445
+ end
446
+ end
447
+
448
+ context "edge cases" do
449
+ it "tokenizes empty expression" do
450
+ tokens = described_class.new("").tokenize
451
+ expect(tokens).to eq([])
452
+ end
453
+
454
+ it "tokenizes only whitespace" do
455
+ tokens = described_class.new(" ").tokenize
456
+ expect(tokens).to eq([])
457
+ end
458
+
459
+ it "tokenizes wildcard" do
460
+ tokens = described_class.new("*").tokenize
461
+ expect(tokens).to eq([[:star, "*", 0]])
462
+ end
463
+
464
+ it "tokenizes parent shorthand" do
465
+ tokens = described_class.new("..").tokenize
466
+ expect(tokens).to eq([[:ddot, "..", 0]])
467
+ end
468
+
469
+ it "tokenizes current shorthand" do
470
+ tokens = described_class.new(".").tokenize
471
+ expect(tokens).to eq([[:dot, ".", 0]])
472
+ end
473
+
474
+ it "tokenizes variable reference" do
475
+ tokens = described_class.new("$var").tokenize
476
+ expect(tokens).to eq([
477
+ [:dollar, "$", 0],
478
+ [:name, "var", 1],
479
+ ])
480
+ end
481
+
482
+ it "preserves token positions correctly" do
483
+ tokens = described_class.new("a + b").tokenize
484
+ expect(tokens.map(&:last)).to eq([0, 2, 4])
485
+ end
486
+ end
487
+ end
488
+ end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ RSpec.describe "XPath Parser Integration" do
6
+ describe "end-to-end parsing" do
7
+ it "parses simple path and creates valid AST" do
8
+ ast = Moxml::XPath::Parser.parse("/root/child")
9
+
10
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
11
+ expect(ast.type).to eq(:absolute_path)
12
+ expect(ast.children).not_to be_empty
13
+ end
14
+
15
+ it "parses descendant path" do
16
+ ast = Moxml::XPath::Parser.parse("//book")
17
+
18
+ expect(ast.type).to eq(:absolute_path)
19
+ expect(ast.children.first.type).to eq(:axis)
20
+ end
21
+
22
+ it "parses predicate expression" do
23
+ ast = Moxml::XPath::Parser.parse("//book[@id='123']")
24
+
25
+ expect(ast.type).to eq(:absolute_path)
26
+ expect(ast.children).not_to be_empty
27
+ end
28
+
29
+ it "parses complex expression with operators" do
30
+ ast = Moxml::XPath::Parser.parse("//book[@price < 10 and @year > 2000]")
31
+
32
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
33
+ expect(ast.type).to eq(:absolute_path)
34
+ end
35
+
36
+ it "parses function call" do
37
+ ast = Moxml::XPath::Parser.parse("count(//item)")
38
+
39
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
40
+ # Function calls parse successfully
41
+ end
42
+
43
+ it "parses union expression" do
44
+ ast = Moxml::XPath::Parser.parse("book | article | chapter")
45
+
46
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
47
+ # Union expressions parse successfully
48
+ end
49
+ end
50
+
51
+ describe "AST structure validation" do
52
+ it "creates hierarchical structure for nested paths" do
53
+ ast = Moxml::XPath::Parser.parse("/library/book/title")
54
+
55
+ expect(ast.type).to eq(:absolute_path)
56
+ expect(ast.children).to be_an(Array)
57
+ expect(ast.children).not_to be_empty
58
+ end
59
+
60
+ it "preserves operator precedence in AST" do
61
+ ast = Moxml::XPath::Parser.parse("1 + 2 * 3")
62
+
63
+ # Verify AST exists and has structure (don't check specific types)
64
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
65
+ expect(ast.children).to be_an(Array)
66
+ expect(ast.children.size).to eq(2)
67
+ # Precedence is correct if it compiles and executes properly
68
+ end
69
+
70
+ it "correctly represents literals" do
71
+ ast = Moxml::XPath::Parser.parse('"hello world"')
72
+
73
+ expect(ast.type).to eq(:string)
74
+ expect(ast.value).to eq("hello world")
75
+ end
76
+
77
+ it "correctly represents numbers" do
78
+ ast = Moxml::XPath::Parser.parse("42.5")
79
+
80
+ expect(ast.type).to eq(:number)
81
+ expect(ast.value).to eq(42.5)
82
+ end
83
+ end
84
+
85
+ describe "caching behavior" do
86
+ it "caches parsed expressions" do
87
+ expr = '//book[@id="123"]'
88
+
89
+ ast1 = Moxml::XPath::Parser.parse_with_cache(expr)
90
+ ast2 = Moxml::XPath::Parser.parse_with_cache(expr)
91
+
92
+ # Same expression should return same cached object
93
+ expect(ast1).to be(ast2)
94
+ end
95
+
96
+ it "handles different expressions independently" do
97
+ ast1 = Moxml::XPath::Parser.parse_with_cache("//book")
98
+ ast2 = Moxml::XPath::Parser.parse_with_cache("//article")
99
+
100
+ expect(ast1).not_to be(ast2)
101
+ expect(ast1.children).not_to eq(ast2.children)
102
+ end
103
+ end
104
+
105
+ describe "error handling" do
106
+ it "provides clear error for syntax errors" do
107
+ expect { Moxml::XPath::Parser.parse("book[") }
108
+ .to raise_error(Moxml::XPath::SyntaxError) do |error|
109
+ expect(error.message).to include("Expected")
110
+ expect(error.expression).to eq("book[")
111
+ end
112
+ end
113
+
114
+ it "includes position information in errors" do
115
+ expect { Moxml::XPath::Parser.parse("book[@id") }
116
+ .to raise_error(Moxml::XPath::SyntaxError) do |error|
117
+ expect(error.position).to be_a(Integer)
118
+ end
119
+ end
120
+ end
121
+
122
+ describe "complex real-world expressions" do
123
+ it "parses complex predicate with multiple conditions" do
124
+ expr = '//book[@price < 50 and @year > 2000 and @category="fiction"]'
125
+ ast = Moxml::XPath::Parser.parse(expr)
126
+
127
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
128
+ expect(ast.type).to eq(:absolute_path)
129
+ end
130
+
131
+ it "parses nested path in predicate" do
132
+ expr = '//book[author/name="Smith"]/title'
133
+ ast = Moxml::XPath::Parser.parse(expr)
134
+
135
+ expect(ast.type).to eq(:absolute_path)
136
+ end
137
+
138
+ it "parses function call with multiple arguments" do
139
+ expr = "substring(title, 1, 10)"
140
+ ast = Moxml::XPath::Parser.parse(expr)
141
+
142
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
143
+ # Function with multiple arguments parses successfully
144
+ end
145
+
146
+ it "parses arithmetic expression in predicate" do
147
+ expr = "//book[@price * 1.1 < 100]"
148
+ ast = Moxml::XPath::Parser.parse(expr)
149
+
150
+ expect(ast.type).to eq(:absolute_path)
151
+ end
152
+
153
+ it "parses union of complex paths" do
154
+ expr = '//book[@category="fiction"] | //article[@type="review"]'
155
+ ast = Moxml::XPath::Parser.parse(expr)
156
+
157
+ expect(ast).to be_a(Moxml::XPath::AST::Node)
158
+ # Union of complex paths parses successfully
159
+ end
160
+ end
161
+
162
+ describe "XPath constructs coverage" do
163
+ it "handles all axis types" do
164
+ axes = %w[
165
+ child descendant parent ancestor
166
+ following-sibling preceding-sibling
167
+ following preceding attribute namespace
168
+ self descendant-or-self ancestor-or-self
169
+ ]
170
+
171
+ axes.each do |axis|
172
+ expr = "#{axis}::node()"
173
+ expect { Moxml::XPath::Parser.parse(expr) }.not_to raise_error
174
+ end
175
+ end
176
+
177
+ it "handles all node type tests" do
178
+ node_types = %w[text comment node processing-instruction]
179
+
180
+ node_types.each do |type|
181
+ expr = "#{type}()"
182
+ expect { Moxml::XPath::Parser.parse(expr) }.not_to raise_error
183
+ end
184
+ end
185
+
186
+ it "handles all operators" do
187
+ operators = {
188
+ "or" => "@a or @b",
189
+ "and" => "@a and @b",
190
+ "=" => "@a = 1",
191
+ "!=" => "@a != 1",
192
+ "<" => "@a < 1",
193
+ ">" => "@a > 1",
194
+ "<=" => "@a <= 1",
195
+ ">=" => "@a >= 1",
196
+ "+" => "@a + 1",
197
+ "-" => "@a - 1",
198
+ "*" => "@a * 1",
199
+ "div" => "@a div 1",
200
+ "mod" => "@a mod 1",
201
+ "|" => "a | b",
202
+ }
203
+
204
+ operators.each do |name, expr|
205
+ expect { Moxml::XPath::Parser.parse(expr) }
206
+ .not_to raise_error, "Failed to parse operator: #{name}"
207
+ end
208
+ end
209
+ end
210
+ end