parsanol 1.3.8 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca5f39af8b6ed42fb15609ae995687d58531d88b899a7c81fd708b2e36f9cf8d
4
- data.tar.gz: 2a1d92d10d6f258ba0f79de6d6df22ffab5993ba8bc60f4ddfcbbc86a036546b
3
+ metadata.gz: 4db89a80339a628a73fff78b0c139b82378a3264e2009021ef2665fe40717731
4
+ data.tar.gz: f3be215e5a7c60837082916d36acba8443715206dcf3d5b875f5372c7c4ec985
5
5
  SHA512:
6
- metadata.gz: bede99bb9de3cef6a3cb934c3405114bc082fd7e071d3ec320739b93ecedb73bc114d41452cacd9ef3337ca950047916b48ba3d3001f46accf813ec8e2338e3d
7
- data.tar.gz: 7f29093ef0e2023dffbbb8a5bcffd0c8e59a99a247ca73361e349c62e4defdb2039afe31c0b5c29050074b93767dfd187931ec567051b9bf30f23577ab7b1fc3
6
+ metadata.gz: 9922b36fd81975232080e50a7d4f5b78604349be51a7812ec17d564a027d8f0152e9ec0e764fb57dfda2538cfb7247e3a6bd4d4c10feacdd795d7ffa907ed2ee
7
+ data.tar.gz: 9e7fe2260017d554dcb260c7f3583d31d2da2cdb4081ff049259290d63a65abece0722906982d1c125facd88316502f5d0e82b890e306a6b93d8afc24a9f856f
data/Cargo.lock CHANGED
@@ -206,15 +206,15 @@ dependencies = [
206
206
 
207
207
  [[package]]
208
208
  name = "itoa"
209
- version = "1.0.17"
209
+ version = "1.0.18"
210
210
  source = "registry+https://github.com/rust-lang/crates.io-index"
211
- checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
211
+ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
212
212
 
213
213
  [[package]]
214
214
  name = "js-sys"
215
- version = "0.3.91"
215
+ version = "0.3.92"
216
216
  source = "registry+https://github.com/rust-lang/crates.io-index"
217
- checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
217
+ checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
218
218
  dependencies = [
219
219
  "once_cell",
220
220
  "wasm-bindgen",
@@ -305,8 +305,8 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
305
305
 
306
306
  [[package]]
307
307
  name = "parsanol"
308
- version = "0.4.1"
309
- source = "git+https://github.com/parsanol/parsanol-rs?rev=3716de2#3716de22dc7e371e2dfb59bb28f443d0d1550b9e"
308
+ version = "0.5.1"
309
+ source = "git+https://github.com/parsanol/parsanol-rs?branch=main#6de2f6877b75c3d4ab2d73dd75cf32405e1f5505"
310
310
  dependencies = [
311
311
  "ahash",
312
312
  "getrandom 0.3.4",
@@ -322,8 +322,8 @@ dependencies = [
322
322
 
323
323
  [[package]]
324
324
  name = "parsanol-derive"
325
- version = "0.4.1"
326
- source = "git+https://github.com/parsanol/parsanol-rs?rev=3716de2#3716de22dc7e371e2dfb59bb28f443d0d1550b9e"
325
+ version = "0.5.1"
326
+ source = "git+https://github.com/parsanol/parsanol-rs?branch=main#6de2f6877b75c3d4ab2d73dd75cf32405e1f5505"
327
327
  dependencies = [
328
328
  "proc-macro2",
329
329
  "quote",
@@ -382,16 +382,16 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
382
382
 
383
383
  [[package]]
384
384
  name = "rb-sys"
385
- version = "0.9.124"
386
- source = "git+https://github.com/oxidize-rb/rb-sys.git?rev=b42b5fba0424fcc6778fed9a01e05215eaa53f85#b42b5fba0424fcc6778fed9a01e05215eaa53f85"
385
+ version = "0.9.126"
386
+ source = "git+https://github.com/oxidize-rb/rb-sys.git?rev=daa12b6521a0c442a705d7f9a929029119aab1d6#daa12b6521a0c442a705d7f9a929029119aab1d6"
387
387
  dependencies = [
388
388
  "rb-sys-build",
389
389
  ]
390
390
 
391
391
  [[package]]
392
392
  name = "rb-sys-build"
393
- version = "0.9.124"
394
- source = "git+https://github.com/oxidize-rb/rb-sys.git?rev=b42b5fba0424fcc6778fed9a01e05215eaa53f85#b42b5fba0424fcc6778fed9a01e05215eaa53f85"
393
+ version = "0.9.126"
394
+ source = "git+https://github.com/oxidize-rb/rb-sys.git?rev=daa12b6521a0c442a705d7f9a929029119aab1d6#daa12b6521a0c442a705d7f9a929029119aab1d6"
395
395
  dependencies = [
396
396
  "bindgen",
397
397
  "lazy_static",
@@ -439,9 +439,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
439
439
 
440
440
  [[package]]
441
441
  name = "rustc-hash"
442
- version = "2.1.1"
442
+ version = "2.1.2"
443
443
  source = "registry+https://github.com/rust-lang/crates.io-index"
444
- checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
444
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
445
445
 
446
446
  [[package]]
447
447
  name = "rustversion"
@@ -565,9 +565,9 @@ dependencies = [
565
565
 
566
566
  [[package]]
567
567
  name = "wasm-bindgen"
568
- version = "0.2.114"
568
+ version = "0.2.115"
569
569
  source = "registry+https://github.com/rust-lang/crates.io-index"
570
- checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
570
+ checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
571
571
  dependencies = [
572
572
  "cfg-if",
573
573
  "once_cell",
@@ -578,9 +578,9 @@ dependencies = [
578
578
 
579
579
  [[package]]
580
580
  name = "wasm-bindgen-macro"
581
- version = "0.2.114"
581
+ version = "0.2.115"
582
582
  source = "registry+https://github.com/rust-lang/crates.io-index"
583
- checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
583
+ checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
584
584
  dependencies = [
585
585
  "quote",
586
586
  "wasm-bindgen-macro-support",
@@ -588,9 +588,9 @@ dependencies = [
588
588
 
589
589
  [[package]]
590
590
  name = "wasm-bindgen-macro-support"
591
- version = "0.2.114"
591
+ version = "0.2.115"
592
592
  source = "registry+https://github.com/rust-lang/crates.io-index"
593
- checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
593
+ checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
594
594
  dependencies = [
595
595
  "bumpalo",
596
596
  "proc-macro2",
@@ -601,9 +601,9 @@ dependencies = [
601
601
 
602
602
  [[package]]
603
603
  name = "wasm-bindgen-shared"
604
- version = "0.2.114"
604
+ version = "0.2.115"
605
605
  source = "registry+https://github.com/rust-lang/crates.io-index"
606
- checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
606
+ checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
607
607
  dependencies = [
608
608
  "unicode-ident",
609
609
  ]
@@ -738,18 +738,18 @@ dependencies = [
738
738
 
739
739
  [[package]]
740
740
  name = "zerocopy"
741
- version = "0.8.42"
741
+ version = "0.8.48"
742
742
  source = "registry+https://github.com/rust-lang/crates.io-index"
743
- checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3"
743
+ checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
744
744
  dependencies = [
745
745
  "zerocopy-derive",
746
746
  ]
747
747
 
748
748
  [[package]]
749
749
  name = "zerocopy-derive"
750
- version = "0.8.42"
750
+ version = "0.8.48"
751
751
  source = "registry+https://github.com/rust-lang/crates.io-index"
752
- checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f"
752
+ checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
753
753
  dependencies = [
754
754
  "proc-macro2",
755
755
  "quote",
data/Cargo.toml CHANGED
@@ -12,4 +12,4 @@ debug = true
12
12
  [patch.crates-io]
13
13
  magnus = { git = "https://github.com/matsadler/magnus", rev = "4e46772" }
14
14
  magnus-macros = { git = "https://github.com/matsadler/magnus", rev = "4e46772" }
15
- rb-sys = { git = "https://github.com/oxidize-rb/rb-sys.git", rev = "b42b5fba0424fcc6778fed9a01e05215eaa53f85" }
15
+ rb-sys = { git = "https://github.com/oxidize-rb/rb-sys.git", rev = "daa12b6521a0c442a705d7f9a929029119aab1d6" }
@@ -28,7 +28,7 @@ rb-sys = { version = "0.9.124", features = ["global-allocator"] }
28
28
  magnus = { version = "0.9" }
29
29
 
30
30
  # parsanol parser library (from git for latest features)
31
- parsanol = { git = "https://github.com/parsanol/parsanol-rs", rev = "3716de2", features = ["ruby"] }
31
+ parsanol = { git = "https://github.com/parsanol/parsanol-rs", branch = "main", features = ["ruby"] }
32
32
 
33
33
  # Logging
34
34
  log = "0.4"
@@ -115,7 +115,7 @@ module Parsanol
115
115
  # Convert to JSON for native parser
116
116
  #
117
117
  # @return [String] JSON representation
118
- def to_json(*_args)
118
+ def to_json(*)
119
119
  build.to_json
120
120
  end
121
121
 
@@ -57,9 +57,8 @@ module Parsanol
57
57
  # pre-decoded Ruby value from _parse_raw
58
58
  # @param input [String] Original input string (for Slice references)
59
59
  # @param slice_class [Class] The Slice class to use
60
- # @param grammar_atom [Parsanol::Atoms::Base] The grammar atom (unused, kept for API compat)
61
60
  # @return [Object] Transformed Ruby AST
62
- def decode_and_flatten(data, input, slice_class, _grammar_atom)
61
+ def decode_and_flatten(data, input, slice_class)
63
62
  # Check if data is batch data (flat u64 array) or already a Ruby value
64
63
  if data.is_a?(Integer) || (data.is_a?(Array) && data.first.is_a?(Integer))
65
64
  # Batch data (flat u64 array) - decode first, then transform
@@ -323,9 +323,26 @@ module Parsanol
323
323
  all_items_are_hashes = non_hash_items.empty?
324
324
 
325
325
  if all_items_are_hashes
326
- # Merge all inner hashes into merged_hash
327
- item.each do |sub_item|
328
- merged_hash.merge!(sub_item) if sub_item.is_a?(Hash)
326
+ # Check if merging would overwrite existing keys in merged_hash.
327
+ # If so, this is a repetition pattern (item >> (sep >> item).repeat)
328
+ # and should be kept as array, not merged.
329
+ # Example: merged_hash={namedTypeOrRename: A}, array=[{namedTypeOrRename: B}]
330
+ # → should produce [{namedTypeOrRename: A}, {namedTypeOrRename: B}]
331
+ existing_keys = merged_hash.keys
332
+ shares_keys = item.any? do |sub_item|
333
+ sub_item.is_a?(Hash) && sub_item.keys.intersect?(existing_keys)
334
+ end
335
+
336
+ if shares_keys
337
+ has_non_empty_array = true
338
+ item.each do |sub_item|
339
+ hash_count += 1 if sub_item.is_a?(Hash)
340
+ end
341
+ total_items += 1
342
+ else
343
+ item.each do |sub_item|
344
+ merged_hash.merge!(sub_item) if sub_item.is_a?(Hash)
345
+ end
329
346
  end
330
347
  else
331
348
  # Non-empty repetition with non-hash items - mark that we should keep as array
@@ -25,18 +25,5 @@ module Parsanol
25
25
  EMPTY_ARRAY = [].freeze
26
26
  EMPTY_HASH = {}.freeze
27
27
  end
28
-
29
- # Symbol cache to avoid repeated string-to-symbol conversions
30
- # This is a class variable to share across all transformations
31
- @@symbol_cache = {}
32
-
33
- # Convert string key to symbol with caching
34
- # @param key [String, Symbol] The key to convert
35
- # @return [Symbol] The symbol version of the key
36
- def self.cached_symbol(key)
37
- return key if key.is_a?(Symbol)
38
-
39
- @@symbol_cache[key] ||= key.to_sym
40
- end
41
28
  end
42
29
  end
@@ -10,8 +10,6 @@ require "parsanol/native/batch_decoder"
10
10
 
11
11
  module Parsanol
12
12
  module Native
13
- VERSION = "0.1.0"
14
-
15
13
  class << self
16
14
  # Check if native extension is available
17
15
  def available?
@@ -41,20 +39,38 @@ module Parsanol
41
39
  raise LoadError, "Native parser not available" unless available?
42
40
 
43
41
  # Handle both grammar atoms and pre-serialized JSON strings
44
- if grammar.is_a?(String)
45
- grammar_json = grammar
46
- grammar_atom = nil
47
- else
48
- grammar_json = Parser.serialize_grammar(grammar)
49
- grammar_atom = grammar
50
- end
42
+ grammar_json = if grammar.is_a?(String)
43
+ grammar
44
+ else
45
+ Parser.serialize_grammar(grammar)
46
+ end
51
47
 
52
48
  # Use _parse_raw which returns properly tagged Ruby arrays via transform_ast.
53
49
  # The batch format doesn't preserve :repetition/:sequence tags, so we use
54
50
  # the direct FFI path. Apply the Ruby transformer to handle tags correctly.
55
51
  raw_ast = _parse_raw(grammar_json, input)
56
- BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice,
57
- grammar_atom)
52
+ BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice)
53
+ end
54
+
55
+ # Memory-bounded parsing without packrat cache.
56
+ #
57
+ # This creates a fresh arena and empty cache per call, bounding memory
58
+ # to AST size rather than input × atoms. Use for large files.
59
+ #
60
+ # @param grammar [Parsanol::Atoms::Base] Ruby grammar definition
61
+ # @param input [String] Input string to parse
62
+ # @return [Hash, Array, Parsanol::Slice] Transformed AST
63
+ def parse_fresh(grammar, input)
64
+ raise LoadError, "Native parser not available" unless available?
65
+
66
+ grammar_json = if grammar.is_a?(String)
67
+ grammar
68
+ else
69
+ Parser.serialize_grammar(grammar)
70
+ end
71
+
72
+ raw_ast = _parse_fresh_raw(grammar_json, input)
73
+ BatchDecoder.decode_and_flatten(raw_ast, input, Parsanol::Slice)
58
74
  end
59
75
 
60
76
  # Parse and return RAW AST without transformation.
@@ -166,13 +182,6 @@ module Parsanol
166
182
  end
167
183
  stats
168
184
  end
169
-
170
- private
171
-
172
- # Get the Slice class
173
- def get_slice_class
174
- Parsanol::Slice
175
- end
176
185
  end
177
186
  end
178
187
  end
@@ -89,11 +89,10 @@ module Parsanol
89
89
  # Parse input and return direct Ruby objects (no serialization)
90
90
  #
91
91
  # @param input [String] The input string to parse
92
- # @param options [Hash] Parse options (ignored for zero-copy)
93
92
  # @return [Object] Direct Ruby object (type depends on grammar)
94
93
  # @raise [LoadError] If native extension not available
95
94
  # @raise [Parsanol::ParseFailed] If parsing fails
96
- def parse(input, _options = {})
95
+ def parse(input)
97
96
  unless Parsanol::Native.available?
98
97
  raise LoadError,
99
98
  "ZeroCopy mode requires native extension for direct FFI object construction. " \
@@ -67,6 +67,10 @@ module Parsanol
67
67
 
68
68
  alias length size
69
69
 
70
+ def empty?
71
+ content.empty?
72
+ end
73
+
70
74
  def +(other)
71
75
  self.class.new(@byte_position, content + other.to_s, @input)
72
76
  end
@@ -110,7 +114,7 @@ module Parsanol
110
114
  as_json.to_json(*)
111
115
  end
112
116
 
113
- def as_json(_options = {})
117
+ def as_json
114
118
  result = { "value" => content, "offset" => offset, "length" => length }
115
119
  if @input
116
120
  line, column = line_and_column
@@ -97,13 +97,6 @@ module Parsanol
97
97
  return hi if hi <= lo
98
98
  end
99
99
  end
100
-
101
- # Legacy method name for backward compatibility
102
- alias find_mid midpoint_index
103
- alias lbound lower_bound_index
104
100
  end
105
-
106
- # Legacy constant name for backward compatibility
107
- RangeSearch = IntervalLookup
108
101
  end
109
102
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Parsanol
4
- VERSION = "1.3.8"
4
+ VERSION = "1.3.10"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parsanol
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.8
4
+ version: 1.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.