RubyGems - oo_peg - Versions diffs - 0.1.0 - Mend

oo_peg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +7 -0
data/LICENSE +235 -0
data/README.md +3 -0
data/lib/enumerable.rb +28 -0
data/lib/oo_peg/input.rb +34 -0
data/lib/oo_peg/ostruct.rb +8 -0
data/lib/oo_peg/parser/class_methods.rb +33 -0
data/lib/oo_peg/parser/combinators/lazy.rb +17 -0
data/lib/oo_peg/parser/combinators.rb +458 -0
data/lib/oo_peg/parser.rb +72 -0
data/lib/oo_peg/parsers/advanced/operator_parser.rb +22 -0
data/lib/oo_peg/parsers/advanced/sexp_parser.rb +77 -0
data/lib/oo_peg/parsers/advanced/string_parser.rb +40 -0
data/lib/oo_peg/parsers/advanced/symbol_parser.rb +22 -0
data/lib/oo_peg/parsers/advanced.rb +117 -0
data/lib/oo_peg/parsers/base_parsers.rb +252 -0
data/lib/oo_peg/parsers/common_parsers.rb +193 -0
data/lib/oo_peg/parsers/lispy_parser.rb +18 -0
data/lib/oo_peg/parsers/pseudo_parsers.rb +12 -0
data/lib/oo_peg/parsers/true_set.rb +11 -0
data/lib/oo_peg/parsers.rb +13 -0
data/lib/oo_peg/result.rb +88 -0
data/lib/oo_peg/version.rb +8 -0
data/lib/oo_peg.rb +257 -0
metadata +91 -0

data/lib/oo_peg/parsers/advanced.rb ADDED Viewed

@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+require_relative '../parsers'
+require_relative 'advanced/operator_parser'
+require_relative 'advanced/sexp_parser'
+require_relative 'advanced/string_parser'
+require_relative 'advanced/symbol_parser'
+module OOPeg
+  module Parsers
+    ##
+    #
+    # == The String Parser
+    #
+    # By default it parses strings that start with a +'+ or +"+, does not
+    # support interpolation and allows escaping the delimiting quotes
+    # with a backslash.
+    #
+    #    # example: default StringParser
+    #
+    #    parse(string_parser, '"Hello"').ast => 'Hello'
+    #    parse(string_parser, %{"'Hello'"}).ast => "'Hello'"
+    #    parse(string_parser, %{'"Hello"'}).ast => '"Hello"'
+    #
+    # Including quotes can be done with backslashes
+    #
+    #    # example: escaping quotes
+    #
+    #    parse(string_parser, '"Hello\\"World\\""and more').ast => 'Hello"World"'
+    #
+    # Escaping quotes by doubling them is not the default behavior...
+    #
+    #    # example: no double escaping by default
+    #
+    #    parse(string_parser, '"Hello""World"""and more').ast => 'Hello'
+    #
+    # ... but can be enabled with the +doubled_escape+ flag
+    #
+    #    # example: double escaping enabled
+    #
+    #    parse(string_parser(doubled_escape: true), '"Hello""World"""and more').ast => 'Hello"World"'
+    #
+    # == The Symbol Parser
+    #
+    # is nothing more than a parser that parses a sequence of a prefix character (defaulting to +:+)
+    # followed by a sequence of characters parsed by the +id_parser+.
+    #
+    # Therefore all configurations of the +id_parser+ apply also to the +symbol_parser+.
+    #
+    # However, the returned ast is not a +String+, but a +Symbol+
+    #
+    #     # example: the default symbol parser
+    #
+    #     parse(symbol_parser, ':hello ').ast => :hello
+    #
+    # == Putting it all together: The SexpParser
+    #
+    # with all the above and all the other predefined parsers it is now very easy
+    # to expose an +sexp_parser+
+    #
+    # It is configured by the following parameters
+    #
+    # - the parentheses pairs which default to <tt>(), {}</tt> and <tt>[]</tt>.
+    # - the +head_parser+ which defaults to the +tail_parser+ but can be set
+    #   to a different parser. It defines how the first element of a sexp can be parsed.
+    # - the +tail_parser+ which defaults to a selection of +string_parser+, +int_parser+,
+    #   +symbol_parser+, +operator_parser+ and, of course, recursively +sexp_parser+
+    # - the +seperation_parser+ which defaults to the +ws_parser+
+    #
+    # So basically the grammar which is parsed is like the following
+    #
+    #     s_exp ::= "(" inner ")" | "{" inner "}" | "[" inner "]"
+    #     inner ::=  [seperator? head [seperator tail]... seperator?]?
+    #     head  ::=  tail  ; unless configured differently
+    #     tail  ::=  string | symbol | integer | operator | s_exp
+    #
+    # Here is a quite long...
+    #
+    #     # example: An s-expression
+    #
+    #     s_expression = <<-EOS
+    #     (fn stupid [a]
+    #       (let [x 42 y (succ x) z (+ x y)]
+    #       (if (> a z)
+    #         (- a z)
+    #         (error 'a too small'))
+    #       ))
+    #     EOS
+    #
+    #     expected =
+    #      [:sexp,
+    #        [[:id, "fn"], [:id, "stupid"], [[:arr, [[:id, "a"]]]],
+    #          [[:sexp,
+    #            [[:id, "let"],
+    #             [[:arr, [[:id, "x"], [:int, 42], [:id, "y"], [[:sexp, [[:id, "succ"], [:id, "x"]]]], [:id, "z"], [[:sexp, [[:op, "+"], [:id, "x"], [:id, "y"]]]]]]],
+    #             [[:sexp,
+    #               [[:id, "if"], [[:sexp, [[:op, ">"], [:id, "a"], [:id, "z"]]]],
+    #                 [[:sexp, [[:op, "-"], [:id, "a"], [:id, "z"]]]],
+    #                 [[:sexp, [[:id, "error"], [:str, "a too small"]]]]]]]]]]]]
+    #
+    #     ast = parse(sexp_parser.map(&:first), s_expression).ast
+    #
+    #     ast => ^expected
+    #
+    module Advanced
+      def operator_parser(**kwds) = OperatorParser.make(**kwds)
+      def sexp_parser(**kwds) = SexpParser.make(**kwds)
+      def string_parser(delim: %{'"}, doubled_escape: nil, extra_parser: nil, escape_with: "\\", name: "StringParser") = StringParser.make(delim:, doubled_escape:, extra_parser:, escape_with:, name:)
+      def symbol_parser(prefix: %{:}, inner_class: [:alnum, '_'], lead_class: :alpha, name: nil) = SymbolParser.make(prefix:, inner_class:, lead_class:, name:)
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers/base_parsers.rb ADDED Viewed

@@ -0,0 +1,252 @@
+# frozen_string_literal: true
+require_relative 'true_set'
+require_relative '../parser'
+module OOPeg
+  module Parsers
+    ##
+    # +BaseParsers+
+    #
+    # Implementation of all parsers that do not rely on other parsers or combinators
+    # ===== +char_parser+
+    #
+    # Parser that parses a set of characters or any character
+    #
+    #     # example: parse any char
+    #
+    #     parser = char_parser
+    #
+    #     parse(char_parser, 'x').ast => 'x'
+    #     parse(char_parser, 'xa').ast => 'x'
+    #     parse(char_parser, '').ast => nil
+    #
+    #     # example: parse just one char
+    #
+    #     parser = char_parser('a')
+    #
+    #     parse(parser, 'a').ast => 'a'
+    #     parse(parser, 'b').ast => nil
+    #
+    #     # example: parse a set of chars
+    #
+    #     parser = char_parser('ab')
+    #
+    #     parse(parser, 'a').ast => 'a'
+    #     parse(parser, 'b').ast => 'b'
+    #     parse(parser, 'c').ast => nil
+    #
+    # Sometimes we want to parse the complementary set of characters, then we can pass
+    # in <tt>negate: true</tt>
+    #
+    #     # example: parse complement
+    #
+    #     consonne = char_parser('aeiouy', negate: true)
+    #
+    #     parse(consonne, 'x').ast => 'x'
+    #     parse(consonne, 'i') not! ok
+    #
+    # ===== +char_class_parser+
+    #
+    # Parses a Regexp character class
+    #
+    #     # example: parse digits
+    #
+    #     digit_parser = char_class_parser(:digit)
+    #
+    #     parse(digit_parser, '9').ast => '9'
+    #     parse(digit_parser, 'a').ast => nil
+    #
+    # But we can parse a union of character classes
+    #
+    #     # example: parse digits and lower case letters
+    #
+    #     parser = char_class_parser(:digit, :lower)
+    #
+    #     parse(parser, '8').ast => '8'
+    #     parse(parser, 'a8').ast => 'a'
+    #     parse(parser, 'é').ast => 'é'
+    #     parse(parser, 'A8').ast => nil
+    #
+    # The available character classes are defined here: https://ruby-doc.org/3.4.1/Regexp.html#class-Regexp-label-POSIX+Bracket+Expressions
+    #
+    # ==== _Pseudo_ _Parsers_
+    #
+    # These are useful parsers in the context of combinators, but they must be
+    # used with care as they do not advance the input.
+    #
+    # This holds also for the +many+ combinator unless a <tt>min: >0</tt> argument is
+    # provided.
+    #
+    # ===== +end_parser+
+    #
+    # Only parses an empty string, ast is always nil.
+    #
+    #     # example: end_parser
+    #
+    #     parse(end_parser, "") is! ok
+    #     parse(end_parser, "a") not! ok
+    #
+    # Its major use case is to assure that the whole input has been parsed, in other words, that
+    # there are no spurious characters at the end
+    #
+    #     # example: end_parser, useful after all
+    #
+    #     parser = int_parser.and(end_parser).map(&:first)
+    #
+    #     parse(parser, "1905").ast => 1905
+    #     parse(parser, "1905oh") not! ok
+    #
+    # ===== +true_parser+
+    #
+    # It always succeeds, but does not advance the input, this can be useful in some
+    # complex combinators we will show below
+    # OOPeg@Some+Complex+Combinators
+    #
+    #     # example: true parser
+    #
+    #     parse(true_parser, "hello") is! ok
+    #     # But it does not advance
+    #     parse(true_parser, "hello").input.content => %w[h e l l o]
+    #
+    # Very originally there is also the...
+    #
+    # ===== +false_parser+
+    #
+    # ... which always fails
+    #
+    #     # example: false_parser
+    #
+    #     # parse(false_parser, "hello") not! ok
+    #     parse(false_parser, "hello").input.content ==> %w[h e l l o]
+    #
+    module BaseParsers
+      # Parses a character which is a member of any of the `char_classes`
+      def char_class_parser(*char_classes, name: nil)
+        case char_classes
+        in [char_class]
+          _1_char_class_parser(char_class, name:)
+        else
+          _char_classes_parser(*char_classes, name:)
+        end
+      end
+      def char_parser(set=nil, name: nil, negate: false)
+        set = mk_set(set)
+        name ||= "char_parser(#{set.to_a.join})"
+        parser_name = name
+        Parser.new(name) do |input|
+          case input.content
+          in []
+            Result.nok(error: "unexpected end of input", input:, parser_name:)
+          in [h, *]
+            if set.member?(h) && !negate || !set.member?(h) && negate
+              Result.ok(ast: h, input: input.advance)
+            else
+              Result.nok(input:, error: "#{h} is not member of the required set #{set}", parser_name: name)
+            end
+          end
+        end
+      end
+      # Pseudo Parsers
+      def end_parser(name: nil)
+        parser_name = name || "end_parser"
+        Parser.new(parser_name) do |input|
+          case input.content
+          in []
+            Result.ok(ast: nil, input:)
+          in _
+            Result.nok(input: input, error: "not at end of input", parser_name:)
+          end
+        end
+      end
+      def false_parser(name: nil)
+        parser_name = name || "false_parser"
+        Parser.new(parser_name) { Result.nok(error: "false parser always fails", input: it, parser_name:) }
+      end
+      def true_parser(name: nil)
+        parser_name = name || "true_parser"
+        Parser.new(parser_name) { Result.ok(ast: nil, input: it) }
+      end
+      # def make_parser(parser)
+      #   case parser
+      #   when String
+      #     char_parser(parser)
+      #   else
+      #     parser
+      #   end
+      # end
+      # def make_parsers(*parsers)
+      #   parsers
+      #     .flatten
+      #     .map { make_parser it }
+      # end
+      private
+      def _1_char_class_parser(char_class, name:)
+        rgx = Regexp.compile("[[:#{char_class}:]]")
+        name ||= "char_class_parser(:#{char_class})"
+        Parser.new(name) do |input|
+          case input.content
+          in []
+            Result.nok(error: "unexpected end of input", parser_name: name, input:)
+          in [h, *]
+            if rgx.match?(h)
+              Result.ok(ast: h, input: input.advance)
+            else
+              Result.nok(input:, parser_name: name, error: "#{h} does not match the char class: :#{char_class}")
+            end
+          end
+        end
+      end
+      def _char_classes_parser(*char_classes, name:)
+        rgx = Regexp.compile("[#{_compile_char_classes(char_classes)}]")
+        name ||= "char_class_parser(#{char_classes.inspect})"
+        Parser.new(name) do |input|
+          case input.content
+          in []
+            Result.nok(error: "unexpected end of input", input:, parser_name: name)
+          in [h, *]
+            if rgx.match?(h)
+              Result.ok(ast: h, input: input.advance)
+            else
+              Result.nok(input:, error: "#{h} does not match the char class: :#{char_classes}")
+            end
+          end
+        end
+      end
+      # Translates one character class specification into a regexp bracket fragment:
+      # a Symbol becomes a POSIX class (<tt>:digit</tt> -> <tt>[:digit:]</tt>), a String
+      # becomes a bracket expression of its own characters (<tt>"_"</tt> -> <tt>[_]</tt>).
+      # The String is inserted verbatim, it is not regexp-escaped.
+      # Any other type matches no +when+ branch, so the method returns +nil+.
+      def _compile_char_class(char_class)
+        case char_class
+        when Symbol
+          "[:#{char_class}:]"
+        when String
+          "[#{char_class}]"
+        end
+      end
+      def _compile_char_classes(char_classes)
+        "[" +
+          char_classes
+          .map { _compile_char_class it }
+          .join + "]"
+      end
+      def mk_set(set)
+        if set
+          Set.new(set.grapheme_clusters)
+        else
+          TrueSet
+        end
+      end
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers/common_parsers.rb ADDED Viewed

@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+module OOPeg
+  module Parsers
+    ##
+    #
+    # These are _convenience_ parsers that parse tokens which are commonly used
+    # in modern languages like +Elixir+, +Lua+, +Scheme+ or +Javascript+
+    #
+    #
+    # ===== +int_parser+
+    #
+    # As seen in the Quick Start Section it uses the +.and+ and the +.map+ combinators
+    # on the basic parsers.
+    #
+    #     # example: int_parser (again)
+    #
+    #     parse(int_parser, "-42").ast => -42
+    #     parse(int_parser, "+3").ast => 3
+    #     parse(int_parser, "73").ast => 73
+    #     parse(int_parser, "x4") not! ok
+    #
+    #     # example: int_parser (with base 16)
+    #
+    #     parse(int_parser(hex: true), "cafee").ast => 831470
+    #
+    # Let us introduce the
+    # ===== +word_parser+
+    #
+    # also using the +.and+ combinator to make a hex_int_parser
+    #
+    #     # example: hex_int_parser
+    #
+    #     hex_int_parser = word_parser("0x").and(int_parser(hex: true))
+    #
+    #     parse(hex_int_parser, "0x1f").ast => ["0x", 31]
+    #
+    # Now let us also introduce the +.map+ combinator to get rid of the superfluous output
+    #
+    #     # example: A better hex_int_parser
+    #
+    #     better_hex_int_parser = word_parser("0x")
+    #       .and(int_parser(hex: true))
+    #       .map(&:last)
+    #
+    #     parse(better_hex_int_parser, "0x1f").ast =>  31
+    #
+    # And this is also a perfect opportunity to introduce the +.or+ combinator by creating a
+    # more general int parser
+    #
+    #     # example: A more general int parser
+    #
+    #     general_int_parser =
+    #       word_parser("0x")
+    #         .and(int_parser(hex: true))
+    #         .map(&:last)
+    #         .or(int_parser)
+    #
+    #     parse(general_int_parser, "12").ast => 12
+    #     parse(general_int_parser, "0xff").ast => 255
+    #     parse(general_int_parser, "-4").ast => -4
+    #     parse(general_int_parser, "ff") not! ok
+    #
+    #
+    # ===== +id_parser+
+    #
+    # This is a highly configurable parser which allows parsing tokens which are
+    # typically names or identifiers.
+    #
+    # *N.B.* that it is not a combinator as none of its arguments is a parser.
+    #
+    # It parses an identifier based on a lead character class (for the first character) and
+    # an inner character class (for the rest of characters). Their default values are as follows
+    #
+    # <tt>lead_class: :alpha, inner_class: [:alnum, "_"]</tt>
+    #
+    # which parses like...
+    #
+    #     # example: the default id_parser
+    #
+    #     parse(id_parser, "_42") not! ok
+    #     parse(id_parser, "42") not! ok
+    #     parse(id_parser, "a_42").ast => "a_42"
+    #
+    # But if we want a more lispy style we could simply...
+    #
+    #     # example: the lispy id_parser
+    #
+    #     lispy_id_parser = id_parser(inner_class: [:alnum, "-"])
+    #
+    #     parse(lispy_id_parser, "-42") not! ok
+    #     parse(lispy_id_parser, "42") not! ok
+    #     parse(lispy_id_parser, "_42") not! ok
+    #     parse(lispy_id_parser, "a-42").ast => "a-42"
+    #
+    # And by the same token we can allow leading +-+ characters
+    #
+    #     # example: the weird lispy id_parser
+    #
+    #     weird_lispy_id_parser = id_parser(lead_class: [:alpha, "-"], inner_class: [:alnum, "-"])
+    #
+    #     parse(weird_lispy_id_parser, "42") not! ok
+    #     parse(weird_lispy_id_parser, "_42") not! ok
+    #     parse(weird_lispy_id_parser, "a-42").ast => "a-42"
+    #     parse(weird_lispy_id_parser, "-42").ast => "-42"
+    #
+    # ===== +kwd_parser+, restricting ids to a set of words
+    #
+    #     # example: only a defined set of identifiers
+    #
+    #     cond_parser = kwd_parser(Set.new(%w[if else unless]))
+    #
+    #     parse(cond_parser, "if").ast => "if"
+    #     parse(cond_parser, "unless").ast => "unless"
+    #     parse(cond_parser, "else").ast => "else"
+    #     parse(cond_parser, "end") not! ok
+    #
+    #     # example: kwd_parser can be customized like the id_parser
+    #
+    #     parser = kwd_parser(Set.new(%w[1a 2b]), lead_class: :digit)
+    #
+    #     parse(parser, "1a").ast => "1a"
+    #     parse(parser, "2b").ast => "2b"
+    #     parse(parser, "3c") not! ok
+    #
+    # ==== +set_parser+, a convenience parser (does not need a set but a splatted list)
+    #
+    #     # example: set_parser
+    #
+    #     greek_parser = set_parser('alpha', 'beta', 'gamma')
+    #
+    #     parse(greek_parser, 'alpha').ast => 'alpha'
+    #     parse(greek_parser, 'beta').ast => 'beta'
+    #     parse(greek_parser, 'gamma').ast => 'gamma'
+    #
+    #     parse(greek_parser, 'delta') not! ok
+    #
+    module CommonParsers
+      def id_parser(name: nil, lead_class: :alpha, inner_class: [:alnum, "_"] )
+        char_class_parser(*Array(lead_class))
+          .and(char_class_parser(*Array(inner_class)).many, name:)
+          .map { it.flatten.join }
+      end
+      # Just parses any string starting with either a `+` or `-` sign followed by at least one
+      # _decimal digit_.
+      #
+      # **N.B.** that leading zeroes are parsed (and therefore ignored) and will not parse
+      # it as a hexadecimal or octal number
+      def int_parser(name: nil, hex: false)
+        name ||= "int_parser"
+        char_class = hex ? :xdigit : :digit
+        base = hex ? 16 : 10
+        char_parser("+-")
+          .maybe
+          .and(char_class_parser(char_class).many(min: 1))
+          .map_or_rename(name:) {
+            # require "debug"; binding.break
+            it.join.to_i(base) }
+        # .debug
+      end
+      def kwd_parser(set, name: nil, lead_class: :alpha, inner_class: [:alnum, "_"])
+        id_parser(name:, lead_class:, inner_class:)
+          .satisfy { set.member? it }
+      end
+      def set_parser(*elements, name: nil, lead_class: :alpha, inner_class: [:alnum, "_"])
+        kwd_parser(Set.new(elements), name:, lead_class:, inner_class:)
+      end
+      def word_parser(word, name: nil)
+        name ||= "word_parser(#{word})"
+        word.grapheme_clusters => [head, *tails]
+        char_parser(head)
+          .and(*tails.map { char_parser it }, name:)
+          .map(&:join)
+      end
+      def ws_parser(name=nil, min: 1, ignore: true)
+        name ||= "ws_parser"
+        parser =
+        char_class_parser(:space)
+          .many(min:, name:)
+        return parser unless ignore
+        parser.ignore
+      end
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers/lispy_parser.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+require_relative 'advanced'
+module OOPeg
+  module Parsers
+    ##
+    #
+    # A highly configurable parser for s-expressions
+    class LispyParser
+      private
+      # NOTE(review): unfinished stub, the constructor accepts its configuration
+      # (opening characters, closing characters, separator parser) but stores none of it.
+      # The default of +sep_parser+ calls +ws_parser+, which this class neither defines
+      # nor includes in the code visible here. TODO confirm it can be resolved before
+      # instantiating with the defaults.
+      def initialize(prefix: "([", suffix: "])", sep_parser: ws_parser)
+      end
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers/pseudo_parsers.rb ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+require_relative 'true_set'
+require_relative '../parser'
+module OOPeg
+  module Parsers
+    # Empty namespace: no parsers are defined in this module.
+    # NOTE(review): the pseudo parsers +end_parser+, +true_parser+ and +false_parser+
+    # are implemented in +BaseParsers+; presumably this is a placeholder, confirm.
+    module PseudoParsers
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers/true_set.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module OOPeg
+  module Parsers
+    # Stand-in for a set that contains everything: +member?+ answers +true+ for any argument.
+    # <tt>extend self</tt> makes the methods callable on the module itself.
+    module TrueSet extend self
+      # Every value counts as a member.
+      def member?(_) = true
+      # Placeholder element list; +char_parser+ joins it into its default parser name.
+      def to_a = %w[TrueSet]
+    end
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later

data/lib/oo_peg/parsers.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+require_relative "parsers/base_parsers"
+require_relative "parsers/common_parsers"
+require_relative "parser/combinators/lazy"
+module OOPeg
+  # Aggregate of the parser factories: including +Parsers+ brings in the methods of
+  # +BaseParsers+, +CommonParsers+ and +Parser::Combinators::Lazy+.
+  module Parsers
+    include BaseParsers
+    include CommonParsers
+    include Parser::Combinators::Lazy
+  end
+end
+# SPDX-License-Identifier: AGPL-3.0-or-later