RubyGems - csv_plus_plus - Versions diffs - 0.1.3 → 0.2.1 - Mend

csv_plus_plus 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

checksums.yaml +4 -4
data/README.md +13 -3
data/docs/CHANGELOG.md +18 -0
data/lib/csv_plus_plus/a1_reference.rb +202 -0
data/lib/csv_plus_plus/benchmarked_compiler.rb +3 -3
data/lib/csv_plus_plus/cell.rb +1 -35
data/lib/csv_plus_plus/cli.rb +43 -80
data/lib/csv_plus_plus/cli_flag.rb +77 -70
data/lib/csv_plus_plus/color.rb +1 -1
data/lib/csv_plus_plus/compiler.rb +31 -21
data/lib/csv_plus_plus/entities/ast_builder.rb +11 -4
data/lib/csv_plus_plus/entities/boolean.rb +16 -9
data/lib/csv_plus_plus/entities/builtins.rb +68 -40
data/lib/csv_plus_plus/entities/date.rb +14 -11
data/lib/csv_plus_plus/entities/entity.rb +11 -29
data/lib/csv_plus_plus/entities/entity_with_arguments.rb +18 -31
data/lib/csv_plus_plus/entities/function.rb +22 -11
data/lib/csv_plus_plus/entities/function_call.rb +35 -11
data/lib/csv_plus_plus/entities/has_identifier.rb +19 -0
data/lib/csv_plus_plus/entities/number.rb +15 -10
data/lib/csv_plus_plus/entities/reference.rb +77 -0
data/lib/csv_plus_plus/entities/runtime_value.rb +36 -23
data/lib/csv_plus_plus/entities/string.rb +13 -10
data/lib/csv_plus_plus/entities.rb +2 -18
data/lib/csv_plus_plus/error/cli_error.rb +17 -0
data/lib/csv_plus_plus/error/compiler_error.rb +17 -0
data/lib/csv_plus_plus/error/error.rb +18 -5
data/lib/csv_plus_plus/error/formula_syntax_error.rb +12 -13
data/lib/csv_plus_plus/error/modifier_syntax_error.rb +10 -36
data/lib/csv_plus_plus/error/modifier_validation_error.rb +6 -32
data/lib/csv_plus_plus/error/positional_error.rb +15 -0
data/lib/csv_plus_plus/error/writer_error.rb +1 -1
data/lib/csv_plus_plus/error.rb +4 -1
data/lib/csv_plus_plus/error_formatter.rb +111 -0
data/lib/csv_plus_plus/google_api_client.rb +18 -8
data/lib/csv_plus_plus/lexer/racc_lexer.rb +144 -0
data/lib/csv_plus_plus/lexer/tokenizer.rb +53 -17
data/lib/csv_plus_plus/lexer.rb +40 -1
data/lib/csv_plus_plus/modifier/data_validation.rb +1 -1
data/lib/csv_plus_plus/modifier/expand.rb +17 -0
data/lib/csv_plus_plus/modifier.rb +6 -1
data/lib/csv_plus_plus/options/file_options.rb +49 -0
data/lib/csv_plus_plus/options/google_sheets_options.rb +42 -0
data/lib/csv_plus_plus/options/options.rb +102 -0
data/lib/csv_plus_plus/options.rb +22 -110
data/lib/csv_plus_plus/parser/cell_value.tab.rb +65 -66
data/lib/csv_plus_plus/parser/code_section.tab.rb +92 -84
data/lib/csv_plus_plus/parser/modifier.tab.rb +40 -30
data/lib/csv_plus_plus/reader/csv.rb +50 -0
data/lib/csv_plus_plus/reader/google_sheets.rb +129 -0
data/lib/csv_plus_plus/reader/reader.rb +27 -0
data/lib/csv_plus_plus/reader/rubyxl.rb +37 -0
data/lib/csv_plus_plus/reader.rb +14 -0
data/lib/csv_plus_plus/runtime/graph.rb +6 -6
data/lib/csv_plus_plus/runtime/{position_tracker.rb → position.rb} +16 -5
data/lib/csv_plus_plus/runtime/references.rb +32 -27
data/lib/csv_plus_plus/runtime/runtime.rb +73 -67
data/lib/csv_plus_plus/runtime/scope.rb +280 -0
data/lib/csv_plus_plus/runtime.rb +9 -9
data/lib/csv_plus_plus/source_code.rb +14 -9
data/lib/csv_plus_plus/template.rb +17 -12
data/lib/csv_plus_plus/version.rb +1 -1
data/lib/csv_plus_plus/writer/csv.rb +32 -5
data/lib/csv_plus_plus/writer/excel.rb +19 -6
data/lib/csv_plus_plus/writer/file_backer_upper.rb +27 -14
data/lib/csv_plus_plus/writer/google_sheets.rb +23 -129
data/lib/csv_plus_plus/writer/{google_sheet_builder.rb → google_sheets_builder.rb} +39 -55
data/lib/csv_plus_plus/writer/merger.rb +56 -0
data/lib/csv_plus_plus/writer/open_document.rb +16 -2
data/lib/csv_plus_plus/writer/rubyxl_builder.rb +68 -43
data/lib/csv_plus_plus/writer/writer.rb +42 -0
data/lib/csv_plus_plus/writer.rb +58 -19
data/lib/csv_plus_plus.rb +26 -14
metadata +43 -18
data/lib/csv_plus_plus/entities/cell_reference.rb +0 -231
data/lib/csv_plus_plus/entities/variable.rb +0 -37
data/lib/csv_plus_plus/error/syntax_error.rb +0 -71
data/lib/csv_plus_plus/google_options.rb +0 -32
data/lib/csv_plus_plus/lexer/lexer.rb +0 -89
data/lib/csv_plus_plus/runtime/can_define_references.rb +0 -87
data/lib/csv_plus_plus/runtime/can_resolve_references.rb +0 -209
data/lib/csv_plus_plus/writer/base_writer.rb +0 -45

data/lib/csv_plus_plus/error/modifier_syntax_error.rb CHANGED Viewed

@@ -1,62 +1,36 @@
 # typed: strict
 # frozen_string_literal: true
-require_relative './syntax_error'
 module CSVPlusPlus
   module Error
-    # An Error that wraps a +ModifierValidationError+ with a +Runtime+.
-    class ModifierSyntaxError < ::CSVPlusPlus::Error::SyntaxError
+    # A syntax error encountered when parsing a modifier definition
+    class ModifierSyntaxError < ::CSVPlusPlus::Error::Error
       extend ::T::Sig
+      include ::CSVPlusPlus::Error::PositionalError
       sig { returns(::String) }
       attr_reader :bad_input
-      sig { returns(::String) }
-      attr_reader :message
       sig { returns(::T.nilable(::Symbol)) }
       attr_reader :modifier
       sig do
         params(
-          runtime: ::CSVPlusPlus::Runtime::Runtime,
-          modifier_validation_error: ::CSVPlusPlus::Error::ModifierValidationError
-        ).returns(::CSVPlusPlus::Error::ModifierSyntaxError)
-      end
-      # Create a +ModifierSyntaxError+ given a +runtime+ and +ModifierValidationError+.
-      #
-      # @param runtime [Runtime]
-      # @param modifier_validation_error [ModifierValidationError]
-      #
-      # @return [ModifierSyntaxError]
-      def self.from_validation_error(runtime, modifier_validation_error)
-        new(
-          runtime,
-          modifier: modifier_validation_error.modifier,
-          bad_input: modifier_validation_error.bad_input,
-          message: modifier_validation_error.message,
-          wrapped_error: modifier_validation_error
-        )
-      end
-      sig do
-        params(
-          runtime: ::CSVPlusPlus::Runtime::Runtime,
-          bad_input: ::String,
           message: ::String,
+          bad_input: ::String,
           modifier: ::T.nilable(::Symbol),
           wrapped_error: ::T.nilable(::StandardError)
         ).void
       end
-      # @param runtime [Runtime] The current runtime
+      # @param message [String] The error message
+      # @param bad_input [String] The offending input
+      # @param modifier [Symbol] The modifier being parsed
       # @param wrapped_error [ModifierValidationError] The validation error that this is wrapping
-      def initialize(runtime, bad_input:, message:, modifier: nil, wrapped_error: nil)
+      def initialize(message, bad_input:, modifier: nil, wrapped_error: nil)
+        super(message, wrapped_error:)
         @bad_input = bad_input
         @modifier = modifier
-        @message = message
-        super(runtime, wrapped_error:)
       end
       sig { override.returns(::String) }

data/lib/csv_plus_plus/error/modifier_validation_error.rb CHANGED Viewed

@@ -9,20 +9,9 @@ module CSVPlusPlus
     # @attr_reader bad_input [String] The offending input that caused the error to be thrown
     # @attr_reader choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
     # @attr_reader message [String, nil] A relevant message to show
-    class ModifierValidationError < ::CSVPlusPlus::Error::Error
+    class ModifierValidationError < ::CSVPlusPlus::Error::ModifierSyntaxError
       extend ::T::Sig
-      sig { returns(::String) }
-      attr_reader :bad_input
-      sig { returns(::T.nilable(::T.class_of(::T::Enum))) }
-      attr_reader :choices
-      sig { returns(::String) }
-      attr_reader :message
-      sig { returns(::Symbol) }
-      attr_reader :modifier
+      include ::CSVPlusPlus::Error::PositionalError
       sig do
         params(
@@ -38,31 +27,16 @@ module CSVPlusPlus
       # @param bad_input [String] The offending input that caused the error to be thrown
       # @param choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
       # @param message [String, nil] A relevant message to show
-      # rubocop:disable Metrics/MethodLength
       def initialize(modifier, bad_input:, choices: nil, message: nil)
-        @bad_input = bad_input
-        @choices = choices
-        @modifier = modifier
-        @message = ::T.let(
-          if @choices
-            "must be one of (#{@choices.values.map(&:serialize).join(', ')})"
+        message = ::T.let(
+          if choices
+            "must be one of (#{choices.values.map(&:serialize).join(', ')})"
           else
             ::T.must(message)
           end,
           ::String
         )
-        super(@message)
-      end
-      # rubocop:enable Metrics/MethodLength
-      sig { returns(::String) }
-      # A user-facing error message
-      #
-      # @return [::String]
-      def error_message
-        @message
+        super(message, bad_input:, modifier:)
       end
     end
   end

data/lib/csv_plus_plus/error/positional_error.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# typed: strict
+# frozen_string_literal: true
+module CSVPlusPlus
+  module Error
+    # Methods that can be included into a class to denote that its result is dependent on the current
+    # +Runtime::Position+
+    module PositionalError
+      extend ::T::Sig
+      extend ::T::Helpers
+      interface!
+    end
+  end
+end

data/lib/csv_plus_plus/error/writer_error.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module CSVPlusPlus
       sig { override.returns(::String) }
       # @return [::String]
       def error_message
-        "Error writing template: #{message}"
+        "Error writing csvpp template: #{message}"
       end
     end
   end

data/lib/csv_plus_plus/error.rb CHANGED Viewed

@@ -2,10 +2,13 @@
 # frozen_string_literal: true
 require_relative './error/error'
+require_relative './error/positional_error'
+require_relative './error/cli_error'
+require_relative './error/compiler_error'
 require_relative './error/formula_syntax_error'
 require_relative './error/modifier_syntax_error'
 require_relative './error/modifier_validation_error'
-require_relative './error/syntax_error'
 require_relative './error/writer_error'
 module CSVPlusPlus

data/lib/csv_plus_plus/error_formatter.rb ADDED Viewed

@@ -0,0 +1,111 @@
+# typed: strict
+# frozen_string_literal: true
+module CSVPlusPlus
+  # Handle any errors potentially thrown during compilation.  This could be anything from a user error (for example
+  # calling with invalid csvpp code) to an error calling Google Sheets API or writing to the filesystem.
+  class ErrorFormatter
+    extend ::T::Sig
+    sig do
+      params(options: ::CSVPlusPlus::Options::Options, runtime: ::CSVPlusPlus::Runtime::Runtime).void
+    end
+    # @param options [Options]
+    # @param runtime [Runtime::Runtime]
+    def initialize(options:, runtime:)
+      @options = options
+      @runtime = runtime
+    end
+    sig { params(error: ::StandardError).void }
+    # Nicely handle a given error.  How it's handled depends on if it's our error and if @options.verbose
+    #
+    # @param error [CSVPlusPlus::Error, Google::Apis::ClientError, StandardError]
+    def handle_error(error)
+      # make sure that we're on a newline (verbose mode will probably be in the middle of printing a benchmark)
+      puts("\n\n") if @options.verbose
+      case error
+      when ::CSVPlusPlus::Error::Error
+        handle_internal_error(error)
+      when ::Google::Apis::ClientError
+        handle_google_error(error)
+      else
+        unhandled_error(error)
+      end
+    end
+    private
+    sig { params(error: ::StandardError).void }
+    # An error was thrown that we weren't planning on
+    def unhandled_error(error)
+      warn(
+        <<~ERROR_MESSAGE)
+          An unexpected error was encountered.  Please try running again with --verbose and
+          report the error at: https://github.com/patrickomatic/csv-plus-plus/issues/new'
+        ERROR_MESSAGE
+      return unless @options.verbose
+      warn(error.full_message)
+      warn("Cause: #{error.cause}") if error.cause
+    end
+    sig { params(error: ::CSVPlusPlus::Error::Error).void }
+    def handle_internal_error(error)
+      warn(with_position(error))
+      handle_wrapped_error(::T.must(error.wrapped_error)) if error.wrapped_error
+    end
+    sig { params(wrapped_error: ::StandardError).void }
+    def handle_wrapped_error(wrapped_error)
+      return unless @options.verbose
+      warn(wrapped_error.full_message)
+      warn((wrapped_error.backtrace || []).join("\n")) if wrapped_error.backtrace
+    end
+    sig { params(error: ::Google::Apis::ClientError).void }
+    def handle_google_error(error)
+      warn("Error making Google Sheets API request: #{error.message}")
+      return unless @options.verbose
+      warn("#{error.status_code} Error making Google API request [#{error.message}]: #{error.body}")
+    end
+    sig { params(error: ::CSVPlusPlus::Error::Error).returns(::String) }
+    # Output a user-helpful string that references the runtime state
+    #
+    # @param error [Error::Error] The error message to be prefixed with a filename and position
+    #
+    # @return [String]
+    def with_position(error)
+      message = error.error_message
+      case error
+      when ::CSVPlusPlus::Error::PositionalError
+        "#{message_prefix}#{cell_index} #{message}"
+      else
+        message
+      end
+    end
+    sig { returns(::String) }
+    def cell_index
+      if @runtime.parsing_csv_section?
+        "[#{@runtime.position.row_index},#{@runtime.position.cell_index}]"
+      else
+        ''
+      end
+    end
+    sig { returns(::String) }
+    def message_prefix
+      line_number = @runtime.position.line_number
+      filename = @runtime.source_code.filename
+      line_str = ":#{line_number}"
+      "#{filename}#{line_str}"
+    end
+  end
+end

data/lib/csv_plus_plus/google_api_client.rb CHANGED Viewed

@@ -10,20 +10,30 @@ module CSVPlusPlus
     # Get a +Google::Apis::SheetsV4::SheetsService+ instance configured to connect to the sheets API
     #
     # @return [Google::Apis::SheetsV4::SheetsService]
-    def self.sheets_client
-      ::Google::Apis::SheetsV4::SheetsService.new.tap do |s|
-        s.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/spreadsheets'].freeze)
-      end
+    def sheets_client
+      ::T.must(
+        @sheets_client ||= ::T.let(
+          ::Google::Apis::SheetsV4::SheetsService.new.tap do |s|
+            s.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/spreadsheets'].freeze)
+          end,
+          ::T.nilable(::Google::Apis::SheetsV4::SheetsService)
+        )
+      )
     end
     sig { returns(::Google::Apis::DriveV3::DriveService) }
     # Get a +Google::Apis::DriveV3::DriveService+ instance connected to the drive API
     #
     # @return [Google::Apis::DriveV3::DriveService]
-    def self.drive_client
-      ::Google::Apis::DriveV3::DriveService.new.tap do |d|
-        d.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/drive.file'].freeze)
-      end
+    def drive_client
+      ::T.must(
+        @drive_client ||= ::T.let(
+          ::Google::Apis::DriveV3::DriveService.new.tap do |d|
+            d.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/drive.file'].freeze)
+          end,
+          ::T.nilable(::Google::Apis::DriveV3::DriveService)
+        )
+      )
     end
   end
 end

data/lib/csv_plus_plus/lexer/racc_lexer.rb ADDED Viewed

@@ -0,0 +1,144 @@
+# typed: strict
+# frozen_string_literal: true
+module CSVPlusPlus
+  module Lexer
+    # TODO: ugh clean this up
+    RaccToken =
+      ::T.type_alias do
+        ::T.any(
+          [::String, ::Symbol],
+          [::Symbol, ::String],
+          [::String, ::String],
+          [::Symbol, ::Symbol],
+          [::FalseClass, ::FalseClass]
+        )
+      end
+    public_constant :RaccToken
+    # Common methods to be mixed into the Racc parsers
+    #
+    # @attr_reader tokens [Array]
+    module RaccLexer
+      extend ::T::Sig
+      extend ::T::Helpers
+      extend ::T::Generic
+      include ::Kernel
+      abstract!
+      ReturnType = type_member
+      public_constant :ReturnType
+      sig { returns(::T::Array[::CSVPlusPlus::Lexer::RaccToken]) }
+      attr_reader :tokens
+      sig { params(tokens: ::T::Array[::CSVPlusPlus::Lexer::RaccToken]).void }
+      # Initialize a lexer instance with an empty +@tokens+
+      def initialize(tokens: [])
+        @tokens = ::T.let(tokens, ::T::Array[::CSVPlusPlus::Lexer::RaccToken])
+      end
+      sig { returns(::T.nilable(::CSVPlusPlus::Lexer::RaccToken)) }
+      # Used by racc to iterate each token
+      #
+      # @return [Array<(Regexp, Symbol) | (false, false)>]
+      def next_token
+        @tokens.shift
+      end
+      sig { params(input: ::String).returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
+      # Orchestrate the tokenizing, parsing and error handling of parsing input.  Each instance will implement its own
+      # +#tokenizer+ method
+      #
+      # @return [RaccLexer::ReturnType] Each instance will define its own +return_value+ with the result of parsing
+      # rubocop:disable Metrics/MethodLength
+      def parse(input)
+        return return_value unless anything_to_parse?(input)
+        tokenize(input)
+        do_parse
+        return_value
+      rescue ::Racc::ParseError => e
+        raise(
+          ::CSVPlusPlus::Error::FormulaSyntaxError.new(
+            "Error parsing #{parse_subject}",
+            bad_input: e.message,
+            wrapped_error: e
+          )
+        )
+      end
+      # rubocop:enable Metrics/MethodLength
+      protected
+      sig { abstract.params(input: ::String).returns(::T::Boolean) }
+      # Is the input even worth parsing? for example we don't want to parse cells unless they're a formula (start
+      # with '=')
+      #
+      # @param input [String]
+      #
+      # @return [Boolean]
+      def anything_to_parse?(input); end
+      sig { abstract.returns(::String) }
+      # Used for error messages, what is the thing being parsed? ("cell value", "modifier", "code section")
+      def parse_subject; end
+      sig { abstract.returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
+      # The output of the parser
+      def return_value; end
+      sig { abstract.returns(::CSVPlusPlus::Lexer::Tokenizer) }
+      # Returns a +Lexer::Tokenizer+ configured for the kind of input being parsed (see +#parse_subject+)
+      def tokenizer; end
+      private
+      sig { params(input: ::String).void }
+      def tokenize(input)
+        return if input.nil?
+        t = tokenizer.scan(input)
+        until t.scanner.empty?
+          next if t.matches_ignore?
+          return if t.stop?
+          t.scan_tokens!
+          consume_token(t)
+        end
+        @tokens << %i[EOL EOL]
+      end
+      sig { params(tokenizer: ::CSVPlusPlus::Lexer::Tokenizer).void }
+      # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+      def consume_token(tokenizer)
+        if tokenizer.last_token&.token && tokenizer.last_match
+          @tokens << [::T.must(tokenizer.last_token).token, ::T.must(tokenizer.last_match)]
+        elsif tokenizer.scan_catchall
+          @tokens << [::T.must(tokenizer.last_match), ::T.must(tokenizer.last_match)]
+        # TODO: checking the +parse_subject+ like this is a little hacky... but we need to know if we're parsing
+        # modifiers or code_section (or formulas in a cell)
+        elsif parse_subject == 'modifier'
+          raise(
+            ::CSVPlusPlus::Error::ModifierSyntaxError.new(
+              "Unable to parse #{parse_subject} starting at",
+              bad_input: tokenizer.peek
+            )
+          )
+        else
+          raise(
+            ::CSVPlusPlus::Error::FormulaSyntaxError.new(
+              "Unable to parse #{parse_subject} starting at",
+              bad_input: tokenizer.peek
+            )
+          )
+        end
+      end
+      # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
+    end
+  end
+end

data/lib/csv_plus_plus/lexer/tokenizer.rb CHANGED Viewed

@@ -1,24 +1,33 @@
-# typed: true
+# typed: strict
 # frozen_string_literal: true
-require 'strscan'
 module CSVPlusPlus
   module Lexer
     # A class that contains the use-case-specific regexes for parsing
     #
-    # @attr_reader last_token [String] The last token that's been matched.
-    # @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
+    # @attr_reader last_token [String, nil] The last token that's been matched.
     class Tokenizer
-      attr_reader :last_token, :scanner
+      extend ::T::Sig
+      sig { returns(::T.nilable(::CSVPlusPlus::Lexer::Token)) }
+      attr_reader :last_token
+      sig do
+        params(
+          tokens: ::T::Enumerable[::CSVPlusPlus::Lexer::Token],
+          catchall: ::T.nilable(::Regexp),
+          ignore: ::T.nilable(::Regexp),
+          alter_matches: ::T::Hash[::Symbol, ::T.proc.params(s: ::String).returns(::String)],
+          stop_fn: ::T.nilable(::T.proc.params(s: ::StringScanner).returns(::T::Boolean))
+        ).void
+      end
       # @param tokens [Array<Regexp, String>] The list of tokens to scan
       # @param catchall [Regexp] A final regexp to try if nothing else matches
       # @param ignore [Regexp] Ignore anything matching this regexp
       # @param alter_matches [Object] A map of matches to alter
       # @param stop_fn [Proc] Stop parsing when this is true
       def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
-        @last_token = nil
+        @last_token = ::T.let(nil, ::T.nilable(::CSVPlusPlus::Lexer::Token))
         @catchall = catchall
         @ignore = ignore
@@ -27,67 +36,94 @@ module CSVPlusPlus
         @alter_matches = alter_matches
       end
+      sig { params(input: ::String).returns(::T.self_type) }
       # Initializes a scanner for the given input to be parsed
       #
       # @param input The input to be tokenized
+      #
       # @return [Tokenizer]
       def scan(input)
-        @scanner = ::StringScanner.new(input.strip)
+        @scanner = ::T.let(::StringScanner.new(input.strip), ::T.nilable(::StringScanner))
         self
       end
+      sig { returns(::StringScanner) }
+      # Returns the currently initialized +StringScanner+.  You must call +#scan+ first or else this will throw an
+      # exception.
+      #
+      # @return [StringScanner]
+      def scanner
+        # The caller needs to initialize this class with a call to #scan before we can do anything.  it sets up the
+        # +@scanner+ with its necessary input.
+        unless @scanner
+          raise(::CSVPlusPlus::Error::CompilerError, 'Called Tokenizer#scanner without calling #scan first')
+        end
+        @scanner
+      end
+      sig { void }
       # Scan tokens and set +@last_token+ if any match
       #
       # @return [String, nil]
       def scan_tokens!
-        m = @tokens.find { |t| @scanner.scan(t.first) }
-        @last_token = m ? m[1] : nil
+        @last_token = @tokens.find { |t| scanner.scan(t.regexp) }
       end
+      sig { returns(::T.nilable(::String)) }
       # Scan input against the catchall pattern
       #
       # @return [String, nil]
       def scan_catchall
-        @scanner.scan(@catchall) if @catchall
+        scanner.scan(@catchall) if @catchall
       end
+      sig { returns(::T.nilable(::String)) }
       # Scan input against the ignore pattern
       #
       # @return [boolean]
       def matches_ignore?
-        @scanner.scan(@ignore) if @ignore
+        scanner.scan(@ignore) if @ignore
       end
+      sig { returns(::T.nilable(::String)) }
       # The value of the last token matched
       #
       # @return [String, nil]
       def last_match
-        return @alter_matches[@last_token].call(@scanner.matched) if @alter_matches.key?(@last_token)
+        # rubocop:disable Style/MissingElse
+        if @last_token && @alter_matches.key?(@last_token.token.to_sym)
+          # rubocop:enable Style/MissingElse
+          return ::T.must(@alter_matches[@last_token.token.to_sym]).call(scanner.matched)
+        end
-        @scanner.matched
+        scanner.matched
       end
+      sig { params(peek_characters: ::Integer).returns(::String) }
       # Read the input but don't consume it
       #
       # @param peek_characters [Integer]
       #
       # @return [String]
       def peek(peek_characters: 100)
-        @scanner.peek(peek_characters)
+        scanner.peek(peek_characters)
       end
+      sig { returns(::T::Boolean) }
       # Scan for our stop token (if there is one - some parsers stop early and some don't)
       #
       # @return [boolean]
       def stop?
-        @stop_fn ? @stop_fn.call(@scanner) : false
+        @stop_fn ? @stop_fn.call(scanner) : false
       end
+      sig { returns(::String) }
       # The rest of the un-parsed input.  The tokenizer might not need to parse the entire input
       #
       # @return [String]
       def rest
-        @scanner.rest
+        scanner.rest
       end
     end
   end

data/lib/csv_plus_plus/lexer.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # typed: strict
 # frozen_string_literal: true
-require_relative './lexer/lexer'
+require_relative './lexer/racc_lexer'
 require_relative './lexer/tokenizer'
 module CSVPlusPlus
@@ -9,12 +9,51 @@ module CSVPlusPlus
   module Lexer
     extend ::T::Sig
+    # A token that's matched by +regexp+ and presented with +token+
+    class Token < ::T::Struct
+      const :regexp, ::Regexp
+      const :token, ::T.any(::String, ::Symbol)
+    end
     END_OF_CODE_SECTION = '---'
     public_constant :END_OF_CODE_SECTION
     VARIABLE_REF = '$$'
     public_constant :VARIABLE_REF
+    # @see https://github.com/ruby/racc/blob/master/lib/racc/parser.rb#L121
+    TOKEN_LIBRARY = ::T.let(
+      {
+        # A1_NOTATION: ::CSVPlusPlus::Lexer::Token.new(
+        # regexp: ::CSVPlusPlus::A1Reference::A1_NOTATION_REGEXP, token: :A1_NOTATION
+        # ),
+        FALSE: ::CSVPlusPlus::Lexer::Token.new(regexp: /false/i, token: :FALSE),
+        HEX_COLOR: ::CSVPlusPlus::Lexer::Token.new(regexp: ::CSVPlusPlus::Color::HEX_STRING_REGEXP, token: :HEX_COLOR),
+        INFIX_OP: ::CSVPlusPlus::Lexer::Token.new(regexp: %r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, token: :INFIX_OP),
+        NUMBER: ::CSVPlusPlus::Lexer::Token.new(regexp: /-?[\d.]+/, token: :NUMBER),
+        REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /[$!\w:]+/, token: :REF),
+        STRING: ::CSVPlusPlus::Lexer::Token.new(
+          regexp: %r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"},
+          token: :STRING
+        ),
+        TRUE: ::CSVPlusPlus::Lexer::Token.new(regexp: /true/i, token: :TRUE),
+        VAR_REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /\$\$/, token: :VAR_REF)
+      }.freeze,
+      ::T::Hash[::Symbol, ::CSVPlusPlus::Lexer::Token]
+    )
+    public_constant :TOKEN_LIBRARY
+    sig { params(str: ::String).returns(::String) }
+    # Run any transformations to the input before going into the CSV parser
+    #
+    # The CSV parser in particular does not like whitespace after a double quote and before the next comma
+    #
+    # @param str [String]
+    # @return [String]
+    def self.preprocess(str)
+      str.gsub(/"\s*,/, '",')
+    end
     sig { params(str: ::String).returns(::String) }
     # When parsing a modifier with a quoted string field, we need a way to unescape.  Some examples of quoted and
     # unquoted results:

data/lib/csv_plus_plus/modifier/data_validation.rb CHANGED Viewed

@@ -44,7 +44,7 @@ module CSVPlusPlus
       end
       def a1_notation(arg)
-        return arg if ::CSVPlusPlus::Entities::CellReference.valid_cell_reference?(arg)
+        return arg if ::CSVPlusPlus::A1Reference.valid_cell_reference?(arg)
       end
       def a_date(arg, allow_relative_date: false)