RubyGems - csv_plus_plus - Versions diffs - 0.0.5 → 0.1.1 - Mend

csv_plus_plus 0.0.5 → 0.1.1

Files changed (44) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/README.md +1 -0
data/lib/csv_plus_plus/cell.rb +24 -8
data/lib/csv_plus_plus/cli.rb +29 -16
data/lib/csv_plus_plus/cli_flag.rb +10 -2
data/lib/csv_plus_plus/code_section.rb +55 -3
data/lib/csv_plus_plus/color.rb +19 -5
data/lib/csv_plus_plus/google_options.rb +6 -2
data/lib/csv_plus_plus/graph.rb +0 -1
data/lib/csv_plus_plus/language/ast_builder.rb +68 -0
data/lib/csv_plus_plus/language/benchmarked_compiler.rb +65 -0
data/lib/csv_plus_plus/language/builtins.rb +46 -0
data/lib/csv_plus_plus/language/cell_value.tab.rb +106 -134
data/lib/csv_plus_plus/language/code_section.tab.rb +163 -192
data/lib/csv_plus_plus/language/compiler.rb +75 -92
data/lib/csv_plus_plus/language/entities/boolean.rb +3 -2
data/lib/csv_plus_plus/language/entities/cell_reference.rb +10 -3
data/lib/csv_plus_plus/language/entities/entity.rb +20 -8
data/lib/csv_plus_plus/language/entities/function.rb +6 -4
data/lib/csv_plus_plus/language/entities/function_call.rb +17 -5
data/lib/csv_plus_plus/language/entities/number.rb +6 -4
data/lib/csv_plus_plus/language/entities/runtime_value.rb +9 -8
data/lib/csv_plus_plus/language/entities/string.rb +6 -4
data/lib/csv_plus_plus/language/references.rb +22 -5
data/lib/csv_plus_plus/language/runtime.rb +80 -22
data/lib/csv_plus_plus/language/scope.rb +34 -39
data/lib/csv_plus_plus/language/syntax_error.rb +10 -5
data/lib/csv_plus_plus/lexer/lexer.rb +27 -13
data/lib/csv_plus_plus/lexer/tokenizer.rb +35 -11
data/lib/csv_plus_plus/modifier.rb +38 -18
data/lib/csv_plus_plus/modifier.tab.rb +2 -2
data/lib/csv_plus_plus/options.rb +20 -2
data/lib/csv_plus_plus/row.rb +15 -4
data/lib/csv_plus_plus/template.rb +26 -6
data/lib/csv_plus_plus/version.rb +1 -1
data/lib/csv_plus_plus/writer/excel.rb +2 -9
data/lib/csv_plus_plus/writer/file_backer_upper.rb +22 -20
data/lib/csv_plus_plus/writer/google_sheet_builder.rb +8 -10
data/lib/csv_plus_plus/writer/google_sheets.rb +4 -10
data/lib/csv_plus_plus/writer/rubyxl_builder.rb +23 -15
data/lib/csv_plus_plus/writer/rubyxl_modifier.rb +15 -8
data/lib/csv_plus_plus.rb +42 -8
metadata +5 -2

data/lib/csv_plus_plus/language/runtime.rb CHANGED Viewed

@@ -4,31 +4,44 @@ require_relative 'entities'
 require_relative 'syntax_error'
 require 'tempfile'
-ENTITIES = ::CSVPlusPlus::Language::Entities
-RUNTIME_VARIABLES = {
-  rownum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.row_index + 1) }),
-  cellnum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.cell_index + 1) })
-}.freeze
 module CSVPlusPlus
   module Language
-    ##
-    # The runtime state of the compiler (the current linenumber/row, cell, etc)
+    # The runtime state of the compiler (the current +line_number+/+row_index+, +cell+ being processed, etc).  We take
+    # multiple runs through the input file for parsing so it's really convenient to have a central place for these
+    # things to be managed.
+    #
+    # @attr_reader filename [String, nil] The filename that the input came from (mostly used for debugging since
+    #   +filename+ can be +nil+ if it's read from stdin).
+    # @attr_reader length_of_code_section [Integer] The length (count of lines) of the code section part of the original
+    #   input.
+    # @attr_reader length_of_csv_section [Integer] The length (count of lines) of the CSV part of the original csvpp
+    #   input.
+    # @attr_reader length_of_original_file [Integer] The length (count of lines) of the original csvpp input.
+    #
+    # @attr cell [Cell] The current cell being processed
+    # @attr cell_index [Integer] The index of the current cell being processed (starts at 0)
+    # @attr row_index [Integer] The index of the current row being processed (starts at 0)
+    # @attr line_number [Integer] The line number of the original csvpp template (starts at 1)
     class Runtime
       attr_reader :filename, :length_of_code_section, :length_of_csv_section, :length_of_original_file
       attr_accessor :cell, :cell_index, :row_index, :line_number
-      # initialize
+      # @param input [String] The input to be parsed
+      # @param filename [String, nil] The filename that the input came from (mostly used for debugging since +filename+
+      #   can be +nil+ if it's read from stdin)
       def initialize(input:, filename:)
         @filename = filename || 'stdin'
         init_input!(input)
-        init!(1)
+        start!
       end
-      # map over an unparsed file and keep track of line_number and row_index
+      # Map over a csvpp file and keep track of line_number and row_index
+      #
+      # @param lines [Array]
+      #
+      # @return [Array]
       def map_lines(lines, &block)
         @line_number = 1
         lines.map do |line|
@@ -36,7 +49,11 @@ module CSVPlusPlus
         end
       end
-      # map over a single row and keep track of the cell and it's index
+      # Map over a single row and keep track of the cell and its index
+      #
+      # @param row [Array<Cell>] The row to map each cell over
+      #
+      # @return [Array]
       def map_row(row, &block)
         @cell_index = 0
         row.map.with_index do |cell, index|
@@ -45,7 +62,12 @@ module CSVPlusPlus
         end
       end
-      # map over all rows and keep track of row and line numbers
+      # Map over all rows and keep track of row and line numbers
+      #
+      # @param rows [Array<Row>] The rows to map over (and keep track of indexes)
+      # @param cells_too [boolean] If the cells of each +row+ should be iterated over also.
+      #
+      # @return [Array]
       def map_rows(rows, cells_too: false, &block)
         @row_index = 0
         map_lines(rows) do |row|
@@ -59,56 +81,92 @@ module CSVPlusPlus
       end
       # Increment state to the next line
+      #
+      # @return [Integer]
       def next_line!
         @row_index += 1 unless @row_index.nil?
         @line_number += 1
       end
+      # Return the current spreadsheet row number.  It parallels +@row_index+ but starts at 1.
+      #
+      # @return [Integer, nil]
+      def rownum
+        return if @row_index.nil?
+        @row_index + 1
+      end
       # Set the current cell and index
+      #
+      # @param cell [Cell] The current cell
+      # @param cell_index [Integer] The index of the cell
       def set_cell!(cell, cell_index)
         @cell = cell
         @cell_index = cell_index
       end
-      # Each time we run a parse on the input, call this so that the runtime state
-      # is set to it's default values
-      def init!(start_line_number_at)
+      # Each time we run a parse on the input, reset the runtime state starting at the beginning of the file
+      def start!
         @row_index = @cell_index = nil
-        @line_number = start_line_number_at
+        @line_number = 1
+      end
+      # Reset the runtime state starting at the CSV section
+      def start_at_csv!
+        # TODO: isn't the input re-written anyway without the code section? why do we need this?
+        start!
+        @line_number = @length_of_code_section || 1
       end
-      # to_s
+      # @return [String]
       def to_s
         "Runtime(cell: #{@cell}, row_index: #{@row_index}, cell_index: #{@cell_index})"
       end
-      # get the current (entity) value of a runtime value
+      # Get the current (entity) value of a runtime value
+      #
+      # @param var_id [String, Symbol] The Variable#id  of the variable being resolved.
+      #
+      # @return [Entity]
       def runtime_value(var_id)
         if runtime_variable?(var_id)
-          ::RUNTIME_VARIABLES[var_id.to_sym].resolve_fn.call(self)
+          ::CSVPlusPlus::Language::Builtins::VARIABLES[var_id.to_sym].resolve_fn.call(self)
         else
           raise_syntax_error('Undefined variable', var_id)
         end
       end
       # Is +var_id+ a runtime variable?  (it's a static variable otherwise)
+      #
+      # @param var_id [String, Symbol] The Variable#id to check if it's a runtime variable
+      #
+      # @return [boolean]
       def runtime_variable?(var_id)
-        ::RUNTIME_VARIABLES.key?(var_id.to_sym)
+        ::CSVPlusPlus::Language::Builtins::VARIABLES.key?(var_id.to_sym)
       end
       # Called when an error is encountered during parsing.  It will construct a useful
       # error with the current +@row/@cell_index+, +@line_number+ and +@filename+
+      #
+      # @param message [String] A message relevant to why this error is being raised.
+      # @param bad_input [String] The offending input that caused this error to be thrown.
+      # @param wrapped_error [StandardError, nil] The underlying error that was raised (if it's not from our own logic)
       def raise_syntax_error(message, bad_input, wrapped_error: nil)
         raise(::CSVPlusPlus::Language::SyntaxError.new(message, bad_input, self, wrapped_error:))
       end
       # The currently available input for parsing.  The tmp state will be re-written
       # between parsing the code section and the CSV section
+      #
+      # @return [String]
       def input
         @tmp
       end
       # We mutate the input over and over. It's ok because it's just a Tempfile
+      #
+      # @param data [String] The data to rewrite our input file to
       def rewrite_input!(data)
         @tmp.truncate(0)
         @tmp.write(data)

data/lib/csv_plus_plus/language/scope.rb CHANGED Viewed

@@ -6,40 +6,29 @@ require_relative './entities'
 require_relative './references'
 require_relative './syntax_error'
-BUILTIN_FUNCTIONS = {
-  # =CELLREF(C) === =INDIRECT(CONCAT($$C, $$rownum))
-  cellref: ::CSVPlusPlus::Language::Entities::Function.new(
-    :cellref,
-    [:cell],
-    ::CSVPlusPlus::Language::Entities::FunctionCall.new(
-      :indirect,
-      [
-        ::CSVPlusPlus::Language::Entities::FunctionCall.new(
-          :concat,
-          [
-            ::CSVPlusPlus::Language::Entities::Variable.new(:cell),
-            ::CSVPlusPlus::Language::Entities::Variable.new(:rownum)
-          ]
-        )
-      ]
-    )
-  )
-}.freeze
 module CSVPlusPlus
   module Language
     # A class representing the scope of the current Template and responsible for resolving variables
+    #
+    # @attr_reader code_section [CodeSection] The CodeSection containing variables and functions to be resolved
+    # @attr_reader runtime [Runtime] The compiler's current runtime
+    #
     # rubocop:disable Metrics/ClassLength
     class Scope
       attr_reader :code_section, :runtime
       # initialize with a +Runtime+ and optional +CodeSection+
+      #
+      # @param runtime [Runtime]
+      # @param code_section [CodeSection, nil]
       def initialize(runtime:, code_section: nil)
         @code_section = code_section if code_section
         @runtime = runtime
       end
       # Resolve all values in the ast of the current cell being processed
+      #
+      # @return [Entity]
       def resolve_cell_value
         return unless (ast = @runtime.cell&.ast)
@@ -56,14 +45,14 @@ module CSVPlusPlus
       end
       # Set the +code_section+ and resolve all inner dependencies in its variables and functions.
+      #
+      # @param code_section [CodeSection] The code_section to be resolved
       def code_section=(code_section)
         @code_section = code_section
         resolve_static_variables!
-        resolve_static_functions!
       end
-      # to_s
+      # @return [String]
       def to_s
         "Scope(code_section: #{@code_section}, runtime: #{@runtime})"
       end
@@ -71,10 +60,10 @@ module CSVPlusPlus
       private
       # Resolve all variable references defined statically in the code section
+      # TODO: experiment with getting rid of this - does it even play correctly with runtime vars?
       def resolve_static_variables!
         variables = @code_section.variables
         last_var_dependencies = {}
-        # TODO: might not need the infinite loop wrap
         loop do
           var_dependencies, resolution_order = variable_resolution_order(only_static_vars(variables))
           return if var_dependencies == last_var_dependencies
@@ -89,14 +78,6 @@ module CSVPlusPlus
         var_dependencies.reject { |k| @runtime.runtime_variable?(k) }
       end
-      # Resolve all functions defined statically in the code section
-      def resolve_static_functions!
-        # TODO: I'm still torn if it's worth replacing function references
-        #
-        # my current theory is that if we resolve static functions befor processing each cell,
-        # overall compile time will be improved because there will be less to do for each cell
-      end
       def resolve_functions(ast, refs)
         refs.reduce(ast.dup) do |acc, elem|
           function_replace(acc, elem.id, resolve_function(elem.id))
@@ -110,26 +91,39 @@ module CSVPlusPlus
       end
       # Make a copy of the AST represented by +node+ and replace +fn_id+ with +replacement+ throughout
+      # rubocop:disable Metrics/MethodLength
       def function_replace(node, fn_id, replacement)
         if node.function_call? && node.id == fn_id
-          apply_arguments(replacement, node)
+          call_function_or_runtime_value(replacement, node)
         elsif node.function_call?
-          arguments = node.arguments.map { |n| function_replace(n, fn_id, replacement) }
-          ::CSVPlusPlus::Language::Entities::FunctionCall.new(node.id, arguments)
+          # not our function, but continue our depth first search on it
+          ::CSVPlusPlus::Language::Entities::FunctionCall.new(
+            node.id,
+            node.arguments.map { |n| function_replace(n, fn_id, replacement) },
+            infix: node.infix
+          )
         else
           node
         end
       end
+      # rubocop:enable Metrics/MethodLength
       def resolve_function(fn_id)
         id = fn_id.to_sym
         return @code_section.functions[id] if @code_section.defined_function?(id)
-        # this will throw a syntax error if it doesn't exist (which is what we want)
-        return ::BUILTIN_FUNCTIONS[id] if ::BUILTIN_FUNCTIONS.key?(id)
+        ::CSVPlusPlus::Language::Builtins::FUNCTIONS[id]
+      end
+      def call_function_or_runtime_value(function_or_runtime_value, function_call)
+        if function_or_runtime_value.function?
+          call_function(function_or_runtime_value, function_call)
+        else
+          function_or_runtime_value.resolve_fn.call(@runtime, function_call.arguments)
+        end
       end
-      def apply_arguments(function, function_call)
+      def call_function(function, function_call)
         i = 0
         function.arguments.reduce(function.body.dup) do |ast, argument|
           variable_replace(ast, argument, function_call.arguments[i]).tap do
@@ -142,7 +136,8 @@ module CSVPlusPlus
       def variable_replace(node, var_id, replacement)
         if node.function_call?
           arguments = node.arguments.map { |n| variable_replace(n, var_id, replacement) }
-          ::CSVPlusPlus::Language::Entities::FunctionCall.new(node.id, arguments)
+          # TODO: refactor these places where we copy functions... it's brittle with the kwargs
+          ::CSVPlusPlus::Language::Entities::FunctionCall.new(node.id, arguments, infix: node.infix)
         elsif node.variable? && node.id == var_id
           replacement
         else

data/lib/csv_plus_plus/language/syntax_error.rb CHANGED Viewed

@@ -2,10 +2,13 @@
 module CSVPlusPlus
   module Language
-    ##
     # An error that can be thrown for various syntax errors
     class SyntaxError < ::CSVPlusPlus::Error
-      # initialize
+      # @param message [String] The primary message to be shown to the user
+      # @param bad_input [String] The offending input that caused the error to be thrown
+      # @param runtime [Runtime] The current runtime
+      # @param wrapped_error [StandardError] The underlying error that caused the syntax error.  For example a
+      #   Racc::ParseError that was thrown
       def initialize(message, bad_input, runtime, wrapped_error: nil)
         @bad_input = bad_input.to_s
         @runtime = runtime
@@ -15,19 +18,21 @@ module CSVPlusPlus
         super(message)
       end
-      # to_s
+      # @return [String]
       def to_s
         to_trace
       end
       # Output a verbose user-helpful string that references the current runtime
       def to_verbose_trace
-        warn(@wrapped_error.full_message)
-        warn(@wrapped_error.backtrace)
+        warn(@wrapped_error.full_message) if @wrapped_error
+        warn(@wrapped_error.backtrace) if @wrapped_error
         to_trace
       end
       # Output a user-helpful string that references the runtime state
+      #
+      # @return [String]
       def to_trace
         "#{message_prefix}#{cell_index} #{message_postfix}"
       end

data/lib/csv_plus_plus/lexer/lexer.rb CHANGED Viewed

@@ -1,19 +1,28 @@
 # frozen_string_literal: true
 module CSVPlusPlus
-  # Common methods to be mixed into our Racc parsers
+  # Common methods to be mixed into the Racc parsers
+  #
+  # @attr_reader tokens [Array]
   module Lexer
-    # initialize
-    def initialize
-      @tokens = []
+    attr_reader :tokens
+    # Initialize a lexer instance with an empty +@tokens+
+    def initialize(tokens: [])
+      @tokens = tokens
     end
     # Used by racc to iterate each token
+    #
+    # @return [Array<(String, String)>]
     def next_token
       @tokens.shift
     end
-    # parse
+    # Orchestrate the tokenizing, parsing and error handling of parsing input.  Each instance will implement their own
+    #   #tokenizer method
+    #
+    # @return [Lexer#return_value] Each instance will define its own +return_value+ with the result of parsing
     def parse(input, runtime)
       return if input.nil?
@@ -26,12 +35,23 @@ module CSVPlusPlus
       runtime.raise_syntax_error("Error parsing #{parse_subject}", e.message, wrapped_error: e)
     end
-    protected
+    TOKEN_LIBRARY = {
+      TRUE: [/true/i, :TRUE],
+      FALSE: [/false/i, :FALSE],
+      NUMBER: [/-?[\d.]+/, :NUMBER],
+      STRING: [%r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"}, :STRING],
+      INFIX_OP: [%r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, :INFIX_OP],
+      VAR_REF: [/\$\$/, :VAR_REF],
+      ID: [/[$!\w:]+/, :ID]
+    }.freeze
+    public_constant :TOKEN_LIBRARY
+    private
     def tokenize(input, runtime)
       return if input.nil?
-      t = tokenizer(input)
+      t = tokenizer.scan(input)
       until t.scanner.empty?
         next if t.matches_ignore?
@@ -45,12 +65,6 @@ module CSVPlusPlus
       @tokens << %i[EOL EOL]
     end
-    def e(type, *entity_args)
-      ::CSVPlusPlus::Language::TYPES[type].new(*entity_args)
-    end
-    private
     def consume_token(tokenizer, runtime)
       if tokenizer.last_token
         @tokens << [tokenizer.last_token, tokenizer.last_match]

data/lib/csv_plus_plus/lexer/tokenizer.rb CHANGED Viewed

@@ -5,13 +5,14 @@ require 'strscan'
 module CSVPlusPlus
   module Lexer
     # A class that contains the use-case-specific regexes for parsing
+    #
+    # @attr_reader last_token [String] The last token that's been matched.
+    # @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
     class Tokenizer
       attr_reader :last_token, :scanner
-      # initialize
-      # rubocop:disable Metrics/ParameterLists
-      def initialize(input:, tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
-        @scanner = ::StringScanner.new(input.strip)
+      # @param tokens [Array]
+      def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
         @last_token = nil
         @catchall = catchall
@@ -20,43 +21,66 @@ module CSVPlusPlus
         @stop_fn = stop_fn
         @alter_matches = alter_matches
       end
-      # rubocop:enable Metrics/ParameterLists
-      # Scan tokens and see if any match
+      # Initializes a scanner for the given input to be parsed
+      #
+      # @param input The input to be tokenized
+      # @return [Tokenizer]
+      def scan(input)
+        @scanner = ::StringScanner.new(input.strip)
+        self
+      end
+      # Scan tokens and set +@last_token+ if any match
+      #
+      # @return [String, nil]
       def scan_tokens!
         m = @tokens.find { |t| @scanner.scan(t.first) }
         @last_token = m ? m[1] : nil
       end
       # Scan input against the catchall pattern
+      #
+      # @return [String, nil]
       def scan_catchall
         @scanner.scan(@catchall) if @catchall
       end
       # Scan input against the ignore pattern
+      #
+      # @return [boolean]
       def matches_ignore?
         @scanner.scan(@ignore) if @ignore
       end
       # The value of the last token matched
+      #
+      # @return [String, nil]
       def last_match
         return @alter_matches[@last_token].call(@scanner.matched) if @alter_matches.key?(@last_token)
         @scanner.matched
       end
-      # Peek the input
-      def peek
-        @scanner.peek(100)
+      # Read the input but don't consume it
+      #
+      # @param peek_characters [Integer]
+      #
+      # @return [String]
+      def peek(peek_characters: 100)
+        @scanner.peek(peek_characters)
       end
       # Scan for our stop token (if there is one - some parsers stop early and some don't)
+      #
+      # @return [boolean]
       def stop?
         @stop_fn ? @stop_fn.call(@scanner) : false
       end
-      # The rest of the un-parsed input.  The tokenizer might not need to
-      # parse the entire input
+      # The rest of the un-parsed input.  The tokenizer might not need to parse the entire input
+      #
+      # @return [String]
       def rest
         @scanner.rest
       end