RubyGems - csv_plus_plus - Versions diffs - 0.1.2 → 0.1.3 - Mend

csv_plus_plus 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

checksums.yaml +4 -4
data/README.md +1 -2
data/{CHANGELOG.md → docs/CHANGELOG.md} +9 -0
data/lib/csv_plus_plus/benchmarked_compiler.rb +70 -20
data/lib/csv_plus_plus/cell.rb +46 -24
data/lib/csv_plus_plus/cli.rb +23 -13
data/lib/csv_plus_plus/cli_flag.rb +1 -2
data/lib/csv_plus_plus/color.rb +32 -7
data/lib/csv_plus_plus/compiler.rb +82 -60
data/lib/csv_plus_plus/entities/ast_builder.rb +27 -43
data/lib/csv_plus_plus/entities/boolean.rb +18 -9
data/lib/csv_plus_plus/entities/builtins.rb +23 -9
data/lib/csv_plus_plus/entities/cell_reference.rb +200 -29
data/lib/csv_plus_plus/entities/date.rb +38 -5
data/lib/csv_plus_plus/entities/entity.rb +27 -61
data/lib/csv_plus_plus/entities/entity_with_arguments.rb +57 -0
data/lib/csv_plus_plus/entities/function.rb +23 -11
data/lib/csv_plus_plus/entities/function_call.rb +24 -9
data/lib/csv_plus_plus/entities/number.rb +24 -10
data/lib/csv_plus_plus/entities/runtime_value.rb +22 -5
data/lib/csv_plus_plus/entities/string.rb +19 -6
data/lib/csv_plus_plus/entities/variable.rb +16 -4
data/lib/csv_plus_plus/entities.rb +20 -13
data/lib/csv_plus_plus/error/error.rb +11 -1
data/lib/csv_plus_plus/error/formula_syntax_error.rb +1 -0
data/lib/csv_plus_plus/error/modifier_syntax_error.rb +53 -5
data/lib/csv_plus_plus/error/modifier_validation_error.rb +34 -14
data/lib/csv_plus_plus/error/syntax_error.rb +22 -9
data/lib/csv_plus_plus/error/writer_error.rb +8 -0
data/lib/csv_plus_plus/error.rb +1 -0
data/lib/csv_plus_plus/google_api_client.rb +7 -2
data/lib/csv_plus_plus/google_options.rb +23 -18
data/lib/csv_plus_plus/lexer/lexer.rb +8 -4
data/lib/csv_plus_plus/lexer/tokenizer.rb +6 -1
data/lib/csv_plus_plus/lexer.rb +24 -0
data/lib/csv_plus_plus/modifier/conditional_formatting.rb +1 -0
data/lib/csv_plus_plus/modifier/data_validation.rb +138 -0
data/lib/csv_plus_plus/modifier/expand.rb +61 -0
data/lib/csv_plus_plus/modifier/google_sheet_modifier.rb +133 -0
data/lib/csv_plus_plus/modifier/modifier.rb +222 -0
data/lib/csv_plus_plus/modifier/modifier_validator.rb +243 -0
data/lib/csv_plus_plus/modifier/rubyxl_modifier.rb +84 -0
data/lib/csv_plus_plus/modifier.rb +82 -158
data/lib/csv_plus_plus/options.rb +64 -19
data/lib/csv_plus_plus/parser/cell_value.tab.rb +5 -5
data/lib/csv_plus_plus/parser/code_section.tab.rb +8 -13
data/lib/csv_plus_plus/parser/modifier.tab.rb +17 -23
data/lib/csv_plus_plus/row.rb +53 -12
data/lib/csv_plus_plus/runtime/can_define_references.rb +87 -0
data/lib/csv_plus_plus/runtime/can_resolve_references.rb +209 -0
data/lib/csv_plus_plus/runtime/graph.rb +68 -0
data/lib/csv_plus_plus/runtime/position_tracker.rb +231 -0
data/lib/csv_plus_plus/runtime/references.rb +110 -0
data/lib/csv_plus_plus/runtime/runtime.rb +126 -0
data/lib/csv_plus_plus/runtime.rb +34 -191
data/lib/csv_plus_plus/source_code.rb +66 -0
data/lib/csv_plus_plus/template.rb +62 -35
data/lib/csv_plus_plus/version.rb +2 -1
data/lib/csv_plus_plus/writer/base_writer.rb +30 -5
data/lib/csv_plus_plus/writer/csv.rb +11 -9
data/lib/csv_plus_plus/writer/excel.rb +9 -2
data/lib/csv_plus_plus/writer/file_backer_upper.rb +1 -0
data/lib/csv_plus_plus/writer/google_sheet_builder.rb +71 -23
data/lib/csv_plus_plus/writer/google_sheets.rb +79 -29
data/lib/csv_plus_plus/writer/open_document.rb +6 -1
data/lib/csv_plus_plus/writer/rubyxl_builder.rb +103 -30
data/lib/csv_plus_plus/writer.rb +39 -9
data/lib/csv_plus_plus.rb +29 -12
metadata +18 -14
data/lib/csv_plus_plus/can_define_references.rb +0 -88
data/lib/csv_plus_plus/can_resolve_references.rb +0 -8
data/lib/csv_plus_plus/data_validation.rb +0 -138
data/lib/csv_plus_plus/expand.rb +0 -20
data/lib/csv_plus_plus/graph.rb +0 -62
data/lib/csv_plus_plus/references.rb +0 -68
data/lib/csv_plus_plus/scope.rb +0 -196
data/lib/csv_plus_plus/validated_modifier.rb +0 -164
data/lib/csv_plus_plus/writer/google_sheet_modifier.rb +0 -77
data/lib/csv_plus_plus/writer/rubyxl_modifier.rb +0 -59

data/lib/csv_plus_plus/runtime/can_resolve_references.rb ADDED Viewed

@@ -0,0 +1,209 @@
+# typed: false
+# frozen_string_literal: true
+module CSVPlusPlus
+  module Runtime
+    # Methods for resolving functions and variables.  These should be included onto a class that has +@variables+ and
+    # +@functions+ instance variables.
+    module CanResolveReferences
+      # Resolve all values in the AST of the current cell being processed.
+      #
+      # Each round extracts the outstanding variable and function references, substitutes them, and repeats until
+      # nothing is left to resolve.  If a round finds exactly the same references as the previous round then no
+      # progress is being made and the AST is returned as it stands instead of looping forever.
+      # NOTE(review): the progress check relies on +References#==+, which compares the reference arrays - confirm
+      # that entities compare by value.
+      #
+      # @return [Entity, nil] The resolved AST, or +nil+ if there is no current cell or the cell has no AST
+      def resolve_cell_value
+        return unless (ast = @cell&.ast)
+        last_round = nil
+        loop do
+          refs = ::CSVPlusPlus::Runtime::References.extract(ast, self)
+          return ast if refs.empty?
+          # TODO: throw an error here instead I think - basically we did a round and didn't make progress
+          return ast if last_round == refs
+          # remember what this round had to resolve so the next round can detect a lack of progress
+          last_round = refs
+          ast = resolve_functions(resolve_variables(ast, refs.variables), refs.functions)
+        end
+      end
+      # Bind +var_id+ to the current cell
+      #
+      # @param var_id [Symbol] The name of the variable to bind the cell reference to
+      #
+      # @return [CellReference]
+      def bind_variable_to_cell(var_id)
+        def_variable(
+          var_id,
+          ::CSVPlusPlus::Entities::CellReference.new(
+            cell_index: @cell_index,
+            row_index: @row_index
+          )
+        )
+      end
+      # Bind +var_id+ relative to an ![[expand]] modifier.
+      #
+      # @param var_id [Symbol] The name of the variable to bind the cell reference to
+      # @param expand [Expand] The expand where the variable is accessible (where it will be bound relative to)
+      #
+      # @return [CellReference]
+      def bind_variable_in_expand(var_id, expand)
+        def_variable(
+          var_id,
+          ::CSVPlusPlus::Entities::CellReference.new(
+            scoped_to_expand: expand,
+            cell_index: @cell_index
+          )
+        )
+      end
+      # Variables outside of an ![[expand=...]] are always in scope.  If it's defined within an expand then things
+      # get trickier because the variable is only in scope while we're processing cells within that expand.
+      #
+      # A modifier syntax error is raised if +var_id+ has no entry in +@variables+, and an +Error::Error+ is raised
+      # if the expand the variable is scoped to has no +starts_at+ yet.
+      #
+      # @param var_id [Symbol] The variable's identifier that we are checking if it's in scope
+      #
+      # @return [boolean]
+      def in_scope?(var_id)
+        value = @variables[var_id]
+        raise_modifier_syntax_error('Undefined variable reference', var_id.to_s) if value.nil?
+        expand = value.type == ::CSVPlusPlus::Entities::Type::CellReference && value.scoped_to_expand
+        # anything that isn't a cell reference scoped to an expand is visible everywhere
+        return true unless expand
+        unless expand.starts_at
+          raise(::CSVPlusPlus::Error::Error, 'Must call Template.expand_rows! before checking the scope of expands.')
+        end
+        # read the row through the accessor once so both bounds are checked against the same value
+        current_row = row_index
+        current_row >= expand.starts_at && (expand.ends_at.nil? || current_row <= expand.ends_at)
+      end
+      private
+      # Resolve all variable references defined statically in the code section
+      #     def resolve_static_variables!
+      #       last_var_dependencies = {}
+      #       loop do
+      #         var_dependencies, resolution_order = variable_resolution_order(only_static_vars(variables))
+      #         return if var_dependencies == last_var_dependencies
+      #
+      #         # TODO: make the contract better here
+      #         @variables = resolve_dependencies(var_dependencies, resolution_order, variables)
+      #         last_var_dependencies = var_dependencies.clone
+      #       end
+      #     end
+      #
+      #     def only_static_vars(var_dependencies)
+      #       var_dependencies.reject { |k| @runtime.builtin_variable?(k) }
+      #     end
+      def resolve_functions(ast, refs)
+        refs.reduce(ast.dup) do |acc, elem|
+          function_replace(acc, elem.id, resolve_function(elem.id))
+        end
+      end
+      def resolve_variables(ast, refs)
+        refs.reduce(ast.dup) do |acc, elem|
+          variable_replace(acc, elem.id, resolve_variable(elem.id))
+        end
+      end
+      # Make a copy of the AST represented by +node+ and replace +fn_id+ with +replacement+ throughout
+      # rubocop:disable Metrics/MethodLength
+      def function_replace(node, fn_id, replacement)
+        if node.type == ::CSVPlusPlus::Entities::Type::FunctionCall && node.id == fn_id
+          call_function_or_builtin(replacement, node)
+        elsif node.type == ::CSVPlusPlus::Entities::Type::FunctionCall
+          # not our function, but continue our depth first search on it
+          ::CSVPlusPlus::Entities::FunctionCall.new(
+            node.id,
+            node.arguments.map { |n| function_replace(n, fn_id, replacement) },
+            infix: node.infix
+          )
+        else
+          node
+        end
+      end
+      # rubocop:enable Metrics/MethodLength
+      def resolve_function(fn_id)
+        id = fn_id.to_sym
+        return @functions[id] if defined_function?(id)
+        ::CSVPlusPlus::Entities::Builtins::FUNCTIONS[id]
+      end
+      def call_function_or_builtin(function_or_builtin, function_call)
+        if function_or_builtin.type == ::CSVPlusPlus::Entities::Type::Function
+          call_function(function_or_builtin, function_call)
+        else
+          function_or_builtin.resolve_fn.call(self, function_call.arguments)
+        end
+      end
+      def call_function(function, function_call)
+        i = 0
+        function.arguments.reduce(function.body.dup) do |ast, argument|
+          variable_replace(ast, argument, function_call.arguments[i]).tap do
+            i += 1
+          end
+        end
+      end
+      # Make a copy of the AST represented by +node+ and replace +var_id+ with +replacement+ throughout
+      def variable_replace(node, var_id, replacement)
+        if node.type == ::CSVPlusPlus::Entities::Type::FunctionCall
+          arguments = node.arguments.map { |n| variable_replace(n, var_id, replacement) }
+          # TODO: refactor these places where we copy functions... it's brittle with the kwargs
+          ::CSVPlusPlus::Entities::FunctionCall.new(node.id, arguments, infix: node.infix)
+        elsif node.type == ::CSVPlusPlus::Entities::Type::Variable && node.id == var_id
+          replacement
+        else
+          node
+        end
+      end
+      # Look up the value of +var_id+: a variable defined in +@variables+ wins, otherwise it must be a builtin
+      # variable (whose +resolve_fn+ is called with this runtime) or a formula syntax error is raised.
+      def resolve_variable(var_id)
+        id = var_id.to_sym
+        return @variables[id] if defined_variable?(id)
+        raise_formula_syntax_error('Undefined variable', var_id) unless builtin_variable?(var_id)
+        builtin = ::CSVPlusPlus::Entities::Builtins::VARIABLES[id]
+        builtin.resolve_fn.call(self)
+      end
+      #       def check_unbound_vars(dependencies, variables)
+      #         unbound_vars = dependencies.values.flatten - variables.keys
+      #         return if unbound_vars.empty?
+      #
+      #         raise_formula_syntax_error('Undefined variables', unbound_vars.map(&:to_s).join(', '))
+      #       end
+      #       def variable_resolution_order(variables)
+      #         # we have a hash of variables => ASTs but they might have references to each other, so
+      #         # we need to interpolate them first (before interpolating the cell values)
+      #         var_dependencies = ::CSVPlusPlus::Graph.dependency_graph(variables, @runtime)
+      #         # are there any references that we don't have variables for? (undefined variable)
+      #         check_unbound_vars(var_dependencies, variables)
+      #
+      #         # a topological sort will give us the order of dependencies
+      #         [var_dependencies, ::CSVPlusPlus::Graph.topological_sort(var_dependencies)]
+      #         # TODO: don't expose this exception directly to the caller
+      #       rescue ::TSort::Cyclic
+      #         @runtime.raise_formula_syntax_error('Cyclic variable dependency detected', var_refs.keys)
+      #       end
+      #       def resolve_dependencies(var_dependencies, resolution_order, variables)
+      #         {}.tap do |resolved_vars|
+      #           # for each var and each dependency it has, build up and mutate resolved_vars
+      #           resolution_order.each do |var|
+      #             resolved_vars[var] = variables[var].dup
+      #
+      #             var_dependencies[var].each do |dependency|
+      #               resolved_vars[var] = variable_replace(resolved_vars[var], dependency, variables[dependency])
+      #             end
+      #           end
+      #         end
+      #       end
+    end
+  end
+end

data/lib/csv_plus_plus/runtime/graph.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# typed: false
+# frozen_string_literal: true
+require 'tsort'
+module CSVPlusPlus
+  module Runtime
+    # Graph ordering and searching functions
+    module Graph
+      # Get a list of all variables references in a given +ast+
+      # TODO: this is only used in one place - refactor it
+      def self.variable_references(ast, runtime, include_runtime_variables: false)
+        depth_first_search(ast) do |node|
+          next unless node.type == ::CSVPlusPlus::Entities::Type::Variable
+          node.id if !runtime.builtin_variable?(node.id) || include_runtime_variables
+        end
+      end
+      # Create a dependency graph of +variables+
+      def self.dependency_graph(variables, runtime)
+        ::CSVPlusPlus::Runtime::Graph::DependencyGraph[
+          variables.map { |var_id, ast| [var_id, variable_references(ast, runtime)] }
+        ]
+      end
+      # TODO: I don't think we use this anymore - it was useful when I wanted to resolve variables in their dependency
+      #   order
+      #
+      # Perform a topological sort on a +DependencyGraph+.  A toplogical sort is noteworthy
+      # because it will give us the order in which we need to resolve our variable dependencies.
+      #
+      # Given this dependency graph:
+      #
+      #  { a: [b c], b: [c], c: [d], d: [] }
+      #
+      # it will return:
+      #
+      #  [d, c, b, a]
+      #
+      def self.topological_sort(dependencies)
+        dependencies.tsort
+      end
+      # Do a DFS on an AST starting at +node+
+      def self.depth_first_search(node, accum = [], &)
+        ret = yield(node)
+        accum << ret unless ret.nil?
+        return accum unless node.type == ::CSVPlusPlus::Entities::Type::FunctionCall
+        node.arguments.each { |n| depth_first_search(n, accum, &) }
+        accum
+      end
+      # A dependency graph represented as a +Hash+ which will be used by our +topological_sort+ function
+      #
+      # Each key is a node and its value is the list of nodes that key depends on.
+      class DependencyGraph < Hash
+        include ::TSort
+        # TSort hook: the nodes of the graph are the keys of the hash
+        alias tsort_each_node each_key
+        # TSort hook: yield each child (dependency) of +node+.  +fetch+ raises a +KeyError+ if +node+ is not a key
+        # of the graph.
+        def tsort_each_child(node, &)
+          fetch(node).each(&)
+        end
+      end
+    end
+  end
+end

data/lib/csv_plus_plus/runtime/position_tracker.rb ADDED Viewed

@@ -0,0 +1,231 @@
+# typed: strict
+# frozen_string_literal: true
+module CSVPlusPlus
+  module Runtime
+    # Functions needed to track all of the runtime pointers: current line number, current row number, current cell, etc.
+    # rubocop:disable Metrics/ModuleLength
+    module PositionTracker
+      extend ::T::Sig
+      # Writers for the position pointers.  Each one accepts +nil+ as well as a value.
+      sig { params(cell: ::T.nilable(::CSVPlusPlus::Cell)).returns(::T.nilable(::CSVPlusPlus::Cell)) }
+      attr_writer :cell
+      sig { params(cell_index: ::T.nilable(::Integer)).returns(::T.nilable(::Integer)) }
+      attr_writer :cell_index
+      sig { params(line_number: ::T.nilable(::Integer)).returns(::T.nilable(::Integer)) }
+      attr_writer :line_number
+      sig { params(row_index: ::T.nilable(::Integer)).returns(::T.nilable(::Integer)) }
+      attr_writer :row_index
+      sig { returns(::CSVPlusPlus::Cell) }
+      # The current cell.  This will only be set when processing the CSV section; reading it while it is unset fails
+      # in +assert_initted!+.
+      #
+      # @return [Cell]
+      def cell
+        # declares the type of +@cell+ for Sorbet without overwriting a cell that has already been set
+        @cell ||= ::T.let(nil, ::T.nilable(::CSVPlusPlus::Cell))
+        assert_initted!(@cell)
+      end
+      sig { returns(::Integer) }
+      # The current CSV cell index.
+      #
+      # This will only be set when processing the CSV section and will throw an exception otherwise.  It is up to the
+      # caller (the compiler) to make sure it's called in the context of a compilation stage and/or a
+      # +#map_row+/+#map_rows+/+#map_lines+
+      #
+      # @return [Integer]
+      def cell_index
+        # declares the type of +@cell_index+ for Sorbet without overwriting an index that has already been set
+        @cell_index ||= ::T.let(nil, ::T.nilable(::Integer))
+        assert_initted!(@cell_index)
+      end
+      sig { returns(::Integer) }
+      # The current CSV row index.
+      #
+      # This will only be set when processing the CSV section and will throw an exception otherwise.  It is up to the
+      # caller (the compiler) to make sure it's called in the context of a compilation stage and/or a
+      # +#map_row+/+#map_rows+/+#map_lines+
+      #
+      # @return [Integer]
+      def row_index
+        # declares the type of +@row_index+ for Sorbet without overwriting an index that has already been set
+        @row_index ||= ::T.let(nil, ::T.nilable(::Integer))
+        assert_initted!(@row_index)
+      end
+      sig { returns(::Integer) }
+      # The current line number being processed.  The line number is based on the entire file, regardless of whether
+      # it's parsing the code section or the CSV section
+      #
+      # This will only be set when processing the csvpp file and will throw an exception otherwise.  It is up to the
+      # caller (the compiler) to make sure it's called in the context of a compilation stage and/or a
+      # +#map_row+/+#map_rows+/+#map_lines+
+      #
+      # @return [Integer]
+      def line_number
+        # declares the type of +@line_number+ for Sorbet without overwriting a number that has already been set
+        @line_number ||= ::T.let(nil, ::T.nilable(::Integer))
+        assert_initted!(@line_number)
+      end
+      sig { void }
+      # Clean up the Tempfile we're using for parsing
+      def cleanup!
+        input&.close
+        input&.unlink
+      end
+      sig { returns(::T.nilable(::Tempfile)) }
+      # The currently available input for parsing.  The tmp state will be re-written
+      # between parsing the code section and the CSV section
+      #
+      # The Tempfile is created on first access and memoized in +@input+.
+      #
+      # @return [::Tempfile]
+      def input
+        @input ||= ::T.let(::Tempfile.new, ::T.nilable(::Tempfile))
+      end
+      sig do
+        type_parameters(:I, :O).params(
+          lines: ::T::Enumerable[::T.type_parameter(:I)],
+          block: ::T.proc.params(args0: ::T.type_parameter(:I)).returns(::T.type_parameter(:O))
+        ).returns(::T::Array[::T.type_parameter(:O)])
+      end
+      # Map over a csvpp file and keep track of line_number and row_index
+      #
+      # @param lines [Array]
+      #
+      # @return [Array]
+      def map_lines(lines, &block)
+        self.line_number = 1
+        lines.map do |line|
+          ret = block.call(line)
+          next_line!
+          ret
+        end
+      end
+      sig do
+        type_parameters(:I, :O)
+          .params(
+            row: ::T::Enumerable[::T.all(::T.type_parameter(:I), ::Object)],
+            block: ::T.proc.params(
+              cell: ::T.all(::T.type_parameter(:I), ::Object),
+              index: ::Integer
+            ).returns(::T.type_parameter(:O))
+          )
+          .returns(::T::Array[::T.type_parameter(:O)])
+      end
+      # Map over a single row and keep track of the cell and it's index
+      #
+      # @param row [Array<Cell>] The row to map each cell over
+      #
+      # @return [Array]
+      def map_row(row, &block)
+        row.map.with_index do |cell, index|
+          self.cell_index = index
+          self.cell = cell if cell.is_a?(::CSVPlusPlus::Cell)
+          block.call(cell, index)
+        end
+      end
+      sig do
+        type_parameters(:O).params(
+          rows: ::T::Enumerable[::CSVPlusPlus::Row],
+          block: ::T.proc.params(row: ::CSVPlusPlus::Row).returns(::T.type_parameter(:O))
+        ).returns(::T::Array[::T.type_parameter(:O)])
+      end
+      # Map over all rows and keep track of row and line numbers
+      #
+      # @param rows [Array<Row>] The rows to map over (and keep track of indexes)
+      #
+      # @return [Array]
+      def map_rows(rows, &block)
+        self.row_index = 0
+        map_lines(rows) do |row|
+          block.call(row)
+        end
+      end
+      sig do
+        type_parameters(:R)
+          .params(rows: ::T::Enumerable[::CSVPlusPlus::Row],
+                  block: ::T.proc.params(cell: ::CSVPlusPlus::Cell, index: ::Integer).returns(::T.type_parameter(:R)))
+          .returns(::T::Array[::T::Array[::T.type_parameter(:R)]])
+      end
+      # Map over all +rows+ and over all of their +cells+, calling the +&block+ with each +Cell+
+      #
+      # @param rows [Array<Row>]
+      #
+      # @return [Array<Array>]
+      # rubocop:disable Naming/BlockForwarding
+      def map_all_cells(rows, &block)
+        self.row_index = 0
+        map_lines(rows) { |row| map_row(row.cells, &block) }
+      end
+      # rubocop:enable Naming/BlockForwarding
+      sig { returns(::Integer) }
+      # Return the current spreadsheet row number.  It parallels +@row_index+ but starts at 1.
+      #
+      # @return [Integer, nil]
+      def rownum
+        row_index + 1
+      end
+      sig do
+        type_parameters(:R).params(block: ::T.proc.returns(::T.type_parameter(:R))).returns(::T.type_parameter(:R))
+      end
+      # Each time we run a parse on the input, reset the runtime state starting at the beginning of the file
+      def start!(&block)
+        @row_index = @cell_index = 0
+        self.line_number = 1
+        ret = block.call
+        finish!
+        ret
+      end
+      sig { params(data: ::String).void }
+      # We mutate the input over and over. It's ok because it's just a Tempfile
+      #
+      # @param data [::String] The data to rewrite our input file to
+      def rewrite_input!(data)
+        input&.truncate(0)
+        input&.write(data)
+        input&.rewind
+      end
+      private
+      sig do
+        type_parameters(:R).params(runtime_value: ::T.nilable(::T.type_parameter(:R))).returns(::T.type_parameter(:R))
+      end
+      # Pass +runtime_value+ through +::T.must_because+ so that reading a tracker which is still +nil+ fails with a
+      # message explaining that the runtime has not been started.
+      #
+      # @param runtime_value [Object, nil] The tracked value to check
+      #
+      # @return [Object] +runtime_value+
+      def assert_initted!(runtime_value)
+        ::T.must_because(runtime_value) do
+          'Runtime value accessed without an initialized runtime.  Make sure you call Runtime#start! or ' \
+            'Runtime#start_at_csv! first.'
+        end
+      end
+      sig { void }
+      # Called to mark the trackers dirty.  It should be an error to use them outside of an initialized context.
+      def finish!
+        @line_number = nil
+        @row_index = nil
+        @cell_index = nil
+        @cell = nil
+      end
+      sig { returns(::Integer) }
+      # Increment state to the next line
+      #
+      # @return [Integer]
+      def next_line!
+        self.row_index += 1
+        self.line_number += 1
+      end
+    end
+    # rubocop:enable Metrics/ModuleLength
+  end
+end

data/lib/csv_plus_plus/runtime/references.rb ADDED Viewed

@@ -0,0 +1,110 @@
+# typed: strict
+# frozen_string_literal: true
+module CSVPlusPlus
+  module Runtime
+    # References in an AST that need to be resolved
+    #
+    # @attr functions [Array<Entities::FunctionCall>] Function call references
+    # @attr variables [Array<Entities::Variable>] Variable references
+    # TODO: turn this into a CanExtractReferences?
+    class References
+      extend ::T::Sig
+      sig { returns(::T::Array[::CSVPlusPlus::Entities::FunctionCall]) }
+      # The function calls collected so far
+      attr_accessor :functions
+      sig { returns(::T::Array[::CSVPlusPlus::Entities::Variable]) }
+      # The variable references collected so far
+      attr_accessor :variables
+      sig do
+        params(
+          ast: ::CSVPlusPlus::Entities::Entity,
+          runtime: ::CSVPlusPlus::Runtime::Runtime
+        ).returns(::CSVPlusPlus::Runtime::References)
+      end
+      # Extract references from an AST and return them in a new +References+ object
+      #
+      # @param ast [Entity] An +Entity+ to do a depth first search on for references.  Entities can be
+      #   infinitely deep because they can contain other function calls as params to a function call
+      # @param runtime [Runtime] The current runtime
+      #
+      # @return [References]
+      def self.extract(ast, runtime)
+        new.tap do |refs|
+          ::CSVPlusPlus::Runtime::Graph.depth_first_search(ast) do |node|
+            unless node.type == ::CSVPlusPlus::Entities::Type::FunctionCall \
+                || node.type == ::CSVPlusPlus::Entities::Type::Variable
+              next
+            end
+            refs.functions << node if function_reference?(node, runtime)
+            refs.variables << node if variable_reference?(node, runtime)
+          end
+        end
+      end
+      sig do
+        params(node: ::CSVPlusPlus::Entities::Entity, runtime: ::CSVPlusPlus::Runtime::Runtime).returns(::T::Boolean)
+      end
+      # Is the node a resolvable variable reference?
+      #
+      # @param node [Entity] The node to check if it's resolvable
+      # @param runtime [Runtime] The current runtime
+      #
+      # @return [boolean]
+      def self.variable_reference?(node, runtime)
+        return false unless node.type == ::CSVPlusPlus::Entities::Type::Variable
+        if runtime.in_scope?(node.id)
+          true
+        else
+          runtime.raise_modifier_syntax_error(
+            "#{node.id} can only be referenced within the ![[expand]] where it was defined.",
+            node.id.to_s
+          )
+        end
+      end
+      private_class_method :variable_reference?
+      sig do
+        params(node: ::CSVPlusPlus::Entities::Entity, runtime: ::CSVPlusPlus::Runtime::Runtime).returns(::T::Boolean)
+      end
+      # Is the node a resolvable function reference?
+      #
+      # @param node [Entity] The node to check if it's resolvable
+      # @param runtime [Runtime] The current runtime
+      #
+      # @return [boolean]
+      def self.function_reference?(node, runtime)
+        node.type == ::CSVPlusPlus::Entities::Type::FunctionCall \
+          && (runtime.defined_function?(node.id) || runtime.builtin_function?(::T.must(node.id)))
+      end
+      private_class_method :function_reference?
+      sig { void }
+      # Create an object with empty references.  The caller will build them up as it depth-first-searches
+      def initialize
+        # both lists start out empty; +T.let+ declares their element types for Sorbet
+        @functions = ::T.let([], ::T::Array[::CSVPlusPlus::Entities::FunctionCall])
+        @variables = ::T.let([], ::T::Array[::CSVPlusPlus::Entities::Variable])
+      end
+      sig { params(other: ::CSVPlusPlus::Runtime::References).returns(::T::Boolean) }
+      # @param other [References]
+      #
+      # @return [boolean]
+      def ==(other)
+        @functions == other.functions && @variables == other.variables
+      end
+      sig { returns(::T::Boolean) }
+      # Are there any references to be resolved?
+      #
+      # @return [::T::Boolean]
+      def empty?
+        @functions.empty? && @variables.empty?
+      end
+    end
+  end
+end