csv_plus_plus 0.0.5 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +1 -0
- data/lib/csv_plus_plus/cell.rb +24 -8
- data/lib/csv_plus_plus/cli.rb +29 -16
- data/lib/csv_plus_plus/cli_flag.rb +10 -2
- data/lib/csv_plus_plus/code_section.rb +55 -3
- data/lib/csv_plus_plus/color.rb +19 -5
- data/lib/csv_plus_plus/google_options.rb +6 -2
- data/lib/csv_plus_plus/graph.rb +0 -1
- data/lib/csv_plus_plus/language/ast_builder.rb +68 -0
- data/lib/csv_plus_plus/language/benchmarked_compiler.rb +65 -0
- data/lib/csv_plus_plus/language/builtins.rb +46 -0
- data/lib/csv_plus_plus/language/cell_value.tab.rb +106 -134
- data/lib/csv_plus_plus/language/code_section.tab.rb +163 -192
- data/lib/csv_plus_plus/language/compiler.rb +75 -92
- data/lib/csv_plus_plus/language/entities/boolean.rb +3 -2
- data/lib/csv_plus_plus/language/entities/cell_reference.rb +10 -3
- data/lib/csv_plus_plus/language/entities/entity.rb +20 -8
- data/lib/csv_plus_plus/language/entities/function.rb +6 -4
- data/lib/csv_plus_plus/language/entities/function_call.rb +17 -5
- data/lib/csv_plus_plus/language/entities/number.rb +6 -4
- data/lib/csv_plus_plus/language/entities/runtime_value.rb +9 -8
- data/lib/csv_plus_plus/language/entities/string.rb +6 -4
- data/lib/csv_plus_plus/language/references.rb +22 -5
- data/lib/csv_plus_plus/language/runtime.rb +80 -22
- data/lib/csv_plus_plus/language/scope.rb +34 -39
- data/lib/csv_plus_plus/language/syntax_error.rb +10 -5
- data/lib/csv_plus_plus/lexer/lexer.rb +27 -13
- data/lib/csv_plus_plus/lexer/tokenizer.rb +35 -11
- data/lib/csv_plus_plus/modifier.rb +38 -18
- data/lib/csv_plus_plus/modifier.tab.rb +2 -2
- data/lib/csv_plus_plus/options.rb +20 -2
- data/lib/csv_plus_plus/row.rb +15 -4
- data/lib/csv_plus_plus/template.rb +26 -6
- data/lib/csv_plus_plus/version.rb +1 -1
- data/lib/csv_plus_plus/writer/excel.rb +2 -9
- data/lib/csv_plus_plus/writer/file_backer_upper.rb +22 -20
- data/lib/csv_plus_plus/writer/google_sheet_builder.rb +8 -10
- data/lib/csv_plus_plus/writer/google_sheets.rb +4 -10
- data/lib/csv_plus_plus/writer/rubyxl_builder.rb +23 -15
- data/lib/csv_plus_plus/writer/rubyxl_modifier.rb +15 -8
- data/lib/csv_plus_plus.rb +42 -8
- metadata +5 -2
@@ -4,31 +4,44 @@ require_relative 'entities'
|
|
4
4
|
require_relative 'syntax_error'
|
5
5
|
require 'tempfile'
|
6
6
|
|
7
|
-
ENTITIES = ::CSVPlusPlus::Language::Entities
|
8
|
-
|
9
|
-
RUNTIME_VARIABLES = {
|
10
|
-
rownum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.row_index + 1) }),
|
11
|
-
cellnum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.cell_index + 1) })
|
12
|
-
}.freeze
|
13
|
-
|
14
7
|
module CSVPlusPlus
|
15
8
|
module Language
|
16
|
-
|
17
|
-
#
|
9
|
+
# The runtime state of the compiler (the current +line_number+/+row_index+, +cell+ being processed, etc). We take
|
10
|
+
# multiple runs through the input file for parsing so it's really convenient to have a central place for these
|
11
|
+
# things to be managed.
|
12
|
+
#
|
13
|
+
# @attr_reader filename [String, nil] The filename that the input came from (mostly used for debugging since
|
14
|
+
# +filename+ can be +nil+ if it's read from stdin).
|
15
|
+
# @attr_reader length_of_code_section [Integer] The length (count of lines) of the code section part of the original
|
16
|
+
# input.
|
17
|
+
# @attr_reader length_of_csv_section [Integer] The length (count of lines) of the CSV part of the original csvpp
|
18
|
+
# input.
|
19
|
+
# @attr_reader length_of_original_file [Integer] The length (count of lines) of the original csvpp input.
|
20
|
+
#
|
21
|
+
# @attr cell [Cell] The current cell being processed
|
22
|
+
# @attr cell_index [Integer] The index of the current cell being processed (starts at 0)
|
23
|
+
# @attr row_index [Integer] The index of the current row being processed (starts at 0)
|
24
|
+
# @attr line_number [Integer] The line number of the original csvpp template (starts at 1)
|
18
25
|
class Runtime
|
19
26
|
attr_reader :filename, :length_of_code_section, :length_of_csv_section, :length_of_original_file
|
20
27
|
|
21
28
|
attr_accessor :cell, :cell_index, :row_index, :line_number
|
22
29
|
|
23
|
-
#
|
30
|
+
# @param input [String] The input to be parsed
|
31
|
+
# @param filename [String, nil] The filename that the input came from (mostly used for debugging since +filename+
|
32
|
+
# can be +nil+ if it's read from stdin)
|
24
33
|
def initialize(input:, filename:)
|
25
34
|
@filename = filename || 'stdin'
|
26
35
|
|
27
36
|
init_input!(input)
|
28
|
-
|
37
|
+
start!
|
29
38
|
end
|
30
39
|
|
31
|
-
#
|
40
|
+
# Map over a csvpp file and keep track of line_number and row_index
|
41
|
+
#
|
42
|
+
# @param lines [Array]
|
43
|
+
#
|
44
|
+
# @return [Array]
|
32
45
|
def map_lines(lines, &block)
|
33
46
|
@line_number = 1
|
34
47
|
lines.map do |line|
|
@@ -36,7 +49,11 @@ module CSVPlusPlus
|
|
36
49
|
end
|
37
50
|
end
|
38
51
|
|
39
|
-
#
|
52
|
+
# Map over a single row and keep track of the cell and its index
|
53
|
+
#
|
54
|
+
# @param row [Array<Cell>] The row to map each cell over
|
55
|
+
#
|
56
|
+
# @return [Array]
|
40
57
|
def map_row(row, &block)
|
41
58
|
@cell_index = 0
|
42
59
|
row.map.with_index do |cell, index|
|
@@ -45,7 +62,12 @@ module CSVPlusPlus
|
|
45
62
|
end
|
46
63
|
end
|
47
64
|
|
48
|
-
#
|
65
|
+
# Map over all rows and keep track of row and line numbers
|
66
|
+
#
|
67
|
+
# @param rows [Array<Row>] The rows to map over (and keep track of indexes)
|
68
|
+
# @param cells_too [boolean] If the cells of each +row+ should be iterated over also.
|
69
|
+
#
|
70
|
+
# @return [Array]
|
49
71
|
def map_rows(rows, cells_too: false, &block)
|
50
72
|
@row_index = 0
|
51
73
|
map_lines(rows) do |row|
|
@@ -59,56 +81,92 @@ module CSVPlusPlus
|
|
59
81
|
end
|
60
82
|
|
61
83
|
# Increment state to the next line
|
84
|
+
#
|
85
|
+
# @return [Integer]
|
62
86
|
def next_line!
|
63
87
|
@row_index += 1 unless @row_index.nil?
|
64
88
|
@line_number += 1
|
65
89
|
end
|
66
90
|
|
91
|
+
# Return the current spreadsheet row number. It parallels +@row_index+ but starts at 1.
|
92
|
+
#
|
93
|
+
# @return [Integer, nil]
|
94
|
+
def rownum
|
95
|
+
return if @row_index.nil?
|
96
|
+
|
97
|
+
@row_index + 1
|
98
|
+
end
|
99
|
+
|
67
100
|
# Set the current cell and index
|
101
|
+
#
|
102
|
+
# @param cell [Cell] The current cell
|
103
|
+
# @param cell_index [Integer] The index of the cell
|
68
104
|
def set_cell!(cell, cell_index)
|
69
105
|
@cell = cell
|
70
106
|
@cell_index = cell_index
|
71
107
|
end
|
72
108
|
|
73
|
-
# Each time we run a parse on the input,
|
74
|
-
|
75
|
-
def init!(start_line_number_at)
|
109
|
+
# Each time we run a parse on the input, reset the runtime state starting at the beginning of the file
|
110
|
+
def start!
|
76
111
|
@row_index = @cell_index = nil
|
77
|
-
@line_number =
|
112
|
+
@line_number = 1
|
113
|
+
end
|
114
|
+
|
115
|
+
# Reset the runtime state starting at the CSV section
|
116
|
+
def start_at_csv!
|
117
|
+
# TODO: isn't the input re-written anyway without the code section? why do we need this?
|
118
|
+
start!
|
119
|
+
@line_number = @length_of_code_section || 1
|
78
120
|
end
|
79
121
|
|
80
|
-
#
|
122
|
+
# @return [String]
|
81
123
|
def to_s
|
82
124
|
"Runtime(cell: #{@cell}, row_index: #{@row_index}, cell_index: #{@cell_index})"
|
83
125
|
end
|
84
126
|
|
85
|
-
#
|
127
|
+
# Get the current (entity) value of a runtime value
|
128
|
+
#
|
129
|
+
# @param var_id [String, Symbol] The Variable#id of the variable being resolved.
|
130
|
+
#
|
131
|
+
# @return [Entity]
|
86
132
|
def runtime_value(var_id)
|
87
133
|
if runtime_variable?(var_id)
|
88
|
-
::
|
134
|
+
::CSVPlusPlus::Language::Builtins::VARIABLES[var_id.to_sym].resolve_fn.call(self)
|
89
135
|
else
|
90
136
|
raise_syntax_error('Undefined variable', var_id)
|
91
137
|
end
|
92
138
|
end
|
93
139
|
|
94
140
|
# Is +var_id+ a runtime variable? (it's a static variable otherwise)
|
141
|
+
#
|
142
|
+
# @param var_id [String, Symbol] The Variable#id to check if it's a runtime variable
|
143
|
+
#
|
144
|
+
# @return [boolean]
|
95
145
|
def runtime_variable?(var_id)
|
96
|
-
::
|
146
|
+
::CSVPlusPlus::Language::Builtins::VARIABLES.key?(var_id.to_sym)
|
97
147
|
end
|
98
148
|
|
99
149
|
# Called when an error is encountered during parsing. It will construct a useful
|
100
150
|
# error with the current +@row/@cell_index+, +@line_number+ and +@filename+
|
151
|
+
#
|
152
|
+
# @param message [String] A message relevant to why this error is being raised.
|
153
|
+
# @param bad_input [String] The offending input that caused this error to be thrown.
|
154
|
+
# @param wrapped_error [StandardError, nil] The underlying error that was raised (if it's not from our own logic)
|
101
155
|
def raise_syntax_error(message, bad_input, wrapped_error: nil)
|
102
156
|
raise(::CSVPlusPlus::Language::SyntaxError.new(message, bad_input, self, wrapped_error:))
|
103
157
|
end
|
104
158
|
|
105
159
|
# The currently available input for parsing. The tmp state will be re-written
|
106
160
|
# between parsing the code section and the CSV section
|
161
|
+
#
|
162
|
+
# @return [String]
|
107
163
|
def input
|
108
164
|
@tmp
|
109
165
|
end
|
110
166
|
|
111
167
|
# We mutate the input over and over. It's ok because it's just a Tempfile
|
168
|
+
#
|
169
|
+
# @param data [String] The data to rewrite our input file to
|
112
170
|
def rewrite_input!(data)
|
113
171
|
@tmp.truncate(0)
|
114
172
|
@tmp.write(data)
|
@@ -6,40 +6,29 @@ require_relative './entities'
|
|
6
6
|
require_relative './references'
|
7
7
|
require_relative './syntax_error'
|
8
8
|
|
9
|
-
BUILTIN_FUNCTIONS = {
|
10
|
-
# =CELLREF(C) === =INDIRECT(CONCAT($$C, $$rownum))
|
11
|
-
cellref: ::CSVPlusPlus::Language::Entities::Function.new(
|
12
|
-
:cellref,
|
13
|
-
[:cell],
|
14
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
15
|
-
:indirect,
|
16
|
-
[
|
17
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
18
|
-
:concat,
|
19
|
-
[
|
20
|
-
::CSVPlusPlus::Language::Entities::Variable.new(:cell),
|
21
|
-
::CSVPlusPlus::Language::Entities::Variable.new(:rownum)
|
22
|
-
]
|
23
|
-
)
|
24
|
-
]
|
25
|
-
)
|
26
|
-
)
|
27
|
-
}.freeze
|
28
|
-
|
29
9
|
module CSVPlusPlus
|
30
10
|
module Language
|
31
11
|
# A class representing the scope of the current Template and responsible for resolving variables
|
12
|
+
#
|
13
|
+
# @attr_reader code_section [CodeSection] The CodeSection containing variables and functions to be resolved
|
14
|
+
# @attr_reader runtime [Runtime] The compiler's current runtime
|
15
|
+
#
|
32
16
|
# rubocop:disable Metrics/ClassLength
|
33
17
|
class Scope
|
34
18
|
attr_reader :code_section, :runtime
|
35
19
|
|
36
20
|
# initialize with a +Runtime+ and optional +CodeSection+
|
21
|
+
#
|
22
|
+
# @param runtime [Runtime]
|
23
|
+
# @param code_section [CodeSection, nil]
|
37
24
|
def initialize(runtime:, code_section: nil)
|
38
25
|
@code_section = code_section if code_section
|
39
26
|
@runtime = runtime
|
40
27
|
end
|
41
28
|
|
42
29
|
# Resolve all values in the ast of the current cell being processed
|
30
|
+
#
|
31
|
+
# @return [Entity]
|
43
32
|
def resolve_cell_value
|
44
33
|
return unless (ast = @runtime.cell&.ast)
|
45
34
|
|
@@ -56,14 +45,14 @@ module CSVPlusPlus
|
|
56
45
|
end
|
57
46
|
|
58
47
|
# Set the +code_section+ and resolve all inner dependencies in its variables and functions.
|
48
|
+
#
|
49
|
+
# @param code_section [CodeSection] The code_section to be resolved
|
59
50
|
def code_section=(code_section)
|
60
51
|
@code_section = code_section
|
61
|
-
|
62
52
|
resolve_static_variables!
|
63
|
-
resolve_static_functions!
|
64
53
|
end
|
65
54
|
|
66
|
-
#
|
55
|
+
# @return [String]
|
67
56
|
def to_s
|
68
57
|
"Scope(code_section: #{@code_section}, runtime: #{@runtime})"
|
69
58
|
end
|
@@ -71,10 +60,10 @@ module CSVPlusPlus
|
|
71
60
|
private
|
72
61
|
|
73
62
|
# Resolve all variable references defined statically in the code section
|
63
|
+
# TODO: experiment with getting rid of this - does it even play correctly with runtime vars?
|
74
64
|
def resolve_static_variables!
|
75
65
|
variables = @code_section.variables
|
76
66
|
last_var_dependencies = {}
|
77
|
-
# TODO: might not need the infinite loop wrap
|
78
67
|
loop do
|
79
68
|
var_dependencies, resolution_order = variable_resolution_order(only_static_vars(variables))
|
80
69
|
return if var_dependencies == last_var_dependencies
|
@@ -89,14 +78,6 @@ module CSVPlusPlus
|
|
89
78
|
var_dependencies.reject { |k| @runtime.runtime_variable?(k) }
|
90
79
|
end
|
91
80
|
|
92
|
-
# Resolve all functions defined statically in the code section
|
93
|
-
def resolve_static_functions!
|
94
|
-
# TODO: I'm still torn if it's worth replacing function references
|
95
|
-
#
|
96
|
-
# my current theory is that if we resolve static functions befor processing each cell,
|
97
|
-
# overall compile time will be improved because there will be less to do for each cell
|
98
|
-
end
|
99
|
-
|
100
81
|
def resolve_functions(ast, refs)
|
101
82
|
refs.reduce(ast.dup) do |acc, elem|
|
102
83
|
function_replace(acc, elem.id, resolve_function(elem.id))
|
@@ -110,26 +91,39 @@ module CSVPlusPlus
|
|
110
91
|
end
|
111
92
|
|
112
93
|
# Make a copy of the AST represented by +node+ and replace +fn_id+ with +replacement+ throughout
|
94
|
+
# rubocop:disable Metrics/MethodLength
|
113
95
|
def function_replace(node, fn_id, replacement)
|
114
96
|
if node.function_call? && node.id == fn_id
|
115
|
-
|
97
|
+
call_function_or_runtime_value(replacement, node)
|
116
98
|
elsif node.function_call?
|
117
|
-
|
118
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
99
|
+
# not our function, but continue our depth first search on it
|
100
|
+
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
101
|
+
node.id,
|
102
|
+
node.arguments.map { |n| function_replace(n, fn_id, replacement) },
|
103
|
+
infix: node.infix
|
104
|
+
)
|
119
105
|
else
|
120
106
|
node
|
121
107
|
end
|
122
108
|
end
|
109
|
+
# rubocop:enable Metrics/MethodLength
|
123
110
|
|
124
111
|
def resolve_function(fn_id)
|
125
112
|
id = fn_id.to_sym
|
126
113
|
return @code_section.functions[id] if @code_section.defined_function?(id)
|
127
114
|
|
128
|
-
|
129
|
-
|
115
|
+
::CSVPlusPlus::Language::Builtins::FUNCTIONS[id]
|
116
|
+
end
|
117
|
+
|
118
|
+
def call_function_or_runtime_value(function_or_runtime_value, function_call)
|
119
|
+
if function_or_runtime_value.function?
|
120
|
+
call_function(function_or_runtime_value, function_call)
|
121
|
+
else
|
122
|
+
function_or_runtime_value.resolve_fn.call(@runtime, function_call.arguments)
|
123
|
+
end
|
130
124
|
end
|
131
125
|
|
132
|
-
def
|
126
|
+
def call_function(function, function_call)
|
133
127
|
i = 0
|
134
128
|
function.arguments.reduce(function.body.dup) do |ast, argument|
|
135
129
|
variable_replace(ast, argument, function_call.arguments[i]).tap do
|
@@ -142,7 +136,8 @@ module CSVPlusPlus
|
|
142
136
|
def variable_replace(node, var_id, replacement)
|
143
137
|
if node.function_call?
|
144
138
|
arguments = node.arguments.map { |n| variable_replace(n, var_id, replacement) }
|
145
|
-
|
139
|
+
# TODO: refactor these places where we copy functions... it's brittle with the kwargs
|
140
|
+
::CSVPlusPlus::Language::Entities::FunctionCall.new(node.id, arguments, infix: node.infix)
|
146
141
|
elsif node.variable? && node.id == var_id
|
147
142
|
replacement
|
148
143
|
else
|
@@ -2,10 +2,13 @@
|
|
2
2
|
|
3
3
|
module CSVPlusPlus
|
4
4
|
module Language
|
5
|
-
##
|
6
5
|
# An error that can be thrown for various syntax errors
|
7
6
|
class SyntaxError < ::CSVPlusPlus::Error
|
8
|
-
#
|
7
|
+
# @param message [String] The primary message to be shown to the user
|
8
|
+
# @param bad_input [String] The offending input that caused the error to be thrown
|
9
|
+
# @param runtime [Runtime] The current runtime
|
10
|
+
# @param wrapped_error [StandardError] The underlying error that caused the syntax error. For example a
|
11
|
+
# Racc::ParseError that was thrown
|
9
12
|
def initialize(message, bad_input, runtime, wrapped_error: nil)
|
10
13
|
@bad_input = bad_input.to_s
|
11
14
|
@runtime = runtime
|
@@ -15,19 +18,21 @@ module CSVPlusPlus
|
|
15
18
|
super(message)
|
16
19
|
end
|
17
20
|
|
18
|
-
#
|
21
|
+
# @return [String]
|
19
22
|
def to_s
|
20
23
|
to_trace
|
21
24
|
end
|
22
25
|
|
23
26
|
# Output a verbose user-helpful string that references the current runtime
|
24
27
|
def to_verbose_trace
|
25
|
-
warn(@wrapped_error.full_message)
|
26
|
-
warn(@wrapped_error.backtrace)
|
28
|
+
warn(@wrapped_error.full_message) if @wrapped_error
|
29
|
+
warn(@wrapped_error.backtrace) if @wrapped_error
|
27
30
|
to_trace
|
28
31
|
end
|
29
32
|
|
30
33
|
# Output a user-helpful string that references the runtime state
|
34
|
+
#
|
35
|
+
# @return [String]
|
31
36
|
def to_trace
|
32
37
|
"#{message_prefix}#{cell_index} #{message_postfix}"
|
33
38
|
end
|
@@ -1,19 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CSVPlusPlus
|
4
|
-
# Common methods to be mixed into
|
4
|
+
# Common methods to be mixed into the Racc parsers
|
5
|
+
#
|
6
|
+
# @attr_reader tokens [Array]
|
5
7
|
module Lexer
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
attr_reader :tokens
|
9
|
+
|
10
|
+
# Initialize a lexer instance with an empty +@tokens+
|
11
|
+
def initialize(tokens: [])
|
12
|
+
@tokens = tokens
|
9
13
|
end
|
10
14
|
|
11
15
|
# Used by racc to iterate each token
|
16
|
+
#
|
17
|
+
# @return [Array<(String, String)>]
|
12
18
|
def next_token
|
13
19
|
@tokens.shift
|
14
20
|
end
|
15
21
|
|
16
|
-
#
|
22
|
+
# Orchestrate the tokenizing, parsing and error handling of parsing input. Each instance will implement their own
|
23
|
+
# #tokenizer method
|
24
|
+
#
|
25
|
+
# @return [Lexer#return_value] Each instance will define its own +return_value+ with the result of parsing
|
17
26
|
def parse(input, runtime)
|
18
27
|
return if input.nil?
|
19
28
|
|
@@ -26,12 +35,23 @@ module CSVPlusPlus
|
|
26
35
|
runtime.raise_syntax_error("Error parsing #{parse_subject}", e.message, wrapped_error: e)
|
27
36
|
end
|
28
37
|
|
29
|
-
|
38
|
+
TOKEN_LIBRARY = {
|
39
|
+
TRUE: [/true/i, :TRUE],
|
40
|
+
FALSE: [/false/i, :FALSE],
|
41
|
+
NUMBER: [/-?[\d.]+/, :NUMBER],
|
42
|
+
STRING: [%r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"}, :STRING],
|
43
|
+
INFIX_OP: [%r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, :INFIX_OP],
|
44
|
+
VAR_REF: [/\$\$/, :VAR_REF],
|
45
|
+
ID: [/[$!\w:]+/, :ID]
|
46
|
+
}.freeze
|
47
|
+
public_constant :TOKEN_LIBRARY
|
48
|
+
|
49
|
+
private
|
30
50
|
|
31
51
|
def tokenize(input, runtime)
|
32
52
|
return if input.nil?
|
33
53
|
|
34
|
-
t = tokenizer(input)
|
54
|
+
t = tokenizer.scan(input)
|
35
55
|
|
36
56
|
until t.scanner.empty?
|
37
57
|
next if t.matches_ignore?
|
@@ -45,12 +65,6 @@ module CSVPlusPlus
|
|
45
65
|
@tokens << %i[EOL EOL]
|
46
66
|
end
|
47
67
|
|
48
|
-
def e(type, *entity_args)
|
49
|
-
::CSVPlusPlus::Language::TYPES[type].new(*entity_args)
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
|
54
68
|
def consume_token(tokenizer, runtime)
|
55
69
|
if tokenizer.last_token
|
56
70
|
@tokens << [tokenizer.last_token, tokenizer.last_match]
|
@@ -5,13 +5,14 @@ require 'strscan'
|
|
5
5
|
module CSVPlusPlus
|
6
6
|
module Lexer
|
7
7
|
# A class that contains the use-case-specific regexes for parsing
|
8
|
+
#
|
9
|
+
# @attr_reader last_token [String] The last token that's been matched.
|
10
|
+
# @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
|
8
11
|
class Tokenizer
|
9
12
|
attr_reader :last_token, :scanner
|
10
13
|
|
11
|
-
#
|
12
|
-
|
13
|
-
def initialize(input:, tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
|
14
|
-
@scanner = ::StringScanner.new(input.strip)
|
14
|
+
# @param input [String]
|
15
|
+
def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
|
15
16
|
@last_token = nil
|
16
17
|
|
17
18
|
@catchall = catchall
|
@@ -20,43 +21,66 @@ module CSVPlusPlus
|
|
20
21
|
@stop_fn = stop_fn
|
21
22
|
@alter_matches = alter_matches
|
22
23
|
end
|
23
|
-
# rubocop:enable Metrics/ParameterLists
|
24
24
|
|
25
|
-
#
|
25
|
+
# Initializes a scanner for the given input to be parsed
|
26
|
+
#
|
27
|
+
# @param input The input to be tokenized
|
28
|
+
# @return [Tokenizer]
|
29
|
+
def scan(input)
|
30
|
+
@scanner = ::StringScanner.new(input.strip)
|
31
|
+
self
|
32
|
+
end
|
33
|
+
|
34
|
+
# Scan tokens and set +@last_token+ if any match
|
35
|
+
#
|
36
|
+
# @return [String, nil]
|
26
37
|
def scan_tokens!
|
27
38
|
m = @tokens.find { |t| @scanner.scan(t.first) }
|
28
39
|
@last_token = m ? m[1] : nil
|
29
40
|
end
|
30
41
|
|
31
42
|
# Scan input against the catchall pattern
|
43
|
+
#
|
44
|
+
# @return [String, nil]
|
32
45
|
def scan_catchall
|
33
46
|
@scanner.scan(@catchall) if @catchall
|
34
47
|
end
|
35
48
|
|
36
49
|
# Scan input against the ignore pattern
|
50
|
+
#
|
51
|
+
# @return [boolean]
|
37
52
|
def matches_ignore?
|
38
53
|
@scanner.scan(@ignore) if @ignore
|
39
54
|
end
|
40
55
|
|
41
56
|
# The value of the last token matched
|
57
|
+
#
|
58
|
+
# @return [String, nil]
|
42
59
|
def last_match
|
43
60
|
return @alter_matches[@last_token].call(@scanner.matched) if @alter_matches.key?(@last_token)
|
44
61
|
|
45
62
|
@scanner.matched
|
46
63
|
end
|
47
64
|
|
48
|
-
#
|
49
|
-
|
50
|
-
|
65
|
+
# Read the input but don't consume it
|
66
|
+
#
|
67
|
+
# @param peek_characters [Integer]
|
68
|
+
#
|
69
|
+
# @return [String]
|
70
|
+
def peek(peek_characters: 100)
|
71
|
+
@scanner.peek(peek_characters)
|
51
72
|
end
|
52
73
|
|
53
74
|
# Scan for our stop token (if there is one - some parsers stop early and some don't)
|
75
|
+
#
|
76
|
+
# @return [boolean]
|
54
77
|
def stop?
|
55
78
|
@stop_fn ? @stop_fn.call(@scanner) : false
|
56
79
|
end
|
57
80
|
|
58
|
-
# The rest of the un-parsed input. The tokenizer might not need to
|
59
|
-
#
|
81
|
+
# The rest of the un-parsed input. The tokenizer might not need to parse the entire input
|
82
|
+
#
|
83
|
+
# @return [String]
|
60
84
|
def rest
|
61
85
|
@scanner.rest
|
62
86
|
end
|