csv_plus_plus 0.0.5 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +1 -0
- data/lib/csv_plus_plus/cell.rb +24 -8
- data/lib/csv_plus_plus/cli.rb +29 -16
- data/lib/csv_plus_plus/cli_flag.rb +10 -2
- data/lib/csv_plus_plus/code_section.rb +55 -3
- data/lib/csv_plus_plus/color.rb +19 -5
- data/lib/csv_plus_plus/google_options.rb +6 -2
- data/lib/csv_plus_plus/graph.rb +0 -1
- data/lib/csv_plus_plus/language/ast_builder.rb +68 -0
- data/lib/csv_plus_plus/language/benchmarked_compiler.rb +65 -0
- data/lib/csv_plus_plus/language/builtins.rb +46 -0
- data/lib/csv_plus_plus/language/cell_value.tab.rb +106 -134
- data/lib/csv_plus_plus/language/code_section.tab.rb +163 -192
- data/lib/csv_plus_plus/language/compiler.rb +75 -92
- data/lib/csv_plus_plus/language/entities/boolean.rb +3 -2
- data/lib/csv_plus_plus/language/entities/cell_reference.rb +10 -3
- data/lib/csv_plus_plus/language/entities/entity.rb +20 -8
- data/lib/csv_plus_plus/language/entities/function.rb +6 -4
- data/lib/csv_plus_plus/language/entities/function_call.rb +17 -5
- data/lib/csv_plus_plus/language/entities/number.rb +6 -4
- data/lib/csv_plus_plus/language/entities/runtime_value.rb +9 -8
- data/lib/csv_plus_plus/language/entities/string.rb +6 -4
- data/lib/csv_plus_plus/language/references.rb +22 -5
- data/lib/csv_plus_plus/language/runtime.rb +80 -22
- data/lib/csv_plus_plus/language/scope.rb +34 -39
- data/lib/csv_plus_plus/language/syntax_error.rb +10 -5
- data/lib/csv_plus_plus/lexer/lexer.rb +27 -13
- data/lib/csv_plus_plus/lexer/tokenizer.rb +35 -11
- data/lib/csv_plus_plus/modifier.rb +38 -18
- data/lib/csv_plus_plus/modifier.tab.rb +2 -2
- data/lib/csv_plus_plus/options.rb +20 -2
- data/lib/csv_plus_plus/row.rb +15 -4
- data/lib/csv_plus_plus/template.rb +26 -6
- data/lib/csv_plus_plus/version.rb +1 -1
- data/lib/csv_plus_plus/writer/excel.rb +2 -9
- data/lib/csv_plus_plus/writer/file_backer_upper.rb +22 -20
- data/lib/csv_plus_plus/writer/google_sheet_builder.rb +8 -10
- data/lib/csv_plus_plus/writer/google_sheets.rb +4 -10
- data/lib/csv_plus_plus/writer/rubyxl_builder.rb +23 -15
- data/lib/csv_plus_plus/writer/rubyxl_modifier.rb +15 -8
- data/lib/csv_plus_plus.rb +42 -8
- metadata +5 -2
@@ -4,31 +4,44 @@ require_relative 'entities'
|
|
4
4
|
require_relative 'syntax_error'
|
5
5
|
require 'tempfile'
|
6
6
|
|
7
|
-
ENTITIES = ::CSVPlusPlus::Language::Entities
|
8
|
-
|
9
|
-
RUNTIME_VARIABLES = {
|
10
|
-
rownum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.row_index + 1) }),
|
11
|
-
cellnum: ::ENTITIES::RuntimeValue.new(->(r) { ::ENTITIES::Number.new(r.cell_index + 1) })
|
12
|
-
}.freeze
|
13
|
-
|
14
7
|
module CSVPlusPlus
|
15
8
|
module Language
|
16
|
-
|
17
|
-
#
|
9
|
+
# The runtime state of the compiler (the current +line_number+/+row_index+, +cell+ being processed, etc). We take
|
10
|
+
# multiple runs through the input file for parsing so it's really convenient to have a central place for these
|
11
|
+
# things to be managed.
|
12
|
+
#
|
13
|
+
# @attr_reader filename [String, nil] The filename that the input came from (mostly used for debugging since
|
14
|
+
# +filename+ can be +nil+ if it's read from stdin).
|
15
|
+
# @attr_reader length_of_code_section [Integer] The length (count of lines) of the code section part of the original
|
16
|
+
# input.
|
17
|
+
# @attr_reader length_of_csv_section [Integer] The length (count of lines) of the CSV part of the original csvpp
|
18
|
+
# input.
|
19
|
+
# @attr_reader length_of_original_file [Integer] The length (count of lines) of the original csvpp input.
|
20
|
+
#
|
21
|
+
# @attr cell [Cell] The current cell being processed
|
22
|
+
# @attr cell_index [Integer] The index of the current cell being processed (starts at 0)
|
23
|
+
# @attr row_index [Integer] The index of the current row being processed (starts at 0)
|
24
|
+
# @attr line_number [Integer] The line number of the original csvpp template (starts at 1)
|
18
25
|
class Runtime
|
19
26
|
attr_reader :filename, :length_of_code_section, :length_of_csv_section, :length_of_original_file
|
20
27
|
|
21
28
|
attr_accessor :cell, :cell_index, :row_index, :line_number
|
22
29
|
|
23
|
-
#
|
30
|
+
# @param input [String] The input to be parsed
|
31
|
+
# @param filename [String, nil] The filename that the input came from (mostly used for debugging since +filename+
|
32
|
+
# can be +nil+ if it's read from stdin)
|
24
33
|
def initialize(input:, filename:)
|
25
34
|
@filename = filename || 'stdin'
|
26
35
|
|
27
36
|
init_input!(input)
|
28
|
-
|
37
|
+
start!
|
29
38
|
end
|
30
39
|
|
31
|
-
#
|
40
|
+
# Map over a csvpp file and keep track of line_number and row_index
|
41
|
+
#
|
42
|
+
# @param lines [Array]
|
43
|
+
#
|
44
|
+
# @return [Array]
|
32
45
|
def map_lines(lines, &block)
|
33
46
|
@line_number = 1
|
34
47
|
lines.map do |line|
|
@@ -36,7 +49,11 @@ module CSVPlusPlus
|
|
36
49
|
end
|
37
50
|
end
|
38
51
|
|
39
|
-
#
|
52
|
+
# Map over a single row and keep track of the cell and its index
|
53
|
+
#
|
54
|
+
# @param row [Array<Cell>] The row to map each cell over
|
55
|
+
#
|
56
|
+
# @return [Array]
|
40
57
|
def map_row(row, &block)
|
41
58
|
@cell_index = 0
|
42
59
|
row.map.with_index do |cell, index|
|
@@ -45,7 +62,12 @@ module CSVPlusPlus
|
|
45
62
|
end
|
46
63
|
end
|
47
64
|
|
48
|
-
#
|
65
|
+
# Map over all rows and keep track of row and line numbers
|
66
|
+
#
|
67
|
+
# @param rows [Array<Row>] The rows to map over (and keep track of indexes)
|
68
|
+
# @param cells_too [boolean] If the cells of each +row+ should be iterated over also.
|
69
|
+
#
|
70
|
+
# @return [Array]
|
49
71
|
def map_rows(rows, cells_too: false, &block)
|
50
72
|
@row_index = 0
|
51
73
|
map_lines(rows) do |row|
|
@@ -59,56 +81,92 @@ module CSVPlusPlus
|
|
59
81
|
end
|
60
82
|
|
61
83
|
# Increment state to the next line
|
84
|
+
#
|
85
|
+
# @return [Integer]
|
62
86
|
def next_line!
|
63
87
|
@row_index += 1 unless @row_index.nil?
|
64
88
|
@line_number += 1
|
65
89
|
end
|
66
90
|
|
91
|
+
# Return the current spreadsheet row number. It parallels +@row_index+ but starts at 1.
|
92
|
+
#
|
93
|
+
# @return [Integer, nil]
|
94
|
+
def rownum
|
95
|
+
return if @row_index.nil?
|
96
|
+
|
97
|
+
@row_index + 1
|
98
|
+
end
|
99
|
+
|
67
100
|
# Set the current cell and index
|
101
|
+
#
|
102
|
+
# @param cell [Cell] The current cell
|
103
|
+
# @param cell_index [Integer] The index of the cell
|
68
104
|
def set_cell!(cell, cell_index)
|
69
105
|
@cell = cell
|
70
106
|
@cell_index = cell_index
|
71
107
|
end
|
72
108
|
|
73
|
-
# Each time we run a parse on the input,
|
74
|
-
|
75
|
-
def init!(start_line_number_at)
|
109
|
+
# Each time we run a parse on the input, reset the runtime state starting at the beginning of the file
|
110
|
+
def start!
|
76
111
|
@row_index = @cell_index = nil
|
77
|
-
@line_number =
|
112
|
+
@line_number = 1
|
113
|
+
end
|
114
|
+
|
115
|
+
# Reset the runtime state starting at the CSV section
|
116
|
+
def start_at_csv!
|
117
|
+
# TODO: isn't the input re-written anyway without the code section? why do we need this?
|
118
|
+
start!
|
119
|
+
@line_number = @length_of_code_section || 1
|
78
120
|
end
|
79
121
|
|
80
|
-
#
|
122
|
+
# @return [String]
|
81
123
|
def to_s
|
82
124
|
"Runtime(cell: #{@cell}, row_index: #{@row_index}, cell_index: #{@cell_index})"
|
83
125
|
end
|
84
126
|
|
85
|
-
#
|
127
|
+
# Get the current (entity) value of a runtime value
|
128
|
+
#
|
129
|
+
# @param var_id [String, Symbol] The Variable#id of the variable being resolved.
|
130
|
+
#
|
131
|
+
# @return [Entity]
|
86
132
|
def runtime_value(var_id)
|
87
133
|
if runtime_variable?(var_id)
|
88
|
-
::
|
134
|
+
::CSVPlusPlus::Language::Builtins::VARIABLES[var_id.to_sym].resolve_fn.call(self)
|
89
135
|
else
|
90
136
|
raise_syntax_error('Undefined variable', var_id)
|
91
137
|
end
|
92
138
|
end
|
93
139
|
|
94
140
|
# Is +var_id+ a runtime variable? (it's a static variable otherwise)
|
141
|
+
#
|
142
|
+
# @param var_id [String, Symbol] The Variable#id to check if it's a runtime variable
|
143
|
+
#
|
144
|
+
# @return [boolean]
|
95
145
|
def runtime_variable?(var_id)
|
96
|
-
::
|
146
|
+
::CSVPlusPlus::Language::Builtins::VARIABLES.key?(var_id.to_sym)
|
97
147
|
end
|
98
148
|
|
99
149
|
# Called when an error is encountered during parsing. It will construct a useful
|
100
150
|
# error with the current +@row/@cell_index+, +@line_number+ and +@filename+
|
151
|
+
#
|
152
|
+
# @param message [String] A message relevant to why this error is being raised.
|
153
|
+
# @param bad_input [String] The offending input that caused this error to be thrown.
|
154
|
+
# @param wrapped_error [StandardError, nil] The underlying error that was raised (if it's not from our own logic)
|
101
155
|
def raise_syntax_error(message, bad_input, wrapped_error: nil)
|
102
156
|
raise(::CSVPlusPlus::Language::SyntaxError.new(message, bad_input, self, wrapped_error:))
|
103
157
|
end
|
104
158
|
|
105
159
|
# The currently available input for parsing. The tmp state will be re-written
|
106
160
|
# between parsing the code section and the CSV section
|
161
|
+
#
|
162
|
+
# @return [String]
|
107
163
|
def input
|
108
164
|
@tmp
|
109
165
|
end
|
110
166
|
|
111
167
|
# We mutate the input over and over. It's ok because it's just a Tempfile
|
168
|
+
#
|
169
|
+
# @param data [String] The data to rewrite our input file to
|
112
170
|
def rewrite_input!(data)
|
113
171
|
@tmp.truncate(0)
|
114
172
|
@tmp.write(data)
|
@@ -6,40 +6,29 @@ require_relative './entities'
|
|
6
6
|
require_relative './references'
|
7
7
|
require_relative './syntax_error'
|
8
8
|
|
9
|
-
BUILTIN_FUNCTIONS = {
|
10
|
-
# =CELLREF(C) === =INDIRECT(CONCAT($$C, $$rownum))
|
11
|
-
cellref: ::CSVPlusPlus::Language::Entities::Function.new(
|
12
|
-
:cellref,
|
13
|
-
[:cell],
|
14
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
15
|
-
:indirect,
|
16
|
-
[
|
17
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
18
|
-
:concat,
|
19
|
-
[
|
20
|
-
::CSVPlusPlus::Language::Entities::Variable.new(:cell),
|
21
|
-
::CSVPlusPlus::Language::Entities::Variable.new(:rownum)
|
22
|
-
]
|
23
|
-
)
|
24
|
-
]
|
25
|
-
)
|
26
|
-
)
|
27
|
-
}.freeze
|
28
|
-
|
29
9
|
module CSVPlusPlus
|
30
10
|
module Language
|
31
11
|
# A class representing the scope of the current Template and responsible for resolving variables
|
12
|
+
#
|
13
|
+
# @attr_reader code_section [CodeSection] The CodeSection containing variables and functions to be resolved
|
14
|
+
# @attr_reader runtime [Runtime] The compiler's current runtime
|
15
|
+
#
|
32
16
|
# rubocop:disable Metrics/ClassLength
|
33
17
|
class Scope
|
34
18
|
attr_reader :code_section, :runtime
|
35
19
|
|
36
20
|
# initialize with a +Runtime+ and optional +CodeSection+
|
21
|
+
#
|
22
|
+
# @param runtime [Runtime]
|
23
|
+
# @param code_section [CodeSection, nil]
|
37
24
|
def initialize(runtime:, code_section: nil)
|
38
25
|
@code_section = code_section if code_section
|
39
26
|
@runtime = runtime
|
40
27
|
end
|
41
28
|
|
42
29
|
# Resolve all values in the ast of the current cell being processed
|
30
|
+
#
|
31
|
+
# @return [Entity]
|
43
32
|
def resolve_cell_value
|
44
33
|
return unless (ast = @runtime.cell&.ast)
|
45
34
|
|
@@ -56,14 +45,14 @@ module CSVPlusPlus
|
|
56
45
|
end
|
57
46
|
|
58
47
|
# Set the +code_section+ and resolve all inner dependencies in its variables and functions.
|
48
|
+
#
|
49
|
+
# @param code_section [CodeSection] The code_section to be resolved
|
59
50
|
def code_section=(code_section)
|
60
51
|
@code_section = code_section
|
61
|
-
|
62
52
|
resolve_static_variables!
|
63
|
-
resolve_static_functions!
|
64
53
|
end
|
65
54
|
|
66
|
-
#
|
55
|
+
# @return [String]
|
67
56
|
def to_s
|
68
57
|
"Scope(code_section: #{@code_section}, runtime: #{@runtime})"
|
69
58
|
end
|
@@ -71,10 +60,10 @@ module CSVPlusPlus
|
|
71
60
|
private
|
72
61
|
|
73
62
|
# Resolve all variable references defined statically in the code section
|
63
|
+
# TODO: experiment with getting rid of this - does it even play correctly with runtime vars?
|
74
64
|
def resolve_static_variables!
|
75
65
|
variables = @code_section.variables
|
76
66
|
last_var_dependencies = {}
|
77
|
-
# TODO: might not need the infinite loop wrap
|
78
67
|
loop do
|
79
68
|
var_dependencies, resolution_order = variable_resolution_order(only_static_vars(variables))
|
80
69
|
return if var_dependencies == last_var_dependencies
|
@@ -89,14 +78,6 @@ module CSVPlusPlus
|
|
89
78
|
var_dependencies.reject { |k| @runtime.runtime_variable?(k) }
|
90
79
|
end
|
91
80
|
|
92
|
-
# Resolve all functions defined statically in the code section
|
93
|
-
def resolve_static_functions!
|
94
|
-
# TODO: I'm still torn if it's worth replacing function references
|
95
|
-
#
|
96
|
-
# my current theory is that if we resolve static functions befor processing each cell,
|
97
|
-
# overall compile time will be improved because there will be less to do for each cell
|
98
|
-
end
|
99
|
-
|
100
81
|
def resolve_functions(ast, refs)
|
101
82
|
refs.reduce(ast.dup) do |acc, elem|
|
102
83
|
function_replace(acc, elem.id, resolve_function(elem.id))
|
@@ -110,26 +91,39 @@ module CSVPlusPlus
|
|
110
91
|
end
|
111
92
|
|
112
93
|
# Make a copy of the AST represented by +node+ and replace +fn_id+ with +replacement+ throughout
|
94
|
+
# rubocop:disable Metrics/MethodLength
|
113
95
|
def function_replace(node, fn_id, replacement)
|
114
96
|
if node.function_call? && node.id == fn_id
|
115
|
-
|
97
|
+
call_function_or_runtime_value(replacement, node)
|
116
98
|
elsif node.function_call?
|
117
|
-
|
118
|
-
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
99
|
+
# not our function, but continue our depth first search on it
|
100
|
+
::CSVPlusPlus::Language::Entities::FunctionCall.new(
|
101
|
+
node.id,
|
102
|
+
node.arguments.map { |n| function_replace(n, fn_id, replacement) },
|
103
|
+
infix: node.infix
|
104
|
+
)
|
119
105
|
else
|
120
106
|
node
|
121
107
|
end
|
122
108
|
end
|
109
|
+
# rubocop:enable Metrics/MethodLength
|
123
110
|
|
124
111
|
def resolve_function(fn_id)
|
125
112
|
id = fn_id.to_sym
|
126
113
|
return @code_section.functions[id] if @code_section.defined_function?(id)
|
127
114
|
|
128
|
-
|
129
|
-
|
115
|
+
::CSVPlusPlus::Language::Builtins::FUNCTIONS[id]
|
116
|
+
end
|
117
|
+
|
118
|
+
def call_function_or_runtime_value(function_or_runtime_value, function_call)
|
119
|
+
if function_or_runtime_value.function?
|
120
|
+
call_function(function_or_runtime_value, function_call)
|
121
|
+
else
|
122
|
+
function_or_runtime_value.resolve_fn.call(@runtime, function_call.arguments)
|
123
|
+
end
|
130
124
|
end
|
131
125
|
|
132
|
-
def
|
126
|
+
def call_function(function, function_call)
|
133
127
|
i = 0
|
134
128
|
function.arguments.reduce(function.body.dup) do |ast, argument|
|
135
129
|
variable_replace(ast, argument, function_call.arguments[i]).tap do
|
@@ -142,7 +136,8 @@ module CSVPlusPlus
|
|
142
136
|
def variable_replace(node, var_id, replacement)
|
143
137
|
if node.function_call?
|
144
138
|
arguments = node.arguments.map { |n| variable_replace(n, var_id, replacement) }
|
145
|
-
|
139
|
+
# TODO: refactor these places where we copy functions... it's brittle with the kwargs
|
140
|
+
::CSVPlusPlus::Language::Entities::FunctionCall.new(node.id, arguments, infix: node.infix)
|
146
141
|
elsif node.variable? && node.id == var_id
|
147
142
|
replacement
|
148
143
|
else
|
@@ -2,10 +2,13 @@
|
|
2
2
|
|
3
3
|
module CSVPlusPlus
|
4
4
|
module Language
|
5
|
-
##
|
6
5
|
# An error that can be thrown for various syntax errors
|
7
6
|
class SyntaxError < ::CSVPlusPlus::Error
|
8
|
-
#
|
7
|
+
# @param message [String] The primary message to be shown to the user
|
8
|
+
# @param bad_input [String] The offending input that caused the error to be thrown
|
9
|
+
# @param runtime [Runtime] The current runtime
|
10
|
+
# @param wrapped_error [StandardError] The underlying error that caused the syntax error. For example a
|
11
|
+
# Racc::ParseError that was thrown
|
9
12
|
def initialize(message, bad_input, runtime, wrapped_error: nil)
|
10
13
|
@bad_input = bad_input.to_s
|
11
14
|
@runtime = runtime
|
@@ -15,19 +18,21 @@ module CSVPlusPlus
|
|
15
18
|
super(message)
|
16
19
|
end
|
17
20
|
|
18
|
-
#
|
21
|
+
# @return [String]
|
19
22
|
def to_s
|
20
23
|
to_trace
|
21
24
|
end
|
22
25
|
|
23
26
|
# Output a verbose user-helpful string that references the current runtime
|
24
27
|
def to_verbose_trace
|
25
|
-
warn(@wrapped_error.full_message)
|
26
|
-
warn(@wrapped_error.backtrace)
|
28
|
+
warn(@wrapped_error.full_message) if @wrapped_error
|
29
|
+
warn(@wrapped_error.backtrace) if @wrapped_error
|
27
30
|
to_trace
|
28
31
|
end
|
29
32
|
|
30
33
|
# Output a user-helpful string that references the runtime state
|
34
|
+
#
|
35
|
+
# @return [String]
|
31
36
|
def to_trace
|
32
37
|
"#{message_prefix}#{cell_index} #{message_postfix}"
|
33
38
|
end
|
@@ -1,19 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module CSVPlusPlus
|
4
|
-
# Common methods to be mixed into
|
4
|
+
# Common methods to be mixed into the Racc parsers
|
5
|
+
#
|
6
|
+
# @attr_reader tokens [Array]
|
5
7
|
module Lexer
|
6
|
-
|
7
|
-
|
8
|
-
|
8
|
+
attr_reader :tokens
|
9
|
+
|
10
|
+
# Initialize a lexer instance with an empty +@tokens+
|
11
|
+
def initialize(tokens: [])
|
12
|
+
@tokens = tokens
|
9
13
|
end
|
10
14
|
|
11
15
|
# Used by racc to iterate each token
|
16
|
+
#
|
17
|
+
# @return [Array<(String, String)>]
|
12
18
|
def next_token
|
13
19
|
@tokens.shift
|
14
20
|
end
|
15
21
|
|
16
|
-
#
|
22
|
+
# Orchestrate the tokenizing, parsing and error handling of parsing input. Each instance will implement their own
|
23
|
+
# #tokenizer method
|
24
|
+
#
|
25
|
+
# @return [Lexer#return_value] Each instance will define its own +return_value+ with the result of parsing
|
17
26
|
def parse(input, runtime)
|
18
27
|
return if input.nil?
|
19
28
|
|
@@ -26,12 +35,23 @@ module CSVPlusPlus
|
|
26
35
|
runtime.raise_syntax_error("Error parsing #{parse_subject}", e.message, wrapped_error: e)
|
27
36
|
end
|
28
37
|
|
29
|
-
|
38
|
+
TOKEN_LIBRARY = {
|
39
|
+
TRUE: [/true/i, :TRUE],
|
40
|
+
FALSE: [/false/i, :FALSE],
|
41
|
+
NUMBER: [/-?[\d.]+/, :NUMBER],
|
42
|
+
STRING: [%r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"}, :STRING],
|
43
|
+
INFIX_OP: [%r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, :INFIX_OP],
|
44
|
+
VAR_REF: [/\$\$/, :VAR_REF],
|
45
|
+
ID: [/[$!\w:]+/, :ID]
|
46
|
+
}.freeze
|
47
|
+
public_constant :TOKEN_LIBRARY
|
48
|
+
|
49
|
+
private
|
30
50
|
|
31
51
|
def tokenize(input, runtime)
|
32
52
|
return if input.nil?
|
33
53
|
|
34
|
-
t = tokenizer(input)
|
54
|
+
t = tokenizer.scan(input)
|
35
55
|
|
36
56
|
until t.scanner.empty?
|
37
57
|
next if t.matches_ignore?
|
@@ -45,12 +65,6 @@ module CSVPlusPlus
|
|
45
65
|
@tokens << %i[EOL EOL]
|
46
66
|
end
|
47
67
|
|
48
|
-
def e(type, *entity_args)
|
49
|
-
::CSVPlusPlus::Language::TYPES[type].new(*entity_args)
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
|
54
68
|
def consume_token(tokenizer, runtime)
|
55
69
|
if tokenizer.last_token
|
56
70
|
@tokens << [tokenizer.last_token, tokenizer.last_match]
|
@@ -5,13 +5,14 @@ require 'strscan'
|
|
5
5
|
module CSVPlusPlus
|
6
6
|
module Lexer
|
7
7
|
# A class that contains the use-case-specific regexes for parsing
|
8
|
+
#
|
9
|
+
# @attr_reader last_token [String] The last token that's been matched.
|
10
|
+
# @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
|
8
11
|
class Tokenizer
|
9
12
|
attr_reader :last_token, :scanner
|
10
13
|
|
11
|
-
#
|
12
|
-
|
13
|
-
def initialize(input:, tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
|
14
|
-
@scanner = ::StringScanner.new(input.strip)
|
14
|
+
# @param tokens [Array] The token definitions to scan for (presumably +[pattern, token]+ pairs, as read by #scan_tokens!)
|
15
|
+
def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
|
15
16
|
@last_token = nil
|
16
17
|
|
17
18
|
@catchall = catchall
|
@@ -20,43 +21,66 @@ module CSVPlusPlus
|
|
20
21
|
@stop_fn = stop_fn
|
21
22
|
@alter_matches = alter_matches
|
22
23
|
end
|
23
|
-
# rubocop:enable Metrics/ParameterLists
|
24
24
|
|
25
|
-
#
|
25
|
+
# Initializes a scanner for the given input to be parsed
|
26
|
+
#
|
27
|
+
# @param input The input to be tokenized
|
28
|
+
# @return [Tokenizer]
|
29
|
+
def scan(input)
|
30
|
+
@scanner = ::StringScanner.new(input.strip)
|
31
|
+
self
|
32
|
+
end
|
33
|
+
|
34
|
+
# Scan tokens and set +@last_token+ if any match
|
35
|
+
#
|
36
|
+
# @return [String, nil]
|
26
37
|
def scan_tokens!
|
27
38
|
m = @tokens.find { |t| @scanner.scan(t.first) }
|
28
39
|
@last_token = m ? m[1] : nil
|
29
40
|
end
|
30
41
|
|
31
42
|
# Scan input against the catchall pattern
|
43
|
+
#
|
44
|
+
# @return [String, nil]
|
32
45
|
def scan_catchall
|
33
46
|
@scanner.scan(@catchall) if @catchall
|
34
47
|
end
|
35
48
|
|
36
49
|
# Scan input against the ignore pattern
|
50
|
+
#
|
51
|
+
# @return [boolean]
|
37
52
|
def matches_ignore?
|
38
53
|
@scanner.scan(@ignore) if @ignore
|
39
54
|
end
|
40
55
|
|
41
56
|
# The value of the last token matched
|
57
|
+
#
|
58
|
+
# @return [String, nil]
|
42
59
|
def last_match
|
43
60
|
return @alter_matches[@last_token].call(@scanner.matched) if @alter_matches.key?(@last_token)
|
44
61
|
|
45
62
|
@scanner.matched
|
46
63
|
end
|
47
64
|
|
48
|
-
#
|
49
|
-
|
50
|
-
|
65
|
+
# Read the input but don't consume it
|
66
|
+
#
|
67
|
+
# @param peek_characters [Integer]
|
68
|
+
#
|
69
|
+
# @return [String]
|
70
|
+
def peek(peek_characters: 100)
|
71
|
+
@scanner.peek(peek_characters)
|
51
72
|
end
|
52
73
|
|
53
74
|
# Scan for our stop token (if there is one - some parsers stop early and some don't)
|
75
|
+
#
|
76
|
+
# @return [boolean]
|
54
77
|
def stop?
|
55
78
|
@stop_fn ? @stop_fn.call(@scanner) : false
|
56
79
|
end
|
57
80
|
|
58
|
-
# The rest of the un-parsed input. The tokenizer might not need to
|
59
|
-
#
|
81
|
+
# The rest of the un-parsed input. The tokenizer might not need to parse the entire input
|
82
|
+
#
|
83
|
+
# @return [String]
|
60
84
|
def rest
|
61
85
|
@scanner.rest
|
62
86
|
end
|