csv_plus_plus 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -3
- data/docs/CHANGELOG.md +18 -0
- data/lib/csv_plus_plus/a1_reference.rb +202 -0
- data/lib/csv_plus_plus/benchmarked_compiler.rb +3 -3
- data/lib/csv_plus_plus/cell.rb +1 -35
- data/lib/csv_plus_plus/cli.rb +43 -80
- data/lib/csv_plus_plus/cli_flag.rb +77 -70
- data/lib/csv_plus_plus/color.rb +1 -1
- data/lib/csv_plus_plus/compiler.rb +31 -21
- data/lib/csv_plus_plus/entities/ast_builder.rb +11 -4
- data/lib/csv_plus_plus/entities/boolean.rb +16 -9
- data/lib/csv_plus_plus/entities/builtins.rb +68 -40
- data/lib/csv_plus_plus/entities/date.rb +14 -11
- data/lib/csv_plus_plus/entities/entity.rb +11 -29
- data/lib/csv_plus_plus/entities/entity_with_arguments.rb +18 -31
- data/lib/csv_plus_plus/entities/function.rb +22 -11
- data/lib/csv_plus_plus/entities/function_call.rb +35 -11
- data/lib/csv_plus_plus/entities/has_identifier.rb +19 -0
- data/lib/csv_plus_plus/entities/number.rb +15 -10
- data/lib/csv_plus_plus/entities/reference.rb +77 -0
- data/lib/csv_plus_plus/entities/runtime_value.rb +36 -23
- data/lib/csv_plus_plus/entities/string.rb +13 -10
- data/lib/csv_plus_plus/entities.rb +2 -18
- data/lib/csv_plus_plus/error/cli_error.rb +17 -0
- data/lib/csv_plus_plus/error/compiler_error.rb +17 -0
- data/lib/csv_plus_plus/error/error.rb +18 -5
- data/lib/csv_plus_plus/error/formula_syntax_error.rb +12 -13
- data/lib/csv_plus_plus/error/modifier_syntax_error.rb +10 -36
- data/lib/csv_plus_plus/error/modifier_validation_error.rb +6 -32
- data/lib/csv_plus_plus/error/positional_error.rb +15 -0
- data/lib/csv_plus_plus/error/writer_error.rb +1 -1
- data/lib/csv_plus_plus/error.rb +4 -1
- data/lib/csv_plus_plus/error_formatter.rb +111 -0
- data/lib/csv_plus_plus/google_api_client.rb +18 -8
- data/lib/csv_plus_plus/lexer/racc_lexer.rb +144 -0
- data/lib/csv_plus_plus/lexer/tokenizer.rb +53 -17
- data/lib/csv_plus_plus/lexer.rb +40 -1
- data/lib/csv_plus_plus/modifier/data_validation.rb +1 -1
- data/lib/csv_plus_plus/modifier/expand.rb +17 -0
- data/lib/csv_plus_plus/modifier.rb +6 -1
- data/lib/csv_plus_plus/options/file_options.rb +49 -0
- data/lib/csv_plus_plus/options/google_sheets_options.rb +42 -0
- data/lib/csv_plus_plus/options/options.rb +102 -0
- data/lib/csv_plus_plus/options.rb +22 -110
- data/lib/csv_plus_plus/parser/cell_value.tab.rb +65 -66
- data/lib/csv_plus_plus/parser/code_section.tab.rb +92 -84
- data/lib/csv_plus_plus/parser/modifier.tab.rb +40 -30
- data/lib/csv_plus_plus/reader/csv.rb +50 -0
- data/lib/csv_plus_plus/reader/google_sheets.rb +129 -0
- data/lib/csv_plus_plus/reader/reader.rb +27 -0
- data/lib/csv_plus_plus/reader/rubyxl.rb +37 -0
- data/lib/csv_plus_plus/reader.rb +14 -0
- data/lib/csv_plus_plus/runtime/graph.rb +6 -6
- data/lib/csv_plus_plus/runtime/{position_tracker.rb → position.rb} +16 -5
- data/lib/csv_plus_plus/runtime/references.rb +32 -27
- data/lib/csv_plus_plus/runtime/runtime.rb +73 -67
- data/lib/csv_plus_plus/runtime/scope.rb +280 -0
- data/lib/csv_plus_plus/runtime.rb +9 -9
- data/lib/csv_plus_plus/source_code.rb +14 -9
- data/lib/csv_plus_plus/template.rb +17 -12
- data/lib/csv_plus_plus/version.rb +1 -1
- data/lib/csv_plus_plus/writer/csv.rb +32 -5
- data/lib/csv_plus_plus/writer/excel.rb +19 -6
- data/lib/csv_plus_plus/writer/file_backer_upper.rb +27 -14
- data/lib/csv_plus_plus/writer/google_sheets.rb +23 -129
- data/lib/csv_plus_plus/writer/{google_sheet_builder.rb → google_sheets_builder.rb} +39 -55
- data/lib/csv_plus_plus/writer/merger.rb +56 -0
- data/lib/csv_plus_plus/writer/open_document.rb +16 -2
- data/lib/csv_plus_plus/writer/rubyxl_builder.rb +68 -43
- data/lib/csv_plus_plus/writer/writer.rb +42 -0
- data/lib/csv_plus_plus/writer.rb +58 -19
- data/lib/csv_plus_plus.rb +26 -14
- metadata +43 -18
- data/lib/csv_plus_plus/entities/cell_reference.rb +0 -231
- data/lib/csv_plus_plus/entities/variable.rb +0 -37
- data/lib/csv_plus_plus/error/syntax_error.rb +0 -71
- data/lib/csv_plus_plus/google_options.rb +0 -32
- data/lib/csv_plus_plus/lexer/lexer.rb +0 -89
- data/lib/csv_plus_plus/runtime/can_define_references.rb +0 -87
- data/lib/csv_plus_plus/runtime/can_resolve_references.rb +0 -209
- data/lib/csv_plus_plus/writer/base_writer.rb +0 -45
|
@@ -1,62 +1,36 @@
|
|
|
1
1
|
# typed: strict
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
require_relative './syntax_error'
|
|
5
|
-
|
|
6
4
|
module CSVPlusPlus
|
|
7
5
|
module Error
|
|
8
|
-
#
|
|
9
|
-
class ModifierSyntaxError < ::CSVPlusPlus::Error::
|
|
6
|
+
# A syntax error encountered when parsing a modifier definition
|
|
7
|
+
class ModifierSyntaxError < ::CSVPlusPlus::Error::Error
|
|
10
8
|
extend ::T::Sig
|
|
9
|
+
include ::CSVPlusPlus::Error::PositionalError
|
|
11
10
|
|
|
12
11
|
sig { returns(::String) }
|
|
13
12
|
attr_reader :bad_input
|
|
14
13
|
|
|
15
|
-
sig { returns(::String) }
|
|
16
|
-
attr_reader :message
|
|
17
|
-
|
|
18
14
|
sig { returns(::T.nilable(::Symbol)) }
|
|
19
15
|
attr_reader :modifier
|
|
20
16
|
|
|
21
17
|
sig do
|
|
22
18
|
params(
|
|
23
|
-
runtime: ::CSVPlusPlus::Runtime::Runtime,
|
|
24
|
-
modifier_validation_error: ::CSVPlusPlus::Error::ModifierValidationError
|
|
25
|
-
).returns(::CSVPlusPlus::Error::ModifierSyntaxError)
|
|
26
|
-
end
|
|
27
|
-
# Create a +ModifierSyntaxError+ given a +runtime+ and +ModifierValidationError+.
|
|
28
|
-
#
|
|
29
|
-
# @param runtime [Runtime]
|
|
30
|
-
# @param modifier_validation_error [ModifierValidationError]
|
|
31
|
-
#
|
|
32
|
-
# @return [ModifierSyntaxError]
|
|
33
|
-
def self.from_validation_error(runtime, modifier_validation_error)
|
|
34
|
-
new(
|
|
35
|
-
runtime,
|
|
36
|
-
modifier: modifier_validation_error.modifier,
|
|
37
|
-
bad_input: modifier_validation_error.bad_input,
|
|
38
|
-
message: modifier_validation_error.message,
|
|
39
|
-
wrapped_error: modifier_validation_error
|
|
40
|
-
)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
sig do
|
|
44
|
-
params(
|
|
45
|
-
runtime: ::CSVPlusPlus::Runtime::Runtime,
|
|
46
|
-
bad_input: ::String,
|
|
47
19
|
message: ::String,
|
|
20
|
+
bad_input: ::String,
|
|
48
21
|
modifier: ::T.nilable(::Symbol),
|
|
49
22
|
wrapped_error: ::T.nilable(::StandardError)
|
|
50
23
|
).void
|
|
51
24
|
end
|
|
52
|
-
# @param
|
|
25
|
+
# @param message [String] The error message
|
|
26
|
+
# @param bad_input [String] The offending input
|
|
27
|
+
# @param modifier [Symbol] The modifier being parsed
|
|
53
28
|
# @param wrapped_error [ModifierValidationError] The validation error that this is wrapping
|
|
54
|
-
def initialize(
|
|
29
|
+
def initialize(message, bad_input:, modifier: nil, wrapped_error: nil)
|
|
30
|
+
super(message, wrapped_error:)
|
|
31
|
+
|
|
55
32
|
@bad_input = bad_input
|
|
56
33
|
@modifier = modifier
|
|
57
|
-
@message = message
|
|
58
|
-
|
|
59
|
-
super(runtime, wrapped_error:)
|
|
60
34
|
end
|
|
61
35
|
|
|
62
36
|
sig { override.returns(::String) }
|
|
@@ -9,20 +9,9 @@ module CSVPlusPlus
|
|
|
9
9
|
# @attr_reader bad_input [String] The offending input that caused the error to be thrown
|
|
10
10
|
# @attr_reader choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
|
|
11
11
|
# @attr_reader message [String, nil] A relevant message to show
|
|
12
|
-
class ModifierValidationError < ::CSVPlusPlus::Error::
|
|
12
|
+
class ModifierValidationError < ::CSVPlusPlus::Error::ModifierSyntaxError
|
|
13
13
|
extend ::T::Sig
|
|
14
|
-
|
|
15
|
-
sig { returns(::String) }
|
|
16
|
-
attr_reader :bad_input
|
|
17
|
-
|
|
18
|
-
sig { returns(::T.nilable(::T.class_of(::T::Enum))) }
|
|
19
|
-
attr_reader :choices
|
|
20
|
-
|
|
21
|
-
sig { returns(::String) }
|
|
22
|
-
attr_reader :message
|
|
23
|
-
|
|
24
|
-
sig { returns(::Symbol) }
|
|
25
|
-
attr_reader :modifier
|
|
14
|
+
include ::CSVPlusPlus::Error::PositionalError
|
|
26
15
|
|
|
27
16
|
sig do
|
|
28
17
|
params(
|
|
@@ -38,31 +27,16 @@ module CSVPlusPlus
|
|
|
38
27
|
# @param bad_input [String] The offending input that caused the error to be thrown
|
|
39
28
|
# @param choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
|
|
40
29
|
# @param message [String, nil] A relevant message to show
|
|
41
|
-
# rubocop:disable Metrics/MethodLength
|
|
42
30
|
def initialize(modifier, bad_input:, choices: nil, message: nil)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
@message = ::T.let(
|
|
48
|
-
if @choices
|
|
49
|
-
"must be one of (#{@choices.values.map(&:serialize).join(', ')})"
|
|
31
|
+
message = ::T.let(
|
|
32
|
+
if choices
|
|
33
|
+
"must be one of (#{choices.values.map(&:serialize).join(', ')})"
|
|
50
34
|
else
|
|
51
35
|
::T.must(message)
|
|
52
36
|
end,
|
|
53
37
|
::String
|
|
54
38
|
)
|
|
55
|
-
|
|
56
|
-
super(@message)
|
|
57
|
-
end
|
|
58
|
-
# rubocop:enable Metrics/MethodLength
|
|
59
|
-
|
|
60
|
-
sig { returns(::String) }
|
|
61
|
-
# A user-facing error message
|
|
62
|
-
#
|
|
63
|
-
# @return [::String]
|
|
64
|
-
def error_message
|
|
65
|
-
@message
|
|
39
|
+
super(message, bad_input:, modifier:)
|
|
66
40
|
end
|
|
67
41
|
end
|
|
68
42
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# typed: strict
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module CSVPlusPlus
|
|
5
|
+
module Error
|
|
6
|
+
# Methods that can be included into a class to denote that its result is dependent on the current
|
|
7
|
+
# +Runtime::Position+
|
|
8
|
+
module PositionalError
|
|
9
|
+
extend ::T::Sig
|
|
10
|
+
extend ::T::Helpers
|
|
11
|
+
|
|
12
|
+
interface!
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
data/lib/csv_plus_plus/error.rb
CHANGED
|
@@ -2,10 +2,13 @@
|
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
4
|
require_relative './error/error'
|
|
5
|
+
require_relative './error/positional_error'
|
|
6
|
+
|
|
7
|
+
require_relative './error/cli_error'
|
|
8
|
+
require_relative './error/compiler_error'
|
|
5
9
|
require_relative './error/formula_syntax_error'
|
|
6
10
|
require_relative './error/modifier_syntax_error'
|
|
7
11
|
require_relative './error/modifier_validation_error'
|
|
8
|
-
require_relative './error/syntax_error'
|
|
9
12
|
require_relative './error/writer_error'
|
|
10
13
|
|
|
11
14
|
module CSVPlusPlus
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# typed: strict
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module CSVPlusPlus
|
|
5
|
+
# Handle any errors potentially thrown during compilation. This could be anything from a user error (for example
|
|
6
|
+
# calling with invalid csvpp code) to an error calling Google Sheets API or writing to the filesystem.
|
|
7
|
+
class ErrorFormatter
|
|
8
|
+
extend ::T::Sig
|
|
9
|
+
|
|
10
|
+
sig do
|
|
11
|
+
params(options: ::CSVPlusPlus::Options::Options, runtime: ::CSVPlusPlus::Runtime::Runtime).void
|
|
12
|
+
end
|
|
13
|
+
# @param options [Options]
|
|
14
|
+
# @param runtime [Runtime::Runtime]
|
|
15
|
+
def initialize(options:, runtime:)
|
|
16
|
+
@options = options
|
|
17
|
+
@runtime = runtime
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
sig { params(error: ::StandardError).void }
|
|
21
|
+
# Nicely handle a given error. How it's handled depends on if it's our error and if @options.verbose
|
|
22
|
+
#
|
|
23
|
+
# @param error [CSVPlusPlus::Error, Google::Apis::ClientError, StandardError]
|
|
24
|
+
def handle_error(error)
|
|
25
|
+
# make sure that we're on a newline (verbose mode will probably be in the middle of printing a benchmark)
|
|
26
|
+
puts("\n\n") if @options.verbose
|
|
27
|
+
|
|
28
|
+
case error
|
|
29
|
+
when ::CSVPlusPlus::Error::Error
|
|
30
|
+
handle_internal_error(error)
|
|
31
|
+
when ::Google::Apis::ClientError
|
|
32
|
+
handle_google_error(error)
|
|
33
|
+
else
|
|
34
|
+
unhandled_error(error)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
sig { params(error: ::StandardError).void }
|
|
41
|
+
# An error was thrown that we weren't planning on
|
|
42
|
+
def unhandled_error(error)
|
|
43
|
+
warn(
|
|
44
|
+
<<~ERROR_MESSAGE)
|
|
45
|
+
An unexpected error was encountered. Please try running again with --verbose and
|
|
46
|
+
report the error at: https://github.com/patrickomatic/csv-plus-plus/issues/new'
|
|
47
|
+
ERROR_MESSAGE
|
|
48
|
+
|
|
49
|
+
return unless @options.verbose
|
|
50
|
+
|
|
51
|
+
warn(error.full_message)
|
|
52
|
+
warn("Cause: #{error.cause}") if error.cause
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
sig { params(error: ::CSVPlusPlus::Error::Error).void }
|
|
56
|
+
def handle_internal_error(error)
|
|
57
|
+
warn(with_position(error))
|
|
58
|
+
handle_wrapped_error(::T.must(error.wrapped_error)) if error.wrapped_error
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
sig { params(wrapped_error: ::StandardError).void }
|
|
62
|
+
def handle_wrapped_error(wrapped_error)
|
|
63
|
+
return unless @options.verbose
|
|
64
|
+
|
|
65
|
+
warn(wrapped_error.full_message)
|
|
66
|
+
warn((wrapped_error.backtrace || []).join("\n")) if wrapped_error.backtrace
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
sig { params(error: ::Google::Apis::ClientError).void }
|
|
70
|
+
def handle_google_error(error)
|
|
71
|
+
warn("Error making Google Sheets API request: #{error.message}")
|
|
72
|
+
return unless @options.verbose
|
|
73
|
+
|
|
74
|
+
warn("#{error.status_code} Error making Google API request [#{error.message}]: #{error.body}")
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
sig { params(error: ::CSVPlusPlus::Error::Error).returns(::String) }
|
|
78
|
+
# Output a user-helpful string that references the runtime state
|
|
79
|
+
#
|
|
80
|
+
# @param error [Error::Error] The error message to be prefixed with a filename and position
|
|
81
|
+
#
|
|
82
|
+
# @return [String]
|
|
83
|
+
def with_position(error)
|
|
84
|
+
message = error.error_message
|
|
85
|
+
case error
|
|
86
|
+
when ::CSVPlusPlus::Error::PositionalError
|
|
87
|
+
"#{message_prefix}#{cell_index} #{message}"
|
|
88
|
+
else
|
|
89
|
+
message
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
sig { returns(::String) }
|
|
94
|
+
def cell_index
|
|
95
|
+
if @runtime.parsing_csv_section?
|
|
96
|
+
"[#{@runtime.position.row_index},#{@runtime.position.cell_index}]"
|
|
97
|
+
else
|
|
98
|
+
''
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
sig { returns(::String) }
|
|
103
|
+
def message_prefix
|
|
104
|
+
line_number = @runtime.position.line_number
|
|
105
|
+
filename = @runtime.source_code.filename
|
|
106
|
+
|
|
107
|
+
line_str = ":#{line_number}"
|
|
108
|
+
"#{filename}#{line_str}"
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
@@ -10,20 +10,30 @@ module CSVPlusPlus
|
|
|
10
10
|
# Get a +Google::Apis::SheetsV4::SheetsService+ instance configured to connect to the sheets API
|
|
11
11
|
#
|
|
12
12
|
# @return [Google::Apis::SheetsV4::SheetsService]
|
|
13
|
-
def
|
|
14
|
-
::
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
def sheets_client
|
|
14
|
+
::T.must(
|
|
15
|
+
@sheets_client ||= ::T.let(
|
|
16
|
+
::Google::Apis::SheetsV4::SheetsService.new.tap do |s|
|
|
17
|
+
s.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/spreadsheets'].freeze)
|
|
18
|
+
end,
|
|
19
|
+
::T.nilable(::Google::Apis::SheetsV4::SheetsService)
|
|
20
|
+
)
|
|
21
|
+
)
|
|
17
22
|
end
|
|
18
23
|
|
|
19
24
|
sig { returns(::Google::Apis::DriveV3::DriveService) }
|
|
20
25
|
# Get a +Google::Apis::DriveV3::DriveService+ instance connected to the drive API
|
|
21
26
|
#
|
|
22
27
|
# @return [Google::Apis::DriveV3::DriveService]
|
|
23
|
-
def
|
|
24
|
-
::
|
|
25
|
-
|
|
26
|
-
|
|
28
|
+
def drive_client
|
|
29
|
+
::T.must(
|
|
30
|
+
@drive_client ||= ::T.let(
|
|
31
|
+
::Google::Apis::DriveV3::DriveService.new.tap do |d|
|
|
32
|
+
d.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/drive.file'].freeze)
|
|
33
|
+
end,
|
|
34
|
+
::T.nilable(::Google::Apis::DriveV3::DriveService)
|
|
35
|
+
)
|
|
36
|
+
)
|
|
27
37
|
end
|
|
28
38
|
end
|
|
29
39
|
end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# typed: strict
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module CSVPlusPlus
|
|
5
|
+
module Lexer
|
|
6
|
+
# TODO: ugh clean this up
|
|
7
|
+
RaccToken =
|
|
8
|
+
::T.type_alias do
|
|
9
|
+
::T.any(
|
|
10
|
+
[::String, ::Symbol],
|
|
11
|
+
[::Symbol, ::String],
|
|
12
|
+
[::String, ::String],
|
|
13
|
+
[::Symbol, ::Symbol],
|
|
14
|
+
[::FalseClass, ::FalseClass]
|
|
15
|
+
)
|
|
16
|
+
end
|
|
17
|
+
public_constant :RaccToken
|
|
18
|
+
|
|
19
|
+
# Common methods to be mixed into the Racc parsers
|
|
20
|
+
#
|
|
21
|
+
# @attr_reader tokens [Array]
|
|
22
|
+
module RaccLexer
|
|
23
|
+
extend ::T::Sig
|
|
24
|
+
extend ::T::Helpers
|
|
25
|
+
extend ::T::Generic
|
|
26
|
+
include ::Kernel
|
|
27
|
+
|
|
28
|
+
abstract!
|
|
29
|
+
|
|
30
|
+
ReturnType = type_member
|
|
31
|
+
public_constant :ReturnType
|
|
32
|
+
|
|
33
|
+
sig { returns(::T::Array[::CSVPlusPlus::Lexer::RaccToken]) }
|
|
34
|
+
attr_reader :tokens
|
|
35
|
+
|
|
36
|
+
sig { params(tokens: ::T::Array[::CSVPlusPlus::Lexer::RaccToken]).void }
|
|
37
|
+
# Initialize a lexer instance with an empty +@tokens+
|
|
38
|
+
def initialize(tokens: [])
|
|
39
|
+
@tokens = ::T.let(tokens, ::T::Array[::CSVPlusPlus::Lexer::RaccToken])
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
sig { returns(::T.nilable(::CSVPlusPlus::Lexer::RaccToken)) }
|
|
43
|
+
# Used by racc to iterate each token
|
|
44
|
+
#
|
|
45
|
+
# @return [Array<(Regexp, Symbol) | (false, false)>]
|
|
46
|
+
def next_token
|
|
47
|
+
@tokens.shift
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
sig { params(input: ::String).returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
|
|
51
|
+
# Orchestrate the tokenizing, parsing and error handling of parsing input. Each instance will implement their own
|
|
52
|
+
# +#tokenizer+ method
|
|
53
|
+
#
|
|
54
|
+
# @return [RaccLexer#] Each instance will define its own +return_value+ with the result of parsing
|
|
55
|
+
# rubocop:disable Metrics/MethodLength
|
|
56
|
+
def parse(input)
|
|
57
|
+
return return_value unless anything_to_parse?(input)
|
|
58
|
+
|
|
59
|
+
tokenize(input)
|
|
60
|
+
do_parse
|
|
61
|
+
return_value
|
|
62
|
+
rescue ::Racc::ParseError => e
|
|
63
|
+
raise(
|
|
64
|
+
::CSVPlusPlus::Error::FormulaSyntaxError.new(
|
|
65
|
+
"Error parsing #{parse_subject}",
|
|
66
|
+
bad_input: e.message,
|
|
67
|
+
wrapped_error: e
|
|
68
|
+
)
|
|
69
|
+
)
|
|
70
|
+
end
|
|
71
|
+
# rubocop:enable Metrics/MethodLength
|
|
72
|
+
|
|
73
|
+
protected
|
|
74
|
+
|
|
75
|
+
sig { abstract.params(input: ::String).returns(::T::Boolean) }
|
|
76
|
+
# Is the input even worth parsing? for example we don't want to parse cells unless they're a formula (start
|
|
77
|
+
# with '=')
|
|
78
|
+
#
|
|
79
|
+
# @param input [String]
|
|
80
|
+
#
|
|
81
|
+
# @return [Boolean]
|
|
82
|
+
def anything_to_parse?(input); end
|
|
83
|
+
|
|
84
|
+
sig { abstract.returns(::String) }
|
|
85
|
+
# Used for error messages, what is the thing being parsed? ("cell value", "modifier", "code section")
|
|
86
|
+
def parse_subject; end
|
|
87
|
+
|
|
88
|
+
sig { abstract.returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
|
|
89
|
+
# The output of the parser
|
|
90
|
+
def return_value; end
|
|
91
|
+
|
|
92
|
+
sig { abstract.returns(::CSVPlusPlus::Lexer::Tokenizer) }
|
|
93
|
+
# Returns a +Lexer::Tokenizer+ configured for the given
|
|
94
|
+
def tokenizer; end
|
|
95
|
+
|
|
96
|
+
private
|
|
97
|
+
|
|
98
|
+
sig { params(input: ::String).void }
|
|
99
|
+
def tokenize(input)
|
|
100
|
+
return if input.nil?
|
|
101
|
+
|
|
102
|
+
t = tokenizer.scan(input)
|
|
103
|
+
|
|
104
|
+
until t.scanner.empty?
|
|
105
|
+
next if t.matches_ignore?
|
|
106
|
+
|
|
107
|
+
return if t.stop?
|
|
108
|
+
|
|
109
|
+
t.scan_tokens!
|
|
110
|
+
consume_token(t)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
@tokens << %i[EOL EOL]
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
sig { params(tokenizer: ::CSVPlusPlus::Lexer::Tokenizer).void }
|
|
117
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
118
|
+
def consume_token(tokenizer)
|
|
119
|
+
if tokenizer.last_token&.token && tokenizer.last_match
|
|
120
|
+
@tokens << [::T.must(tokenizer.last_token).token, ::T.must(tokenizer.last_match)]
|
|
121
|
+
elsif tokenizer.scan_catchall
|
|
122
|
+
@tokens << [::T.must(tokenizer.last_match), ::T.must(tokenizer.last_match)]
|
|
123
|
+
# TODO: checking the +parse_subject+ like this is a little hacky... but we need to know if we're parsing
|
|
124
|
+
# modifiers or code_section (or formulas in a cell)
|
|
125
|
+
elsif parse_subject == 'modifier'
|
|
126
|
+
raise(
|
|
127
|
+
::CSVPlusPlus::Error::ModifierSyntaxError.new(
|
|
128
|
+
"Unable to parse #{parse_subject} starting at",
|
|
129
|
+
bad_input: tokenizer.peek
|
|
130
|
+
)
|
|
131
|
+
)
|
|
132
|
+
else
|
|
133
|
+
raise(
|
|
134
|
+
::CSVPlusPlus::Error::FormulaSyntaxError.new(
|
|
135
|
+
"Unable to parse #{parse_subject} starting at",
|
|
136
|
+
bad_input: tokenizer.peek
|
|
137
|
+
)
|
|
138
|
+
)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
@@ -1,24 +1,33 @@
|
|
|
1
|
-
# typed:
|
|
1
|
+
# typed: strict
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
require 'strscan'
|
|
5
|
-
|
|
6
4
|
module CSVPlusPlus
|
|
7
5
|
module Lexer
|
|
8
6
|
# A class that contains the use-case-specific regexes for parsing
|
|
9
7
|
#
|
|
10
|
-
# @attr_reader last_token [String] The last token that's been matched.
|
|
11
|
-
# @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
|
|
8
|
+
# @attr_reader last_token [String, nil] The last token that's been matched.
|
|
12
9
|
class Tokenizer
|
|
13
|
-
|
|
10
|
+
extend ::T::Sig
|
|
11
|
+
|
|
12
|
+
sig { returns(::T.nilable(::CSVPlusPlus::Lexer::Token)) }
|
|
13
|
+
attr_reader :last_token
|
|
14
14
|
|
|
15
|
+
sig do
|
|
16
|
+
params(
|
|
17
|
+
tokens: ::T::Enumerable[::CSVPlusPlus::Lexer::Token],
|
|
18
|
+
catchall: ::T.nilable(::Regexp),
|
|
19
|
+
ignore: ::T.nilable(::Regexp),
|
|
20
|
+
alter_matches: ::T::Hash[::Symbol, ::T.proc.params(s: ::String).returns(::String)],
|
|
21
|
+
stop_fn: ::T.nilable(::T.proc.params(s: ::StringScanner).returns(::T::Boolean))
|
|
22
|
+
).void
|
|
23
|
+
end
|
|
15
24
|
# @param tokens [Array<Regexp, String>] The list of tokens to scan
|
|
16
25
|
# @param catchall [Regexp] A final regexp to try if nothing else matches
|
|
17
26
|
# @param ignore [Regexp] Ignore anything matching this regexp
|
|
18
27
|
# @param alter_matches [Object] A map of matches to alter
|
|
19
28
|
# @param stop_fn [Proc] Stop parsing when this is true
|
|
20
29
|
def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
|
|
21
|
-
@last_token = nil
|
|
30
|
+
@last_token = ::T.let(nil, ::T.nilable(::CSVPlusPlus::Lexer::Token))
|
|
22
31
|
|
|
23
32
|
@catchall = catchall
|
|
24
33
|
@ignore = ignore
|
|
@@ -27,67 +36,94 @@ module CSVPlusPlus
|
|
|
27
36
|
@alter_matches = alter_matches
|
|
28
37
|
end
|
|
29
38
|
|
|
39
|
+
sig { params(input: ::String).returns(::T.self_type) }
|
|
30
40
|
# Initializes a scanner for the given input to be parsed
|
|
31
41
|
#
|
|
32
42
|
# @param input The input to be tokenized
|
|
43
|
+
#
|
|
33
44
|
# @return [Tokenizer]
|
|
34
45
|
def scan(input)
|
|
35
|
-
@scanner = ::StringScanner.new(input.strip)
|
|
46
|
+
@scanner = ::T.let(::StringScanner.new(input.strip), ::T.nilable(::StringScanner))
|
|
36
47
|
self
|
|
37
48
|
end
|
|
38
49
|
|
|
50
|
+
sig { returns(::StringScanner) }
|
|
51
|
+
# Returns the currently initialized +StringScanner+. You must call +#scan+ first or else this will throw an
|
|
52
|
+
# exception.
|
|
53
|
+
#
|
|
54
|
+
# @return [StringScanner]
|
|
55
|
+
def scanner
|
|
56
|
+
# The caller needs to initialize this class with a call to #scan before we can do anything. it sets up the
|
|
57
|
+
# +@scanner+ with its necessary input.
|
|
58
|
+
unless @scanner
|
|
59
|
+
raise(::CSVPlusPlus::Error::CompilerError, 'Called Tokenizer#scanner without calling #scan first')
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
@scanner
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
sig { void }
|
|
39
66
|
# Scan tokens and set +@last_token+ if any match
|
|
40
67
|
#
|
|
41
68
|
# @return [String, nil]
|
|
42
69
|
def scan_tokens!
|
|
43
|
-
|
|
44
|
-
@last_token = m ? m[1] : nil
|
|
70
|
+
@last_token = @tokens.find { |t| scanner.scan(t.regexp) }
|
|
45
71
|
end
|
|
46
72
|
|
|
73
|
+
sig { returns(::T.nilable(::String)) }
|
|
47
74
|
# Scan input against the catchall pattern
|
|
48
75
|
#
|
|
49
76
|
# @return [String, nil]
|
|
50
77
|
def scan_catchall
|
|
51
|
-
|
|
78
|
+
scanner.scan(@catchall) if @catchall
|
|
52
79
|
end
|
|
53
80
|
|
|
81
|
+
sig { returns(::T.nilable(::String)) }
|
|
54
82
|
# Scan input against the ignore pattern
|
|
55
83
|
#
|
|
56
84
|
# @return [String, nil]
|
|
57
85
|
def matches_ignore?
|
|
58
|
-
|
|
86
|
+
scanner.scan(@ignore) if @ignore
|
|
59
87
|
end
|
|
60
88
|
|
|
89
|
+
sig { returns(::T.nilable(::String)) }
|
|
61
90
|
# The value of the last token matched
|
|
62
91
|
#
|
|
63
92
|
# @return [String, nil]
|
|
64
93
|
def last_match
|
|
65
|
-
|
|
94
|
+
# rubocop:disable Style/MissingElse
|
|
95
|
+
if @last_token && @alter_matches.key?(@last_token.token.to_sym)
|
|
96
|
+
# rubocop:enable Style/MissingElse
|
|
97
|
+
return ::T.must(@alter_matches[@last_token.token.to_sym]).call(scanner.matched)
|
|
98
|
+
end
|
|
66
99
|
|
|
67
|
-
|
|
100
|
+
scanner.matched
|
|
68
101
|
end
|
|
69
102
|
|
|
103
|
+
sig { params(peek_characters: ::Integer).returns(::String) }
|
|
70
104
|
# Read the input but don't consume it
|
|
71
105
|
#
|
|
72
106
|
# @param peek_characters [Integer]
|
|
73
107
|
#
|
|
74
108
|
# @return [String]
|
|
75
109
|
def peek(peek_characters: 100)
|
|
76
|
-
|
|
110
|
+
scanner.peek(peek_characters)
|
|
77
111
|
end
|
|
78
112
|
|
|
113
|
+
sig { returns(::T::Boolean) }
|
|
79
114
|
# Scan for our stop token (if there is one - some parsers stop early and some don't)
|
|
80
115
|
#
|
|
81
116
|
# @return [boolean]
|
|
82
117
|
def stop?
|
|
83
|
-
@stop_fn ? @stop_fn.call(
|
|
118
|
+
@stop_fn ? @stop_fn.call(scanner) : false
|
|
84
119
|
end
|
|
85
120
|
|
|
121
|
+
sig { returns(::String) }
|
|
86
122
|
# The rest of the un-parsed input. The tokenizer might not need to parse the entire input
|
|
87
123
|
#
|
|
88
124
|
# @return [String]
|
|
89
125
|
def rest
|
|
90
|
-
|
|
126
|
+
scanner.rest
|
|
91
127
|
end
|
|
92
128
|
end
|
|
93
129
|
end
|
data/lib/csv_plus_plus/lexer.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# typed: strict
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
require_relative './lexer/
|
|
4
|
+
require_relative './lexer/racc_lexer'
|
|
5
5
|
require_relative './lexer/tokenizer'
|
|
6
6
|
|
|
7
7
|
module CSVPlusPlus
|
|
@@ -9,12 +9,51 @@ module CSVPlusPlus
|
|
|
9
9
|
module Lexer
|
|
10
10
|
extend ::T::Sig
|
|
11
11
|
|
|
12
|
+
# A token that's matched by +regexp+ and presented with +token+
|
|
13
|
+
class Token < ::T::Struct
|
|
14
|
+
const :regexp, ::Regexp
|
|
15
|
+
const :token, ::T.any(::String, ::Symbol)
|
|
16
|
+
end
|
|
17
|
+
|
|
12
18
|
END_OF_CODE_SECTION = '---'
|
|
13
19
|
public_constant :END_OF_CODE_SECTION
|
|
14
20
|
|
|
15
21
|
VARIABLE_REF = '$$'
|
|
16
22
|
public_constant :VARIABLE_REF
|
|
17
23
|
|
|
24
|
+
# @see https://github.com/ruby/racc/blob/master/lib/racc/parser.rb#L121
|
|
25
|
+
TOKEN_LIBRARY = ::T.let(
|
|
26
|
+
{
|
|
27
|
+
# A1_NOTATION: ::CSVPlusPlus::Lexer::Token.new(
|
|
28
|
+
# regexp: ::CSVPlusPlus::A1Reference::A1_NOTATION_REGEXP, token: :A1_NOTATION
|
|
29
|
+
# ),
|
|
30
|
+
FALSE: ::CSVPlusPlus::Lexer::Token.new(regexp: /false/i, token: :FALSE),
|
|
31
|
+
HEX_COLOR: ::CSVPlusPlus::Lexer::Token.new(regexp: ::CSVPlusPlus::Color::HEX_STRING_REGEXP, token: :HEX_COLOR),
|
|
32
|
+
INFIX_OP: ::CSVPlusPlus::Lexer::Token.new(regexp: %r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, token: :INFIX_OP),
|
|
33
|
+
NUMBER: ::CSVPlusPlus::Lexer::Token.new(regexp: /-?[\d.]+/, token: :NUMBER),
|
|
34
|
+
REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /[$!\w:]+/, token: :REF),
|
|
35
|
+
STRING: ::CSVPlusPlus::Lexer::Token.new(
|
|
36
|
+
regexp: %r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"},
|
|
37
|
+
token: :STRING
|
|
38
|
+
),
|
|
39
|
+
TRUE: ::CSVPlusPlus::Lexer::Token.new(regexp: /true/i, token: :TRUE),
|
|
40
|
+
VAR_REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /\$\$/, token: :VAR_REF)
|
|
41
|
+
}.freeze,
|
|
42
|
+
::T::Hash[::Symbol, ::CSVPlusPlus::Lexer::Token]
|
|
43
|
+
)
|
|
44
|
+
public_constant :TOKEN_LIBRARY
|
|
45
|
+
|
|
46
|
+
sig { params(str: ::String).returns(::String) }
|
|
47
|
+
# Run any transformations to the input before going into the CSV parser
|
|
48
|
+
#
|
|
49
|
+
# The CSV parser in particular does not like if there is whitespace after a double quote and before the next comma
|
|
50
|
+
#
|
|
51
|
+
# @param str [String]
|
|
52
|
+
# @return [String]
|
|
53
|
+
def self.preprocess(str)
|
|
54
|
+
str.gsub(/"\s*,/, '",')
|
|
55
|
+
end
|
|
56
|
+
|
|
18
57
|
sig { params(str: ::String).returns(::String) }
|
|
19
58
|
# When parsing a modifier with a quoted string field, we need a way to unescape. Some examples of quoted and
|
|
20
59
|
# unquoted results:
|
|
@@ -44,7 +44,7 @@ module CSVPlusPlus
|
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
def a1_notation(arg)
|
|
47
|
-
return arg if ::CSVPlusPlus::
|
|
47
|
+
return arg if ::CSVPlusPlus::A1Reference.valid_cell_reference?(arg)
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
def a_date(arg, allow_relative_date: false)
|