csv_plus_plus 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -3
  3. data/docs/CHANGELOG.md +18 -0
  4. data/lib/csv_plus_plus/a1_reference.rb +202 -0
  5. data/lib/csv_plus_plus/benchmarked_compiler.rb +3 -3
  6. data/lib/csv_plus_plus/cell.rb +1 -35
  7. data/lib/csv_plus_plus/cli.rb +43 -80
  8. data/lib/csv_plus_plus/cli_flag.rb +77 -70
  9. data/lib/csv_plus_plus/color.rb +1 -1
  10. data/lib/csv_plus_plus/compiler.rb +31 -21
  11. data/lib/csv_plus_plus/entities/ast_builder.rb +11 -4
  12. data/lib/csv_plus_plus/entities/boolean.rb +16 -9
  13. data/lib/csv_plus_plus/entities/builtins.rb +68 -40
  14. data/lib/csv_plus_plus/entities/date.rb +14 -11
  15. data/lib/csv_plus_plus/entities/entity.rb +11 -29
  16. data/lib/csv_plus_plus/entities/entity_with_arguments.rb +18 -31
  17. data/lib/csv_plus_plus/entities/function.rb +22 -11
  18. data/lib/csv_plus_plus/entities/function_call.rb +35 -11
  19. data/lib/csv_plus_plus/entities/has_identifier.rb +19 -0
  20. data/lib/csv_plus_plus/entities/number.rb +15 -10
  21. data/lib/csv_plus_plus/entities/reference.rb +77 -0
  22. data/lib/csv_plus_plus/entities/runtime_value.rb +36 -23
  23. data/lib/csv_plus_plus/entities/string.rb +13 -10
  24. data/lib/csv_plus_plus/entities.rb +2 -18
  25. data/lib/csv_plus_plus/error/cli_error.rb +17 -0
  26. data/lib/csv_plus_plus/error/compiler_error.rb +17 -0
  27. data/lib/csv_plus_plus/error/error.rb +18 -5
  28. data/lib/csv_plus_plus/error/formula_syntax_error.rb +12 -13
  29. data/lib/csv_plus_plus/error/modifier_syntax_error.rb +10 -36
  30. data/lib/csv_plus_plus/error/modifier_validation_error.rb +6 -32
  31. data/lib/csv_plus_plus/error/positional_error.rb +15 -0
  32. data/lib/csv_plus_plus/error/writer_error.rb +1 -1
  33. data/lib/csv_plus_plus/error.rb +4 -1
  34. data/lib/csv_plus_plus/error_formatter.rb +111 -0
  35. data/lib/csv_plus_plus/google_api_client.rb +18 -8
  36. data/lib/csv_plus_plus/lexer/racc_lexer.rb +144 -0
  37. data/lib/csv_plus_plus/lexer/tokenizer.rb +53 -17
  38. data/lib/csv_plus_plus/lexer.rb +40 -1
  39. data/lib/csv_plus_plus/modifier/data_validation.rb +1 -1
  40. data/lib/csv_plus_plus/modifier/expand.rb +17 -0
  41. data/lib/csv_plus_plus/modifier.rb +6 -1
  42. data/lib/csv_plus_plus/options/file_options.rb +49 -0
  43. data/lib/csv_plus_plus/options/google_sheets_options.rb +42 -0
  44. data/lib/csv_plus_plus/options/options.rb +102 -0
  45. data/lib/csv_plus_plus/options.rb +22 -110
  46. data/lib/csv_plus_plus/parser/cell_value.tab.rb +65 -66
  47. data/lib/csv_plus_plus/parser/code_section.tab.rb +92 -84
  48. data/lib/csv_plus_plus/parser/modifier.tab.rb +40 -30
  49. data/lib/csv_plus_plus/reader/csv.rb +50 -0
  50. data/lib/csv_plus_plus/reader/google_sheets.rb +129 -0
  51. data/lib/csv_plus_plus/reader/reader.rb +27 -0
  52. data/lib/csv_plus_plus/reader/rubyxl.rb +37 -0
  53. data/lib/csv_plus_plus/reader.rb +14 -0
  54. data/lib/csv_plus_plus/runtime/graph.rb +6 -6
  55. data/lib/csv_plus_plus/runtime/{position_tracker.rb → position.rb} +16 -5
  56. data/lib/csv_plus_plus/runtime/references.rb +32 -27
  57. data/lib/csv_plus_plus/runtime/runtime.rb +73 -67
  58. data/lib/csv_plus_plus/runtime/scope.rb +280 -0
  59. data/lib/csv_plus_plus/runtime.rb +9 -9
  60. data/lib/csv_plus_plus/source_code.rb +14 -9
  61. data/lib/csv_plus_plus/template.rb +17 -12
  62. data/lib/csv_plus_plus/version.rb +1 -1
  63. data/lib/csv_plus_plus/writer/csv.rb +32 -5
  64. data/lib/csv_plus_plus/writer/excel.rb +19 -6
  65. data/lib/csv_plus_plus/writer/file_backer_upper.rb +27 -14
  66. data/lib/csv_plus_plus/writer/google_sheets.rb +23 -129
  67. data/lib/csv_plus_plus/writer/{google_sheet_builder.rb → google_sheets_builder.rb} +39 -55
  68. data/lib/csv_plus_plus/writer/merger.rb +56 -0
  69. data/lib/csv_plus_plus/writer/open_document.rb +16 -2
  70. data/lib/csv_plus_plus/writer/rubyxl_builder.rb +68 -43
  71. data/lib/csv_plus_plus/writer/writer.rb +42 -0
  72. data/lib/csv_plus_plus/writer.rb +58 -19
  73. data/lib/csv_plus_plus.rb +26 -14
  74. metadata +43 -18
  75. data/lib/csv_plus_plus/entities/cell_reference.rb +0 -231
  76. data/lib/csv_plus_plus/entities/variable.rb +0 -37
  77. data/lib/csv_plus_plus/error/syntax_error.rb +0 -71
  78. data/lib/csv_plus_plus/google_options.rb +0 -32
  79. data/lib/csv_plus_plus/lexer/lexer.rb +0 -89
  80. data/lib/csv_plus_plus/runtime/can_define_references.rb +0 -87
  81. data/lib/csv_plus_plus/runtime/can_resolve_references.rb +0 -209
  82. data/lib/csv_plus_plus/writer/base_writer.rb +0 -45
@@ -1,62 +1,36 @@
1
1
  # typed: strict
2
2
  # frozen_string_literal: true
3
3
 
4
- require_relative './syntax_error'
5
-
6
4
  module CSVPlusPlus
7
5
  module Error
8
- # An Error that wraps a +ModifierValidationError+ with a +Runtime+.
9
- class ModifierSyntaxError < ::CSVPlusPlus::Error::SyntaxError
6
+ # A syntax error encountered when parsing a modifier definition
7
+ class ModifierSyntaxError < ::CSVPlusPlus::Error::Error
10
8
  extend ::T::Sig
9
+ include ::CSVPlusPlus::Error::PositionalError
11
10
 
12
11
  sig { returns(::String) }
13
12
  attr_reader :bad_input
14
13
 
15
- sig { returns(::String) }
16
- attr_reader :message
17
-
18
14
  sig { returns(::T.nilable(::Symbol)) }
19
15
  attr_reader :modifier
20
16
 
21
17
  sig do
22
18
  params(
23
- runtime: ::CSVPlusPlus::Runtime::Runtime,
24
- modifier_validation_error: ::CSVPlusPlus::Error::ModifierValidationError
25
- ).returns(::CSVPlusPlus::Error::ModifierSyntaxError)
26
- end
27
- # Create a +ModifierSyntaxError+ given a +runtime+ and +ModifierValidationError+.
28
- #
29
- # @param runtime [Runtime]
30
- # @param modifier_validation_error [ModifierValidationError]
31
- #
32
- # @return [ModifierSyntaxError]
33
- def self.from_validation_error(runtime, modifier_validation_error)
34
- new(
35
- runtime,
36
- modifier: modifier_validation_error.modifier,
37
- bad_input: modifier_validation_error.bad_input,
38
- message: modifier_validation_error.message,
39
- wrapped_error: modifier_validation_error
40
- )
41
- end
42
-
43
- sig do
44
- params(
45
- runtime: ::CSVPlusPlus::Runtime::Runtime,
46
- bad_input: ::String,
47
19
  message: ::String,
20
+ bad_input: ::String,
48
21
  modifier: ::T.nilable(::Symbol),
49
22
  wrapped_error: ::T.nilable(::StandardError)
50
23
  ).void
51
24
  end
52
- # @param runtime [Runtime] The current runtime
25
+ # @param message [String] The error message
26
+ # @param bad_input [String] The offending input
27
+ # @param modifier [Symbol] The modifier being parsed
53
28
  # @param wrapped_error [ModifierValidationError] The validation error that this is wrapping
54
- def initialize(runtime, bad_input:, message:, modifier: nil, wrapped_error: nil)
29
+ def initialize(message, bad_input:, modifier: nil, wrapped_error: nil)
30
+ super(message, wrapped_error:)
31
+
55
32
  @bad_input = bad_input
56
33
  @modifier = modifier
57
- @message = message
58
-
59
- super(runtime, wrapped_error:)
60
34
  end
61
35
 
62
36
  sig { override.returns(::String) }
@@ -9,20 +9,9 @@ module CSVPlusPlus
9
9
  # @attr_reader bad_input [String] The offending input that caused the error to be thrown
10
10
  # @attr_reader choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
11
11
  # @attr_reader message [String, nil] A relevant message to show
12
- class ModifierValidationError < ::CSVPlusPlus::Error::Error
12
+ class ModifierValidationError < ::CSVPlusPlus::Error::ModifierSyntaxError
13
13
  extend ::T::Sig
14
-
15
- sig { returns(::String) }
16
- attr_reader :bad_input
17
-
18
- sig { returns(::T.nilable(::T.class_of(::T::Enum))) }
19
- attr_reader :choices
20
-
21
- sig { returns(::String) }
22
- attr_reader :message
23
-
24
- sig { returns(::Symbol) }
25
- attr_reader :modifier
14
+ include ::CSVPlusPlus::Error::PositionalError
26
15
 
27
16
  sig do
28
17
  params(
@@ -38,31 +27,16 @@ module CSVPlusPlus
38
27
  # @param bad_input [String] The offending input that caused the error to be thrown
39
28
  # @param choices [Array<Symbol>, nil] The choices that +value+ must be one of (but violated)
40
29
  # @param message [String, nil] A relevant message to show
41
- # rubocop:disable Metrics/MethodLength
42
30
  def initialize(modifier, bad_input:, choices: nil, message: nil)
43
- @bad_input = bad_input
44
- @choices = choices
45
- @modifier = modifier
46
-
47
- @message = ::T.let(
48
- if @choices
49
- "must be one of (#{@choices.values.map(&:serialize).join(', ')})"
31
+ message = ::T.let(
32
+ if choices
33
+ "must be one of (#{choices.values.map(&:serialize).join(', ')})"
50
34
  else
51
35
  ::T.must(message)
52
36
  end,
53
37
  ::String
54
38
  )
55
-
56
- super(@message)
57
- end
58
- # rubocop:enable Metrics/MethodLength
59
-
60
- sig { returns(::String) }
61
- # A user-facing error message
62
- #
63
- # @return [::String]
64
- def error_message
65
- @message
39
+ super(message, bad_input:, modifier:)
66
40
  end
67
41
  end
68
42
  end
@@ -0,0 +1,15 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
+ module CSVPlusPlus
5
+ module Error
6
+ # Methods that can be included into a class to denote that its result is dependent on the current
7
+ # +Runtime::Position+
8
+ module PositionalError
9
+ extend ::T::Sig
10
+ extend ::T::Helpers
11
+
12
+ interface!
13
+ end
14
+ end
15
+ end
@@ -10,7 +10,7 @@ module CSVPlusPlus
10
10
  sig { override.returns(::String) }
11
11
  # @return [::String]
12
12
  def error_message
13
- "Error writing template: #{message}"
13
+ "Error writing csvpp template: #{message}"
14
14
  end
15
15
  end
16
16
  end
@@ -2,10 +2,13 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require_relative './error/error'
5
+ require_relative './error/positional_error'
6
+
7
+ require_relative './error/cli_error'
8
+ require_relative './error/compiler_error'
5
9
  require_relative './error/formula_syntax_error'
6
10
  require_relative './error/modifier_syntax_error'
7
11
  require_relative './error/modifier_validation_error'
8
- require_relative './error/syntax_error'
9
12
  require_relative './error/writer_error'
10
13
 
11
14
  module CSVPlusPlus
@@ -0,0 +1,111 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
+ module CSVPlusPlus
5
+ # Handle any errors potentially thrown during compilation. This could be anything from a user error (for example
6
+ # calling with invalid csvpp code) to an error calling Google Sheets API or writing to the filesystem.
7
+ class ErrorFormatter
8
+ extend ::T::Sig
9
+
10
+ sig do
11
+ params(options: ::CSVPlusPlus::Options::Options, runtime: ::CSVPlusPlus::Runtime::Runtime).void
12
+ end
13
+ # @param options [Options]
14
+ # @param runtime [Runtime::Runtime]
15
+ def initialize(options:, runtime:)
16
+ @options = options
17
+ @runtime = runtime
18
+ end
19
+
20
+ sig { params(error: ::StandardError).void }
21
+ # Nicely handle a given error. How it's handled depends on if it's our error and if @options.verbose
22
+ #
23
+ # @param error [CSVPlusPlus::Error, Google::Apis::ClientError, StandardError]
24
+ def handle_error(error)
25
+ # make sure that we're on a newline (verbose mode will probably be in the middle of printing a benchmark)
26
+ puts("\n\n") if @options.verbose
27
+
28
+ case error
29
+ when ::CSVPlusPlus::Error::Error
30
+ handle_internal_error(error)
31
+ when ::Google::Apis::ClientError
32
+ handle_google_error(error)
33
+ else
34
+ unhandled_error(error)
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ sig { params(error: ::StandardError).void }
41
+ # An error was thrown that we weren't planning on
42
+ def unhandled_error(error)
43
+ warn(
44
+ <<~ERROR_MESSAGE)
45
+ An unexpected error was encountered. Please try running again with --verbose and
46
+ report the error at: https://github.com/patrickomatic/csv-plus-plus/issues/new'
47
+ ERROR_MESSAGE
48
+
49
+ return unless @options.verbose
50
+
51
+ warn(error.full_message)
52
+ warn("Cause: #{error.cause}") if error.cause
53
+ end
54
+
55
+ sig { params(error: ::CSVPlusPlus::Error::Error).void }
56
+ def handle_internal_error(error)
57
+ warn(with_position(error))
58
+ handle_wrapped_error(::T.must(error.wrapped_error)) if error.wrapped_error
59
+ end
60
+
61
+ sig { params(wrapped_error: ::StandardError).void }
62
+ def handle_wrapped_error(wrapped_error)
63
+ return unless @options.verbose
64
+
65
+ warn(wrapped_error.full_message)
66
+ warn((wrapped_error.backtrace || []).join("\n")) if wrapped_error.backtrace
67
+ end
68
+
69
+ sig { params(error: ::Google::Apis::ClientError).void }
70
+ def handle_google_error(error)
71
+ warn("Error making Google Sheets API request: #{error.message}")
72
+ return unless @options.verbose
73
+
74
+ warn("#{error.status_code} Error making Google API request [#{error.message}]: #{error.body}")
75
+ end
76
+
77
+ sig { params(error: ::CSVPlusPlus::Error::Error).returns(::String) }
78
+ # Output a user-helpful string that references the runtime state
79
+ #
80
+ # @param error [Error::Error] The error message to be prefixed with a filename and position
81
+ #
82
+ # @return [String]
83
+ def with_position(error)
84
+ message = error.error_message
85
+ case error
86
+ when ::CSVPlusPlus::Error::PositionalError
87
+ "#{message_prefix}#{cell_index} #{message}"
88
+ else
89
+ message
90
+ end
91
+ end
92
+
93
+ sig { returns(::String) }
94
+ def cell_index
95
+ if @runtime.parsing_csv_section?
96
+ "[#{@runtime.position.row_index},#{@runtime.position.cell_index}]"
97
+ else
98
+ ''
99
+ end
100
+ end
101
+
102
+ sig { returns(::String) }
103
+ def message_prefix
104
+ line_number = @runtime.position.line_number
105
+ filename = @runtime.source_code.filename
106
+
107
+ line_str = ":#{line_number}"
108
+ "#{filename}#{line_str}"
109
+ end
110
+ end
111
+ end
@@ -10,20 +10,30 @@ module CSVPlusPlus
10
10
  # Get a +Google::Apis::SheetsV4::SheetsService+ instance configured to connect to the sheets API
11
11
  #
12
12
  # @return [Google::Apis::SheetsV4::SheetsService]
13
- def self.sheets_client
14
- ::Google::Apis::SheetsV4::SheetsService.new.tap do |s|
15
- s.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/spreadsheets'].freeze)
16
- end
13
+ def sheets_client
14
+ ::T.must(
15
+ @sheets_client ||= ::T.let(
16
+ ::Google::Apis::SheetsV4::SheetsService.new.tap do |s|
17
+ s.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/spreadsheets'].freeze)
18
+ end,
19
+ ::T.nilable(::Google::Apis::SheetsV4::SheetsService)
20
+ )
21
+ )
17
22
  end
18
23
 
19
24
  sig { returns(::Google::Apis::DriveV3::DriveService) }
20
25
  # Get a +Google::Apis::DriveV3::DriveService+ instance connected to the drive API
21
26
  #
22
27
  # @return [Google::Apis::DriveV3::DriveService]
23
- def self.drive_client
24
- ::Google::Apis::DriveV3::DriveService.new.tap do |d|
25
- d.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/drive.file'].freeze)
26
- end
28
+ def drive_client
29
+ ::T.must(
30
+ @drive_client ||= ::T.let(
31
+ ::Google::Apis::DriveV3::DriveService.new.tap do |d|
32
+ d.authorization = ::Google::Auth.get_application_default(['https://www.googleapis.com/auth/drive.file'].freeze)
33
+ end,
34
+ ::T.nilable(::Google::Apis::DriveV3::DriveService)
35
+ )
36
+ )
27
37
  end
28
38
  end
29
39
  end
@@ -0,0 +1,144 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
+ module CSVPlusPlus
5
+ module Lexer
6
+ # TODO: ugh clean this up
7
+ RaccToken =
8
+ ::T.type_alias do
9
+ ::T.any(
10
+ [::String, ::Symbol],
11
+ [::Symbol, ::String],
12
+ [::String, ::String],
13
+ [::Symbol, ::Symbol],
14
+ [::FalseClass, ::FalseClass]
15
+ )
16
+ end
17
+ public_constant :RaccToken
18
+
19
+ # Common methods to be mixed into the Racc parsers
20
+ #
21
+ # @attr_reader tokens [Array]
22
+ module RaccLexer
23
+ extend ::T::Sig
24
+ extend ::T::Helpers
25
+ extend ::T::Generic
26
+ include ::Kernel
27
+
28
+ abstract!
29
+
30
+ ReturnType = type_member
31
+ public_constant :ReturnType
32
+
33
+ sig { returns(::T::Array[::CSVPlusPlus::Lexer::RaccToken]) }
34
+ attr_reader :tokens
35
+
36
+ sig { params(tokens: ::T::Array[::CSVPlusPlus::Lexer::RaccToken]).void }
37
+ # Initialize a lexer instance with an empty +@tokens+
38
+ def initialize(tokens: [])
39
+ @tokens = ::T.let(tokens, ::T::Array[::CSVPlusPlus::Lexer::RaccToken])
40
+ end
41
+
42
+ sig { returns(::T.nilable(::CSVPlusPlus::Lexer::RaccToken)) }
43
+ # Used by racc to iterate each token
44
+ #
45
+ # @return [Array<(Regexp, Symbol) | (false, false)>]
46
+ def next_token
47
+ @tokens.shift
48
+ end
49
+
50
+ sig { params(input: ::String).returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
51
+ # Orchestrate the tokenizing, parsing and error handling of parsing input. Each instance will implement their own
52
+ # +#tokenizer+ method
53
+ #
54
+ # @return [ReturnType] Each instance will define its own +return_value+ with the result of parsing
55
+ # rubocop:disable Metrics/MethodLength
56
+ def parse(input)
57
+ return return_value unless anything_to_parse?(input)
58
+
59
+ tokenize(input)
60
+ do_parse
61
+ return_value
62
+ rescue ::Racc::ParseError => e
63
+ raise(
64
+ ::CSVPlusPlus::Error::FormulaSyntaxError.new(
65
+ "Error parsing #{parse_subject}",
66
+ bad_input: e.message,
67
+ wrapped_error: e
68
+ )
69
+ )
70
+ end
71
+ # rubocop:enable Metrics/MethodLength
72
+
73
+ protected
74
+
75
+ sig { abstract.params(input: ::String).returns(::T::Boolean) }
76
+ # Is the input even worth parsing? for example we don't want to parse cells unless they're a formula (start
77
+ # with '=')
78
+ #
79
+ # @param input [String]
80
+ #
81
+ # @return [Boolean]
82
+ def anything_to_parse?(input); end
83
+
84
+ sig { abstract.returns(::String) }
85
+ # Used for error messages, what is the thing being parsed? ("cell value", "modifier", "code section")
86
+ def parse_subject; end
87
+
88
+ sig { abstract.returns(::CSVPlusPlus::Lexer::RaccLexer::ReturnType) }
89
+ # The output of the parser
90
+ def return_value; end
91
+
92
+ sig { abstract.returns(::CSVPlusPlus::Lexer::Tokenizer) }
93
+ # Returns the +Lexer::Tokenizer+ that the including parser uses to scan its input
94
+ def tokenizer; end
95
+
96
+ private
97
+
98
+ sig { params(input: ::String).void }
99
+ def tokenize(input)
100
+ return if input.nil?
101
+
102
+ t = tokenizer.scan(input)
103
+
104
+ until t.scanner.empty?
105
+ next if t.matches_ignore?
106
+
107
+ return if t.stop?
108
+
109
+ t.scan_tokens!
110
+ consume_token(t)
111
+ end
112
+
113
+ @tokens << %i[EOL EOL]
114
+ end
115
+
116
+ sig { params(tokenizer: ::CSVPlusPlus::Lexer::Tokenizer).void }
117
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
118
+ def consume_token(tokenizer)
119
+ if tokenizer.last_token&.token && tokenizer.last_match
120
+ @tokens << [::T.must(tokenizer.last_token).token, ::T.must(tokenizer.last_match)]
121
+ elsif tokenizer.scan_catchall
122
+ @tokens << [::T.must(tokenizer.last_match), ::T.must(tokenizer.last_match)]
123
+ # TODO: checking the +parse_subject+ like this is a little hacky... but we need to know if we're parsing
124
+ # modifiers or code_section (or formulas in a cell)
125
+ elsif parse_subject == 'modifier'
126
+ raise(
127
+ ::CSVPlusPlus::Error::ModifierSyntaxError.new(
128
+ "Unable to parse #{parse_subject} starting at",
129
+ bad_input: tokenizer.peek
130
+ )
131
+ )
132
+ else
133
+ raise(
134
+ ::CSVPlusPlus::Error::FormulaSyntaxError.new(
135
+ "Unable to parse #{parse_subject} starting at",
136
+ bad_input: tokenizer.peek
137
+ )
138
+ )
139
+ end
140
+ end
141
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
142
+ end
143
+ end
144
+ end
@@ -1,24 +1,33 @@
1
- # typed: true
1
+ # typed: strict
2
2
  # frozen_string_literal: true
3
3
 
4
- require 'strscan'
5
-
6
4
  module CSVPlusPlus
7
5
  module Lexer
8
6
  # A class that contains the use-case-specific regexes for parsing
9
7
  #
10
- # @attr_reader last_token [String] The last token that's been matched.
11
- # @attr_reader scanner [StringScanner] The StringScanner instance that's parsing the input.
8
+ # @attr_reader last_token [Lexer::Token, nil] The last token that's been matched.
12
9
  class Tokenizer
13
- attr_reader :last_token, :scanner
10
+ extend ::T::Sig
11
+
12
+ sig { returns(::T.nilable(::CSVPlusPlus::Lexer::Token)) }
13
+ attr_reader :last_token
14
14
 
15
+ sig do
16
+ params(
17
+ tokens: ::T::Enumerable[::CSVPlusPlus::Lexer::Token],
18
+ catchall: ::T.nilable(::Regexp),
19
+ ignore: ::T.nilable(::Regexp),
20
+ alter_matches: ::T::Hash[::Symbol, ::T.proc.params(s: ::String).returns(::String)],
21
+ stop_fn: ::T.nilable(::T.proc.params(s: ::StringScanner).returns(::T::Boolean))
22
+ ).void
23
+ end
15
24
  # @param tokens [Array<Regexp, String>] The list of tokens to scan
16
25
  # @param catchall [Regexp] A final regexp to try if nothing else matches
17
26
  # @param ignore [Regexp] Ignore anything matching this regexp
18
27
  # @param alter_matches [Object] A map of matches to alter
19
28
  # @param stop_fn [Proc] Stop parsing when this is true
20
29
  def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
21
- @last_token = nil
30
+ @last_token = ::T.let(nil, ::T.nilable(::CSVPlusPlus::Lexer::Token))
22
31
 
23
32
  @catchall = catchall
24
33
  @ignore = ignore
@@ -27,67 +36,94 @@ module CSVPlusPlus
27
36
  @alter_matches = alter_matches
28
37
  end
29
38
 
39
+ sig { params(input: ::String).returns(::T.self_type) }
30
40
  # Initializes a scanner for the given input to be parsed
31
41
  #
32
42
  # @param input The input to be tokenized
43
+ #
33
44
  # @return [Tokenizer]
34
45
  def scan(input)
35
- @scanner = ::StringScanner.new(input.strip)
46
+ @scanner = ::T.let(::StringScanner.new(input.strip), ::T.nilable(::StringScanner))
36
47
  self
37
48
  end
38
49
 
50
+ sig { returns(::StringScanner) }
51
+ # Returns the currently initialized +StringScanner+. You must call +#scan+ first or else this will throw an
52
+ # exception.
53
+ #
54
+ # @return [StringScanner]
55
+ def scanner
56
+ # The caller needs to initialize this class with a call to #scan before we can do anything. it sets up the
57
+ # +@scanner+ with its necessary input.
58
+ unless @scanner
59
+ raise(::CSVPlusPlus::Error::CompilerError, 'Called Tokenizer#scanner without calling #scan first')
60
+ end
61
+
62
+ @scanner
63
+ end
64
+
65
+ sig { void }
39
66
  # Scan tokens and set +@last_token+ if any match
40
67
  #
41
68
  # @return [String, nil]
42
69
  def scan_tokens!
43
- m = @tokens.find { |t| @scanner.scan(t.first) }
44
- @last_token = m ? m[1] : nil
70
+ @last_token = @tokens.find { |t| scanner.scan(t.regexp) }
45
71
  end
46
72
 
73
+ sig { returns(::T.nilable(::String)) }
47
74
  # Scan input against the catchall pattern
48
75
  #
49
76
  # @return [String, nil]
50
77
  def scan_catchall
51
- @scanner.scan(@catchall) if @catchall
78
+ scanner.scan(@catchall) if @catchall
52
79
  end
53
80
 
81
+ sig { returns(::T.nilable(::String)) }
54
82
  # Scan input against the ignore pattern
55
83
  #
56
84
  # @return [boolean]
57
85
  def matches_ignore?
58
- @scanner.scan(@ignore) if @ignore
86
+ scanner.scan(@ignore) if @ignore
59
87
  end
60
88
 
89
+ sig { returns(::T.nilable(::String)) }
61
90
  # The value of the last token matched
62
91
  #
63
92
  # @return [String, nil]
64
93
  def last_match
65
- return @alter_matches[@last_token].call(@scanner.matched) if @alter_matches.key?(@last_token)
94
+ # rubocop:disable Style/MissingElse
95
+ if @last_token && @alter_matches.key?(@last_token.token.to_sym)
96
+ # rubocop:enable Style/MissingElse
97
+ return ::T.must(@alter_matches[@last_token.token.to_sym]).call(scanner.matched)
98
+ end
66
99
 
67
- @scanner.matched
100
+ scanner.matched
68
101
  end
69
102
 
103
+ sig { params(peek_characters: ::Integer).returns(::String) }
70
104
  # Read the input but don't consume it
71
105
  #
72
106
  # @param peek_characters [Integer]
73
107
  #
74
108
  # @return [String]
75
109
  def peek(peek_characters: 100)
76
- @scanner.peek(peek_characters)
110
+ scanner.peek(peek_characters)
77
111
  end
78
112
 
113
+ sig { returns(::T::Boolean) }
79
114
  # Scan for our stop token (if there is one - some parsers stop early and some don't)
80
115
  #
81
116
  # @return [boolean]
82
117
  def stop?
83
- @stop_fn ? @stop_fn.call(@scanner) : false
118
+ @stop_fn ? @stop_fn.call(scanner) : false
84
119
  end
85
120
 
121
+ sig { returns(::String) }
86
122
  # The rest of the un-parsed input. The tokenizer might not need to parse the entire input
87
123
  #
88
124
  # @return [String]
89
125
  def rest
90
- @scanner.rest
126
+ scanner.rest
91
127
  end
92
128
  end
93
129
  end
@@ -1,7 +1,7 @@
1
1
  # typed: strict
2
2
  # frozen_string_literal: true
3
3
 
4
- require_relative './lexer/lexer'
4
+ require_relative './lexer/racc_lexer'
5
5
  require_relative './lexer/tokenizer'
6
6
 
7
7
  module CSVPlusPlus
@@ -9,12 +9,51 @@ module CSVPlusPlus
9
9
  module Lexer
10
10
  extend ::T::Sig
11
11
 
12
+ # A token that's matched by +regexp+ and presented with +token+
13
+ class Token < ::T::Struct
14
+ const :regexp, ::Regexp
15
+ const :token, ::T.any(::String, ::Symbol)
16
+ end
17
+
12
18
  END_OF_CODE_SECTION = '---'
13
19
  public_constant :END_OF_CODE_SECTION
14
20
 
15
21
  VARIABLE_REF = '$$'
16
22
  public_constant :VARIABLE_REF
17
23
 
24
+ # @see https://github.com/ruby/racc/blob/master/lib/racc/parser.rb#L121
25
+ TOKEN_LIBRARY = ::T.let(
26
+ {
27
+ # A1_NOTATION: ::CSVPlusPlus::Lexer::Token.new(
28
+ # regexp: ::CSVPlusPlus::A1Reference::A1_NOTATION_REGEXP, token: :A1_NOTATION
29
+ # ),
30
+ FALSE: ::CSVPlusPlus::Lexer::Token.new(regexp: /false/i, token: :FALSE),
31
+ HEX_COLOR: ::CSVPlusPlus::Lexer::Token.new(regexp: ::CSVPlusPlus::Color::HEX_STRING_REGEXP, token: :HEX_COLOR),
32
+ INFIX_OP: ::CSVPlusPlus::Lexer::Token.new(regexp: %r{\^|\+|-|\*|/|&|<|>|<=|>=|<>}, token: :INFIX_OP),
33
+ NUMBER: ::CSVPlusPlus::Lexer::Token.new(regexp: /-?[\d.]+/, token: :NUMBER),
34
+ REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /[$!\w:]+/, token: :REF),
35
+ STRING: ::CSVPlusPlus::Lexer::Token.new(
36
+ regexp: %r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"},
37
+ token: :STRING
38
+ ),
39
+ TRUE: ::CSVPlusPlus::Lexer::Token.new(regexp: /true/i, token: :TRUE),
40
+ VAR_REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /\$\$/, token: :VAR_REF)
41
+ }.freeze,
42
+ ::T::Hash[::Symbol, ::CSVPlusPlus::Lexer::Token]
43
+ )
44
+ public_constant :TOKEN_LIBRARY
45
+
46
+ sig { params(str: ::String).returns(::String) }
47
+ # Run any transformations to the input before going into the CSV parser
48
+ #
49
+ # The CSV parser in particular does not like it when there is whitespace after a double quote and before the next comma
50
+ #
51
+ # @param str [String]
52
+ # @return [String]
53
+ def self.preprocess(str)
54
+ str.gsub(/"\s*,/, '",')
55
+ end
56
+
18
57
  sig { params(str: ::String).returns(::String) }
19
58
  # When parsing a modifier with a quoted string field, we need a way to unescape. Some examples of quoted and
20
59
  # unquoted results:
@@ -44,7 +44,7 @@ module CSVPlusPlus
44
44
  end
45
45
 
46
46
  def a1_notation(arg)
47
- return arg if ::CSVPlusPlus::Entities::CellReference.valid_cell_reference?(arg)
47
+ return arg if ::CSVPlusPlus::A1Reference.valid_cell_reference?(arg)
48
48
  end
49
49
 
50
50
  def a_date(arg, allow_relative_date: false)