argstring 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/argstring/config.rb +127 -0
- data/lib/argstring/errors.rb +24 -0
- data/lib/argstring/lexer/assignment_finder.rb +76 -0
- data/lib/argstring/lexer/segmenter.rb +119 -0
- data/lib/argstring/lexer/token_parser.rb +124 -0
- data/lib/argstring/models/argument.rb +16 -0
- data/lib/argstring/models/arguments.rb +29 -0
- data/lib/argstring/models/name.rb +6 -0
- data/lib/argstring/models/token.rb +19 -0
- data/lib/argstring/models/value.rb +6 -0
- data/lib/argstring/parser.rb +229 -0
- data/lib/argstring/version.rb +5 -0
- data/lib/argstring.rb +18 -0
- metadata +100 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 372c89e811880ac8fcf4c3d27720d01b83c17a2d
|
|
4
|
+
data.tar.gz: c75d279c3a11f1a7ed355c814bf2ef3d108bf91c
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 85cc4207cb9b70d18e4074719c6a54335ed0804efb14296821c855e071be2b6f763ad82aa47f25b475e9e1a4163f293ea31b457b318a1b4baebbd63edc8ee625
|
|
7
|
+
data.tar.gz: d58aa2cf20b1074e076cf21bb0d94342cccb71826f1af34e36111a3460623b642ea4a3b967ec2f683e092149570fd3f16d06a0ecdc2124a6441b90b6a39d8621
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # Validated settings describing how an argument string is split and read.
  #
  # Holds the separator, escape character, assignment operators, encloser
  # pairs, duplicate policy and the flags/positional switches. Every setting
  # is checked in the constructor; an invalid combination raises
  # ConfigurationError.
  class Config
    DEFAULT_ENCLOSERS = [{ open: '"', close: '"' }, { open: "'", close: "'" }].freeze
    # The only Regexp accepted as a separator; it selects whitespace mode.
    WHITESPACE_SEPARATOR_SENTINEL = /\s/

    attr_reader :separator, :escape, :assignment, :enclosers, :duplicates, :flags_enabled, :positional_enabled

    # separator:  the exact regex /\s/ or a single non-whitespace character
    # escape:     a single character, or nil / "" to disable escaping
    # assignment: array of single-character assignment operators
    # enclosers:  array of {open:, close:} hashes of single characters
    # duplicates: :first or :last
    # flags / positional: stored as flags_enabled / positional_enabled
    #
    # Raises ConfigurationError when any setting is invalid.
    def initialize(separator: WHITESPACE_SEPARATOR_SENTINEL, escape: "\\", assignment: ["="], enclosers: DEFAULT_ENCLOSERS, duplicates: :last, flags: true, positional: true)
      @separator = separator
      @escape = escape
      @assignment = assignment
      @enclosers = enclosers
      @duplicates = duplicates
      @flags_enabled = flags
      @positional_enabled = positional

      validate!
    end

    # True when the separator is the whitespace sentinel regex.
    def whitespace_separator?
      @separator.is_a?(Regexp) && @separator == WHITESPACE_SEPARATOR_SENTINEL
    end

    # Truthy when an escape character is configured (neither nil nor "").
    def escape_enabled?
      @escape && @escape != ""
    end

    # True when ch matches /\s/.
    def whitespace_char?(ch)
      !!(ch =~ /\s/)
    end

    # Returns the encloser pair whose :open equals ch, or nil.
    def encloser_open_pair_for(ch)
      @enclosers.find { |pair| pair[:open] == ch }
    end

    # True when the character at string[index] acts as a separator.
    def separator_boundary_at?(string, index)
      ch = string[index]
      return whitespace_char?(ch) if whitespace_separator?

      ch == @separator
    end

    private

    def validate!
      validate_separator!
      validate_escape!
      validate_assignment!
      validate_enclosers!
      validate_duplicates!
      validate_character_uniqueness!
      # Runs last so the pre-existing error messages keep precedence.
      validate_whitespace_conflicts!
    end

    def validate_separator!
      if @separator.is_a?(Regexp)
        unless @separator == WHITESPACE_SEPARATOR_SENTINEL
          raise ConfigurationError, "separator only supports the exact regex /\\s/ or a single character string"
        end
      else
        validate_single_char!("separator", @separator)
        if whitespace_char?(@separator)
          raise ConfigurationError, "separator cannot be whitespace unless it is the exact regex /\\s/"
        end
      end
    end

    def validate_escape!
      return if @escape.nil? || @escape == ""

      validate_single_char!("escape", @escape)
    end

    def validate_assignment!
      unless @assignment.is_a?(Array)
        raise ConfigurationError, "assignment must be an array of single-character strings"
      end

      @assignment.each { |ch| validate_single_char!("assignment operator", ch) }
    end

    def validate_enclosers!
      unless @enclosers.is_a?(Array)
        raise ConfigurationError, "enclosers must be an array of {open:, close:} hashes"
      end

      @enclosers.each do |pair|
        unless pair.is_a?(Hash) && pair.key?(:open) && pair.key?(:close)
          raise ConfigurationError, "each encloser must be a hash with keys :open and :close"
        end

        # The single-character check already rejects "", so no separate
        # emptiness check is needed.
        validate_single_char!("encloser open", pair[:open])
        validate_single_char!("encloser close", pair[:close])
      end
    end

    def validate_duplicates!
      return if [:first, :last].include?(@duplicates)

      raise ConfigurationError, "duplicates must be :first or :last"
    end

    def validate_single_char!(label, value)
      return if value.is_a?(String) && value.length == 1

      raise ConfigurationError, "#{label} must be a single-character string"
    end

    # Every special character must be distinct; the only tolerated repeat is
    # an encloser pair whose open and close characters are the same.
    def validate_character_uniqueness!
      chars = []
      chars << @separator if @separator.is_a?(String)
      chars << @escape if escape_enabled?
      chars.concat(@assignment)

      @enclosers.each do |pair|
        chars << pair[:open]
        chars << pair[:close] unless pair[:close] == pair[:open]
      end

      return if chars.uniq.length == chars.length

      raise ConfigurationError, "separator/escape/assignment/enclosers must use unique characters (except open==close within an encloser pair)"
    end

    # In whitespace mode the separator is a Regexp, so it never takes part in
    # the uniqueness check above. A whitespace escape, assignment operator or
    # encloser would still collide with the separator, so reject it here.
    def validate_whitespace_conflicts!
      return unless whitespace_separator?

      special = []
      special << @escape if escape_enabled?
      special.concat(@assignment)
      @enclosers.each { |pair| special << pair[:open] << pair[:close] }
      return unless special.any? { |ch| whitespace_char?(ch) }

      raise ConfigurationError, "escape/assignment/enclosers cannot be whitespace when separator is the regex /\\s/"
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # Error type for invalid configuration values.
  class ConfigurationError < StandardError; end

  # A structured description of a parse problem. This is a plain value
  # object, not an exception class.
  class ParseError
    attr_reader :code, :message, :raw

    # code:    symbol identifying the error type
    # message: human-readable explanation
    # raw:     raw input or segment associated with the error (may be nil)
    def initialize(code:, message:, raw: nil)
      @code, @message, @raw = code, message, raw
    end

    # The message alone, or the message followed by the inspected raw text
    # in parentheses when raw is present.
    def to_s
      @raw ? "#{@message} (#{@raw.inspect})" : @message
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  module Lexer
    # Locates assignment operators in a single segment, ignoring any that sit
    # inside an enclosure or directly after the escape character.
    class AssignmentFinder
      def initialize(config)
        @config = config
      end

      # Scans the segment and returns the indices (into the segment string)
      # of every top-level assignment operator.
      #
      # If the segment ends while an enclosure is still open, an
      # :unterminated_enclosure ParseError is appended to errors and an empty
      # array is returned.
      def indices(segment, errors:)
        found = []
        open_pair = nil
        pos = 0
        length = segment.length

        while pos < length
          char = segment[pos]

          if open_pair
            # Inside an enclosure only escapes and the closing character matter.
            if escape_char?(char)
              pos = skip_escape(pos, length)
            else
              open_pair = nil if char == open_pair[:close]
              pos += 1
            end
          elsif (pair = @config.encloser_open_pair_for(char))
            open_pair = pair
            pos += 1
          elsif escape_char?(char)
            pos = skip_escape(pos, length)
          else
            found << pos if @config.assignment.include?(char)
            pos += 1
          end
        end

        return found unless open_pair

        errors << ParseError.new(
          code: :unterminated_enclosure,
          message: "Unterminated enclosure in segment",
          raw: segment
        )
        []
      end

      private

      def escape_char?(char)
        @config.escape_enabled? && char == @config.escape
      end

      # Steps over the escape and the character it protects; a trailing
      # escape only advances by one, landing exactly on the end of the string.
      def skip_escape(pos, length)
        [pos + 2, length].min
      end
    end
  end
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  module Lexer
    # Splits a whole argument string into raw segments at the configured
    # separator. Enclosers and escape characters are left in the segments;
    # they only prevent splitting here.
    class Segmenter
      def initialize(config)
        @config = config
      end

      # Split argstring into raw segments using the configured separator.
      # Enclosures and escapes prevent splitting.
      #
      # Returns an array of segment strings. In whitespace-separator mode
      # runs of whitespace collapse and empty segments are never emitted, so
      # an empty input yields []. With a single-character separator nothing
      # collapses and empty segments are kept.
      #
      # If the input ends inside an enclosure, an :unterminated_enclosure
      # ParseError is appended to errors and [] is returned.
      def split(argstring, errors:)
        # Always strip leading/trailing whitespace globally in all modes.
        # Users who want leading/trailing whitespace must enclose it.
        argstring = argstring.to_s.strip

        segments = []
        buffer = +""
        i = 0
        current_encloser = nil

        while i < argstring.length
          ch = argstring[i]

          if current_encloser
            # Escape inside enclosure consumes next character literally.
            if escape_char?(ch)
              i = copy_escape_sequence(argstring, i, buffer)
              next
            end

            buffer << ch
            current_encloser = nil if ch == current_encloser[:close]
            i += 1
            next
          end

          # Start enclosure if configured.
          pair = @config.encloser_open_pair_for(ch)
          if pair
            current_encloser = pair
            buffer << ch
            i += 1
            next
          end

          # Escape outside enclosure.
          if escape_char?(ch)
            # If separator is whitespace, escaping whitespace separators is
            # not allowed: drop the escape and let the whitespace separate.
            if @config.whitespace_separator? && i + 1 < argstring.length && @config.whitespace_char?(argstring[i + 1])
              i += 1
              next
            end

            i = copy_escape_sequence(argstring, i, buffer)
            next
          end

          # Separator handling.
          if @config.separator_boundary_at?(argstring, i)
            flush_segment!(segments, buffer)

            if @config.whitespace_separator?
              # Collapse whitespace runs.
              i += 1 while i < argstring.length && @config.whitespace_char?(argstring[i])
            else
              # Non-whitespace separator does not collapse.
              i += 1
            end
            next
          end

          buffer << ch
          i += 1
        end

        if current_encloser
          errors << ParseError.new(
            code: :unterminated_enclosure,
            message: "Unterminated enclosure in input"
          )
          return []
        end

        flush_segment!(segments, buffer)
        segments
      end

      private

      def escape_char?(ch)
        @config.escape_enabled? && ch == @config.escape
      end

      # Copies the escape character and the character after it (when there
      # is one) into buffer unchanged, and returns the index to resume at.
      def copy_escape_sequence(string, index, buffer)
        sequence = string[index, 2]
        buffer << sequence
        index + sequence.length
      end

      # Moves the buffered text into segments and empties the buffer.
      #
      # Whitespace mode collapses separators, so an empty buffer there is
      # never a real argument. It only arises from empty input or from a
      # dropped escape directly before whitespace. Skipping it prevents a
      # phantom empty segment. An explicitly enclosed empty value still has
      # its encloser characters in the buffer and is kept.
      def flush_segment!(segments, buffer)
        return if buffer.empty? && @config.whitespace_separator?

        segments << buffer.dup
        buffer.clear
      end
    end
  end
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  module Lexer
    # Turns a raw slice of a segment into a Name or Value token, removing
    # enclosers and resolving escapes.
    class TokenParser
      def initialize(config)
        @config = config
      end

      # Builds a Name token from slice. Returns nil after appending a
      # ParseError to errors when an enclosed slice is malformed.
      def parse_name(slice, raw:, errors:)
        parse_token(slice, raw: raw, errors: errors, token_class: Name)
      end

      # Builds a Value token from slice. Returns nil after appending a
      # ParseError to errors when an enclosed slice is malformed.
      def parse_value(slice, raw:, errors:)
        parse_token(slice, raw: raw, errors: errors, token_class: Value)
      end

      private

      def parse_token(slice, raw:, errors:, token_class:)
        # With a non-whitespace separator, surrounding whitespace is not part
        # of the token, so trim the edges.
        slice = slice.strip unless @config.whitespace_separator?

        return token_class.new(text: "", raw: raw, enclosed: false, enclosure: nil) if slice == ""

        pair = @config.encloser_open_pair_for(slice[0])
        return parse_enclosed_token(slice, raw: raw, pair: pair, errors: errors, token_class: token_class) if pair

        token_class.new(text: unescape_text(slice), raw: raw, enclosed: false, enclosure: nil)
      end

      # Reads the content between the opening encloser at slice[0] and its
      # first unescaped closing character. Only whitespace may follow the
      # closing character.
      def parse_enclosed_token(slice, raw:, pair:, errors:, token_class:)
        closer = pair[:close]
        content = +""
        cursor = 1
        closed = false

        while cursor < slice.length
          char = slice[cursor]

          if @config.escape_enabled? && char == @config.escape
            # An escaped character is taken literally; a trailing escape
            # (nothing after it) simply disappears.
            escaped = slice[cursor + 1]
            content << escaped if escaped
            cursor += escaped ? 2 : 1
            next
          end

          cursor += 1
          if char == closer
            closed = true
            break
          end

          content << char
        end

        unless closed
          errors << ParseError.new(
            code: :unterminated_enclosure,
            message: "Unterminated enclosed token",
            raw: raw
          )
          return nil
        end

        remainder = slice[cursor..-1].to_s
        unless remainder.strip.empty?
          errors << ParseError.new(
            code: :trailing_after_enclosure,
            message: "Unexpected trailing characters after enclosure",
            raw: raw
          )
          return nil
        end

        token_class.new(text: content, raw: raw, enclosed: true, enclosure: { open: pair[:open], close: closer })
      end

      # Resolves escapes in unenclosed text and returns a new string.
      def unescape_text(text)
        return text.dup unless @config.escape_enabled?

        result = +""
        cursor = 0

        while cursor < text.length
          char = text[cursor]

          unless char == @config.escape
            result << char
            cursor += 1
            next
          end

          following = text[cursor + 1]
          if following.nil? || (@config.whitespace_separator? && @config.whitespace_char?(following))
            # Drop only the escape: either it is trailing, or it precedes
            # whitespace, which cannot be escaped in whitespace-separator mode.
            cursor += 1
          else
            result << following
            cursor += 2
          end
        end

        result
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # One parsed argument.
  class Argument
    # kind is one of :positional, :named, :flag
    attr_reader :kind, :position, :name, :value, :values

    # values falls back to [value] when a value is given, and to an empty
    # array otherwise.
    def initialize(kind:, position: nil, name: nil, value: nil, values: nil)
      @kind, @position, @name, @value = kind, position, name, value
      @values =
        if values
          values
        elsif value
          [value]
        else
          []
        end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # Result object of a parse: validity, errors, the argument lists and the
  # lookup hashes.
  class Arguments
    attr_reader :valid, :errors, :all, :positional, :named, :flags,
                :position, :name, :flag

    def initialize(valid:, errors:, all:, positional:, named:, flags:, position:, name:, flag:)
      @valid, @errors = valid, errors
      @all, @positional, @named, @flags = all, positional, named, flags
      @position, @name, @flag = position, name, flag
    end

    # Predicate form of the valid reader.
    def valid?
      @valid
    end

    # Human-readable errors: each entry of errors converted with to_s.
    def error_messages
      @errors.map { |error| error.to_s }
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # A piece of parsed text together with the raw input it came from.
  class Token
    attr_reader :text, :raw, :enclosed, :enclosure

    # enclosure is a hash {open:, close:} or nil
    def initialize(text:, raw:, enclosed: false, enclosure: nil)
      @text, @raw = text, raw
      @enclosed, @enclosure = enclosed, enclosure
    end

    # Predicate form of the enclosed reader.
    def enclosed?
      @enclosed
    end
  end
end
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Argstring
  # Entry point: builds a Config from keyword options and parses argument
  # strings into an Arguments result.
  class Parser
    # Accepts the same keyword options as Config and passes them through.
    def initialize(separator: Config::WHITESPACE_SEPARATOR_SENTINEL, escape: "\\", assignment: ["="], enclosers: Config::DEFAULT_ENCLOSERS, duplicates: :last, flags: true, positional: true)
      @config = Config.new(
        separator: separator, escape: escape, assignment: assignment, enclosers: enclosers,
        duplicates: duplicates, flags: flags, positional: positional
      )

      @segmenter = Lexer::Segmenter.new(@config)
      @assignment_finder = Lexer::AssignmentFinder.new(@config)
      @token_parser = Lexer::TokenParser.new(@config)
    end

    # Parses argstring (converted with to_s) and returns an Arguments object.
    # When the result is invalid, the joined error messages are also emitted
    # through warn.
    def parse(argstring)
      parse_internal(argstring.to_s).tap do |result|
        warn("Argstring: #{result.error_messages.join('; ')}") unless result.valid?
      end
    end

    private

    def parse_internal(argstring)
      errors = []
      segments = @segmenter.split(argstring, errors: errors)
      return build_invalid(errors) if errors.any?

      all_args = []
      positional_args = []
      named_args = []
      flag_args = []
      position_hash = {}
      name_hash = {}
      flag_hash = {}
      pos_index = 0

      segments.each do |segment_raw|
        # Non-whitespace separator: ignore leading/trailing whitespace around segments.
        segment = @config.whitespace_separator? ? segment_raw : segment_raw.strip

        # Empty segment is allowed when separator is non-collapsing.
        if segment == ""
          next unless @config.positional_enabled

          pos_index += 1
          empty_value = Value.new(text: "", raw: segment_raw, enclosed: false, enclosure: nil)
          append_positional!(all_args, positional_args, position_hash, pos_index, empty_value)
          add_flag_for_positional!(all_args, flag_args, flag_hash, empty_value) if @config.flags_enabled
          next
        end

        parsed = parse_segment(segment, raw_segment: segment_raw, errors: errors)
        return build_invalid(errors) if errors.any?

        if parsed[:kind] == :named
          upsert_named_argument!(
            all_args: all_args,
            named_args: named_args,
            name_hash: name_hash,
            name_token: parsed[:name_token],
            value_token: parsed[:value_token]
          )
          next
        end

        # Positional
        value_token = parsed[:value_token]
        if @config.positional_enabled
          pos_index += 1
          append_positional!(all_args, positional_args, position_hash, pos_index, value_token)
        end

        add_flag_for_positional!(all_args, flag_args, flag_hash, value_token) if @config.flags_enabled
      end

      Arguments.new(
        valid: true, errors: [],
        all: all_args, positional: positional_args, named: named_args, flags: flag_args,
        position: position_hash, name: name_hash, flag: flag_hash
      )
    end

    # Result for a failed parse: carries the errors and empty collections.
    def build_invalid(errors)
      Arguments.new(
        valid: false, errors: errors,
        all: [], positional: [], named: [], flags: [],
        position: {}, name: {}, flag: {}
      )
    end

    # Records one positional argument under the given 1-based index.
    def append_positional!(all_args, positional_args, position_hash, index, value_token)
      argument = Argument.new(kind: :positional, position: index, value: value_token, values: [value_token])
      all_args << argument
      positional_args << argument
      position_hash[index] = value_token
    end

    # Classifies one non-empty segment. Returns a hash with :kind
    # (:positional or :named) and its tokens, or nil after appending a
    # ParseError to errors.
    def parse_segment(segment, raw_segment:, errors:)
      operator_positions = @assignment_finder.indices(segment, errors: errors)
      return nil if errors.any?

      # More than one assignment operator in a single segment is always invalid
      if operator_positions.length > 1
        return reject_segment(errors, :multiple_assignment, "Multiple assignment operators in one argument", segment)
      end

      # No assignment operators => positional. A segment that is nothing but
      # a single assignment character is also read as a positional literal,
      # so '=' (or another configured operator) can be used as a positional/flag.
      lone_operator = segment.length == 1 && @config.assignment.include?(segment)
      if operator_positions.empty? || lone_operator
        value_token = @token_parser.parse_value(segment, raw: raw_segment, errors: errors)
        return nil if errors.any?

        return { kind: :positional, value_token: value_token }
      end

      # Exactly one assignment operator
      split_at = operator_positions.first
      left_raw = segment[0...split_at]
      right_raw = segment[(split_at + 1)..-1] || ""

      # Non-whitespace separator: ignore whitespace immediately around '=' (outside enclosures).
      unless @config.whitespace_separator?
        left_raw = left_raw.rstrip
        right_raw = right_raw.lstrip
      end

      if left_raw == ""
        return reject_segment(errors, :missing_name, "Missing name before assignment operator", segment)
      end

      if right_raw == ""
        return reject_segment(errors, :missing_value, "Missing value after assignment operator", segment)
      end

      name_token = @token_parser.parse_name(left_raw, raw: left_raw, errors: errors)
      return nil if errors.any?

      value_token = @token_parser.parse_value(right_raw, raw: right_raw, errors: errors)
      return nil if errors.any?

      { kind: :named, name_token: name_token, value_token: value_token }
    end

    # Appends a ParseError for the segment and returns nil.
    def reject_segment(errors, code, message, segment)
      errors << ParseError.new(code: code, message: message, raw: segment)
      nil
    end

    # Records a flag named after the positional token, with the value "true".
    def add_flag_for_positional!(all_args, flag_args, flag_hash, positional_value_token)
      flag_name = Name.new(
        text: positional_value_token.text,
        raw: positional_value_token.raw,
        enclosed: positional_value_token.enclosed?,
        enclosure: positional_value_token.enclosure
      )
      flag_value = Value.new(text: "true", raw: "true", enclosed: false, enclosure: nil)
      flag_arg = Argument.new(kind: :flag, name: flag_name, value: flag_value, values: [flag_value])

      all_args << flag_arg
      flag_args << flag_arg
      flag_hash[flag_name.text] = true
    end

    # Adds a named argument, or merges a repeated name into the existing one.
    #
    # With duplicates == :first the new value is appended to values and the
    # original value is kept. Otherwise (:last) the new value is put at the
    # front of values and becomes the argument's value.
    def upsert_named_argument!(all_args:, named_args:, name_hash:, name_token:, value_token:)
      key = name_token.text
      existing = named_args.find { |candidate| candidate.name.text == key }

      unless existing
        created = Argument.new(kind: :named, name: name_token, value: value_token, values: [value_token])
        all_args << created
        named_args << created
        name_hash[key] = value_token
        return created
      end

      if @config.duplicates == :first
        existing.values.push(value_token)
      else
        # :last
        existing.values.unshift(value_token)
        # Argument exposes no writer for value, so the ivar is set directly.
        existing.instance_variable_set(:@value, value_token)
      end

      name_hash[key] = existing.value
      existing
    end
  end
end
|
data/lib/argstring.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# version must come first so errors, etc. have a version constant available
|
|
4
|
+
require_relative "argstring/version"
|
|
5
|
+
require_relative "argstring/errors"
|
|
6
|
+
require_relative "argstring/config"
|
|
7
|
+
|
|
8
|
+
require_relative "argstring/models/token"
|
|
9
|
+
require_relative "argstring/models/name"
|
|
10
|
+
require_relative "argstring/models/value"
|
|
11
|
+
require_relative "argstring/models/argument"
|
|
12
|
+
require_relative "argstring/models/arguments"
|
|
13
|
+
|
|
14
|
+
require_relative "argstring/lexer/segmenter"
|
|
15
|
+
require_relative "argstring/lexer/assignment_finder"
|
|
16
|
+
require_relative "argstring/lexer/token_parser"
|
|
17
|
+
|
|
18
|
+
require_relative "argstring/parser"
|
metadata
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: argstring
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Convincible Media
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-01-05 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '2.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '2.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '13.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '13.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.0'
|
|
55
|
+
description: Configurable parser for single-line argument strings, returning a structured
|
|
56
|
+
set of sub-strings representing positional arguments, named arguments and flags.
|
|
57
|
+
email:
|
|
58
|
+
- development@convincible.media
|
|
59
|
+
executables: []
|
|
60
|
+
extensions: []
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- lib/argstring.rb
|
|
64
|
+
- lib/argstring/config.rb
|
|
65
|
+
- lib/argstring/errors.rb
|
|
66
|
+
- lib/argstring/lexer/assignment_finder.rb
|
|
67
|
+
- lib/argstring/lexer/segmenter.rb
|
|
68
|
+
- lib/argstring/lexer/token_parser.rb
|
|
69
|
+
- lib/argstring/models/argument.rb
|
|
70
|
+
- lib/argstring/models/arguments.rb
|
|
71
|
+
- lib/argstring/models/name.rb
|
|
72
|
+
- lib/argstring/models/token.rb
|
|
73
|
+
- lib/argstring/models/value.rb
|
|
74
|
+
- lib/argstring/parser.rb
|
|
75
|
+
- lib/argstring/version.rb
|
|
76
|
+
homepage: https://github.com/ConvincibleMedia/ruby-gem-argstring
|
|
77
|
+
licenses:
|
|
78
|
+
- MIT
|
|
79
|
+
metadata: {}
|
|
80
|
+
post_install_message:
|
|
81
|
+
rdoc_options: []
|
|
82
|
+
require_paths:
|
|
83
|
+
- lib
|
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '2.4'
|
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
|
+
requirements:
|
|
91
|
+
- - ">="
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0'
|
|
94
|
+
requirements: []
|
|
95
|
+
rubyforge_project:
|
|
96
|
+
rubygems_version: 2.6.14.4
|
|
97
|
+
signing_key:
|
|
98
|
+
specification_version: 4
|
|
99
|
+
summary: Parses argument strings into structured substrings.
|
|
100
|
+
test_files: []
|