code-lexer 0.6 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/code-lexer/abstractor.rb +195 -55
- data/lib/code-lexer/config.rb +1 -2
- data/lib/code-lexer/languages/java.yml +38 -0
- data/lib/code-lexer/languages/javascript.yml +6 -4
- data/lib/code-lexer/lexer.rb +80 -7
- data/lib/code-lexer/token.rb +24 -7
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e3de74936d4c5e81abc995cec85c4831c2383eedc098ef9097c45002e59bbb
|
4
|
+
data.tar.gz: a7f113035e970f213de2e0301454b6851eba80f661adb4ce4720545e280fbcef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ccf664386fd4ca8b505658d0059d1d44a4b428b84ab79ed5ea1f9d12219f7daa6261d1d058b797080d8719c1d842fad47441d2b99da578c4691c00e5e109efe
|
7
|
+
data.tar.gz: 65914ad6a9f937ce884a5e7c737e4e9857b5be9aa5dc81ba912d6c25a44deff5efeaff2b11b1a0cfe6ad7b77859a2862fe745d9a280b3f639be2e3cb3a678d6e
|
data/lib/code-lexer/abstractor.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require_relative 'token'
|
2
2
|
|
3
3
|
module CodeLexer
|
4
|
-
class Abstractor
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
@dictionary
|
4
|
+
class Abstractor
|
5
|
+
def initialize(identifiers_dictionary = [], strings_dictionary = [], numbers_dictionary = [])
|
6
|
+
@dictionary = {}
|
7
|
+
@dictionary[:identifiers] = ['NOOP'] + identifiers_dictionary
|
8
|
+
@dictionary[:strings] = strings_dictionary
|
9
|
+
@dictionary[:numbers] = numbers_dictionary
|
10
|
+
|
11
|
+
@abstractor_pieces = []
|
9
12
|
end
|
10
13
|
|
11
14
|
def abstract_everything
|
@@ -18,103 +21,240 @@ module CodeLexer
|
|
18
21
|
return self
|
19
22
|
end
|
20
23
|
|
24
|
+
def dictionary
|
25
|
+
warn "[DEPRECATION] The method CodeLexer::Abstractor#dictionary is deprecated; used CodeLexer::Abstractor#identifiers_dictionary instead"
|
26
|
+
self.identifiers_dictionary
|
27
|
+
end
|
28
|
+
|
29
|
+
def identifiers_dictionary
|
30
|
+
@dictionary[:identifiers]
|
31
|
+
end
|
32
|
+
|
33
|
+
def strings_dictionary
|
34
|
+
@dictionary[:strings]
|
35
|
+
end
|
36
|
+
|
37
|
+
def numbers_dictionary
|
38
|
+
@dictionary[:numbers]
|
39
|
+
end
|
40
|
+
|
41
|
+
def dictionaries
|
42
|
+
@dictionary
|
43
|
+
end
|
44
|
+
|
21
45
|
def abstract_identifiers
|
22
|
-
@
|
46
|
+
@abstractor_pieces << IdentifierAbstractor.new(self)
|
23
47
|
return self
|
24
48
|
end
|
25
49
|
|
26
50
|
def abstract_numbers
|
27
|
-
@
|
51
|
+
@abstractor_pieces << NumberAbstractor.new(self)
|
28
52
|
return self
|
29
53
|
end
|
30
54
|
|
31
55
|
def abstract_comments
|
32
|
-
@
|
56
|
+
@abstractor_pieces << CommentAbstractor.new(self)
|
33
57
|
return self
|
34
58
|
end
|
35
59
|
|
36
60
|
def abstract_strings
|
37
|
-
@
|
61
|
+
@abstractor_pieces << StringAbstractor.new(self)
|
38
62
|
return self
|
39
63
|
end
|
40
64
|
|
41
65
|
def abstract_spaces
|
42
|
-
@
|
66
|
+
@abstractor_pieces << SpaceAbstractor.new(self)
|
43
67
|
return self
|
44
68
|
end
|
45
69
|
|
46
70
|
def remove_spaces
|
47
|
-
@
|
71
|
+
@abstractor_pieces << SpaceRemover.new(self)
|
48
72
|
return self
|
49
73
|
end
|
50
74
|
|
51
75
|
def remove_newlines
|
52
|
-
@
|
76
|
+
@abstractor_pieces << NewlineRemover.new(self)
|
53
77
|
return self
|
54
78
|
end
|
55
79
|
|
56
80
|
def remove_comments
|
57
|
-
@
|
81
|
+
@abstractor_pieces << CommentRemover.new(self)
|
58
82
|
return self
|
59
83
|
end
|
60
84
|
|
61
85
|
def abstract!(tokens)
|
62
|
-
|
63
|
-
|
64
|
-
identifiers = identifier_tokens.map { |id| id.value }.uniq
|
65
|
-
|
66
|
-
identifiers.each do |id|
|
67
|
-
if @dictionary.include?(id)
|
68
|
-
abstracted_id = @dictionary.index(id)
|
69
|
-
else
|
70
|
-
abstracted_id = @dictionary.size
|
71
|
-
@dictionary << id
|
72
|
-
end
|
73
|
-
|
74
|
-
identifier_tokens.select { |t| t.value == id }.each do |matching_token|
|
75
|
-
matching_token.abstracted_value = Token.special("ID#{abstracted_id}")
|
76
|
-
end
|
77
|
-
end
|
86
|
+
@abstractor_pieces.each do |abstractor_piece|
|
87
|
+
tokens = abstractor_piece.abstract(tokens)
|
78
88
|
end
|
79
89
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
90
|
+
return self
|
91
|
+
end
|
92
|
+
|
93
|
+
def deabstract!(tokens)
|
94
|
+
@abstractor_pieces.each do |abstractor_piece|
|
95
|
+
tokens = abstractor_piece.deabstract(tokens)
|
86
96
|
end
|
87
97
|
|
88
|
-
|
89
|
-
|
90
|
-
|
98
|
+
return self
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
class AbstractorPiece
|
103
|
+
def initialize(abstractor)
|
104
|
+
@abstractor = abstractor
|
105
|
+
end
|
106
|
+
|
107
|
+
def abstract(tokens)
|
108
|
+
return tokens
|
109
|
+
end
|
110
|
+
|
111
|
+
def deabstract(tokens)
|
112
|
+
return tokens
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
class IdentifierAbstractor < AbstractorPiece
|
117
|
+
def abstract(tokens)
|
118
|
+
identifier_tokens = tokens.select { |t| t.type == :identifier }
|
119
|
+
identifiers = identifier_tokens.map { |id| id.value }.uniq
|
120
|
+
|
121
|
+
identifiers.each do |id|
|
122
|
+
if @abstractor.identifiers_dictionary.include?(id)
|
123
|
+
abstracted_id = @abstractor.identifiers_dictionary.index(id)
|
124
|
+
else
|
125
|
+
abstracted_id = @abstractor.identifiers_dictionary.size
|
126
|
+
@abstractor.identifiers_dictionary << id
|
127
|
+
end
|
128
|
+
|
129
|
+
identifier_tokens.select { |t| t.value == id }.each do |matching_token|
|
130
|
+
matching_token.abstracted_value = Token.special("ID#{abstracted_id}")
|
91
131
|
end
|
92
132
|
end
|
93
133
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
134
|
+
return tokens
|
135
|
+
end
|
136
|
+
|
137
|
+
def deabstract(tokens)
|
138
|
+
tokens.select { |t| t.abstracted_value.match?(/.ID[0-9]+./) }.each do |token|
|
139
|
+
id = token.abstracted_value.scan(/.ID([0-9]+)./).flatten[0].to_i
|
140
|
+
|
141
|
+
token.type = :identifier
|
142
|
+
token.value = @abstractor.identifiers_dictionary[id]
|
98
143
|
end
|
99
144
|
|
100
|
-
|
101
|
-
|
145
|
+
return tokens
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
class NumberAbstractor < AbstractorPiece
|
150
|
+
def abstract(tokens)
|
151
|
+
tokens.select { |t| t.type == :number }.each do |number_token|
|
152
|
+
number_token.abstracted_value = Token.special("NUMBER")
|
153
|
+
@abstractor.numbers_dictionary << number_token.value
|
102
154
|
end
|
103
155
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
156
|
+
return tokens
|
157
|
+
end
|
158
|
+
|
159
|
+
def deabstract(tokens)
|
160
|
+
id = 0
|
161
|
+
tokens.select { |t| t.abstracted_value == Token.special("NUMBER") }.each do |token|
|
162
|
+
token.type = :number
|
163
|
+
token.value = @abstractor.numbers_dictionary[id]
|
164
|
+
|
165
|
+
id += 1
|
166
|
+
end
|
167
|
+
|
168
|
+
return tokens
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
class StringAbstractor < AbstractorPiece
|
173
|
+
def abstract(tokens)
|
174
|
+
tokens.select { |t| t.type == :string }.each do |string_token|
|
175
|
+
string_token.abstracted_value = Token.special("STRING")
|
176
|
+
@abstractor.strings_dictionary << string_token.value
|
177
|
+
end
|
178
|
+
|
179
|
+
return tokens
|
180
|
+
end
|
181
|
+
|
182
|
+
def deabstract(tokens)
|
183
|
+
id = 0
|
184
|
+
tokens.select { |t| t.abstracted_value == Token.special("STRING") }.each do |token|
|
185
|
+
token.type = :string
|
186
|
+
token.value = '"' + @abstractor.strings_dictionary[id] + '"'
|
187
|
+
|
188
|
+
id += 1
|
189
|
+
end
|
190
|
+
|
191
|
+
return tokens
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
class CommentAbstractor < AbstractorPiece
|
196
|
+
def abstract(tokens)
|
197
|
+
tokens.select { |t| t.type == :comment }.each do |comment_token|
|
198
|
+
comment_token.abstracted_value = Token.special("COMMENT")
|
199
|
+
end
|
200
|
+
return tokens
|
201
|
+
end
|
202
|
+
|
203
|
+
def deabstract(tokens)
|
204
|
+
tokens.select { |t| t.abstracted_value == Token.special("COMMENT") }.each do |token|
|
205
|
+
token.type = :comment
|
206
|
+
token.value = 'Unknown comment'
|
207
|
+
end
|
208
|
+
|
209
|
+
return tokens
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
class SpaceAbstractor < AbstractorPiece
|
214
|
+
def abstract(tokens)
|
215
|
+
tokens.select { |t| t.type == :space }.each do |space_token|
|
216
|
+
previous_index = tokens.index(space_token) - 1
|
217
|
+
if previous_index < 0 || tokens[previous_index].type == :newline
|
218
|
+
space_token.abstracted_value = Token.special("INDENTATION")
|
219
|
+
else
|
220
|
+
space_token.abstracted_value = Token.special("WHITESPACE")
|
114
221
|
end
|
115
222
|
end
|
116
223
|
|
117
|
-
return
|
224
|
+
return tokens
|
225
|
+
end
|
226
|
+
|
227
|
+
def deabstract(tokens)
|
228
|
+
tokens.select do |t|
|
229
|
+
t.abstracted_value == Token.special("INDENTATION") ||
|
230
|
+
t.abstracted_value == Token.special("WHITESPACE")
|
231
|
+
end.each do |token|
|
232
|
+
token.type = :space
|
233
|
+
token.value = ' '
|
234
|
+
end
|
235
|
+
|
236
|
+
return tokens
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
class SpaceRemover < AbstractorPiece
|
241
|
+
def abstract(tokens)
|
242
|
+
tokens.delete_if { |t| t.type == :space }
|
243
|
+
return tokens
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
class NewlineRemover < AbstractorPiece
|
248
|
+
def abstract(tokens)
|
249
|
+
tokens.delete_if { |t| t.type == :newline }
|
250
|
+
return tokens
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
class CommentRemover < AbstractorPiece
|
255
|
+
def abstract(tokens)
|
256
|
+
tokens.delete_if { |t| t.type == :comment }
|
257
|
+
return tokens
|
118
258
|
end
|
119
259
|
end
|
120
260
|
end
|
data/lib/code-lexer/config.rb
CHANGED
data/lib/code-lexer/languages/java.yml
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
lexer:
|
2
|
+
keyword:
|
3
|
+
- (?:abstract|arguments|boolean|break|byte|case|catch|char|const|continue|debugger|default|delete|double|do|else|eval|false|finally|final|float|for|function|goto|if|implements|int|in|instanceof|interface|let|long|native|new|null|package|private|protected|public|return|short|static|switch|synchronized|this|throws|throw|transient|true|try|typeof|var|void|volatile|while|with|yield|class|enum|export|extends|import|super|from|strictfp)
|
4
|
+
identifier:
|
5
|
+
- "[$A-Za-z_][$A-Za-z0-9_]*"
|
6
|
+
comment:
|
7
|
+
- \/\/[^\n\r]*(?=[\n\r])
|
8
|
+
- \/\/.*$
|
9
|
+
- \/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/
|
10
|
+
string:
|
11
|
+
- \"([^"]|\\\")*\"
|
12
|
+
- \'[^']*\'
|
13
|
+
regex:
|
14
|
+
- \/([^/]|\\\/)*\/[gim]*
|
15
|
+
number:
|
16
|
+
- \-?[0-9]*\.[0-9]e\-?[0-9]+
|
17
|
+
- \-?[0-9]*\.[0-9]
|
18
|
+
- \-?[1-9][0-9]*
|
19
|
+
- \-?0[Xx][0-9A-Fa-f]+
|
20
|
+
- \-?[0-9]
|
21
|
+
- \-?0[0-7]+
|
22
|
+
operator:
|
23
|
+
- (\=\=\=|\!\=\=)
|
24
|
+
- (\<\=|\>\=|\=\=|\!\=|\=\>)
|
25
|
+
- (\&\&|\|\||\!)
|
26
|
+
- (\+\=|\-\=|\/\=|\*\=|\%\=|\+\+|\-\-)
|
27
|
+
- (\&|\||\~|\^|\<\<|\>\>)
|
28
|
+
- (\=|\+|\-|\/|\*|\%)
|
29
|
+
- (\.|\,|\:)
|
30
|
+
- (\<|\>|\?)
|
31
|
+
parenthesis:
|
32
|
+
- (\(|\)|\[|\]|\{|\})
|
33
|
+
semicolon:
|
34
|
+
- \;
|
35
|
+
newline:
|
36
|
+
- "[\\n\\r]"
|
37
|
+
space:
|
38
|
+
- \s+
|
data/lib/code-lexer/languages/javascript.yml
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
lexer:
|
2
2
|
keyword:
|
3
|
-
- (?:abstract|arguments|boolean|break|byte|case|catch|char|const|continue|debugger|default|delete|do|
|
3
|
+
- (?:abstract|arguments|boolean|break|byte|case|catch|char|const|continue|debugger|default|delete|double|do|else|eval|false|finally|final|float|for|function|goto|if|implements|int|in|instanceof|interface|let|long|native|new|null|package|private|protected|public|return|short|static|switch|synchronized|this|throws|throw|transient|true|try|typeof|var|void|volatile|while|with|yield|class|enum|export|extends|import|super|from)
|
4
4
|
identifier:
|
5
5
|
- "[$A-Za-z_][$A-Za-z0-9_]*"
|
6
6
|
comment:
|
7
|
-
- \/\/[^\n\r]*[\n\r]
|
7
|
+
- \/\/[^\n\r]*(?=[\n\r])
|
8
8
|
- \/\/.*$
|
9
9
|
- \/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+\/
|
10
10
|
string:
|
11
11
|
- \"([^"]|\\\")*\"
|
12
12
|
- \'[^']*\'
|
13
|
+
regex:
|
14
|
+
- \/([^/]|\\\/)*\/[gim]*
|
13
15
|
number:
|
14
16
|
- \-?[0-9]*\.[0-9]e\-?[0-9]+
|
15
17
|
- \-?[0-9]*\.[0-9]
|
@@ -19,13 +21,13 @@ lexer:
|
|
19
21
|
- \-?0[0-7]+
|
20
22
|
operator:
|
21
23
|
- (\=\=\=|\!\=\=)
|
22
|
-
- (
|
24
|
+
- (\<\=|\>\=|\=\=|\!\=|\=\>)
|
23
25
|
- (\&\&|\|\||\!)
|
24
26
|
- (\+\=|\-\=|\/\=|\*\=|\%\=|\+\+|\-\-)
|
25
27
|
- (\&|\||\~|\^|\<\<|\>\>)
|
26
28
|
- (\=|\+|\-|\/|\*|\%)
|
27
29
|
- (\.|\,|\:)
|
28
|
-
- (
|
30
|
+
- (\<|\>|\?)
|
29
31
|
parenthesis:
|
30
32
|
- (\(|\)|\[|\]|\{|\})
|
31
33
|
semicolon:
|
data/lib/code-lexer/lexer.rb
CHANGED
@@ -12,7 +12,7 @@ module CodeLexer
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
def lex(content)
|
15
|
+
def lex(content, abstractor = nil)
|
16
16
|
content = content.clone
|
17
17
|
tokens = []
|
18
18
|
while content.length > 0
|
@@ -23,17 +23,31 @@ module CodeLexer
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
return LexedContent.new(tokens)
|
26
|
+
return LexedContent.new(tokens, abstractor)
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
30
|
class LexedContent
|
31
31
|
attr_reader :tokens
|
32
|
+
attr_reader :abstractor
|
32
33
|
|
33
|
-
def
|
34
|
+
def self.from_stream_string(stream, abstractor)
|
35
|
+
tokens = stream.split(" ").map { |t| Token.from_string(t) }
|
36
|
+
abstractor.deabstract!(tokens)
|
37
|
+
return LexedContent.new(tokens, abstractor)
|
38
|
+
end
|
39
|
+
|
40
|
+
def initialize(tokens, abstractor = nil)
|
34
41
|
@tokens = tokens
|
42
|
+
@abstractor = abstractor
|
43
|
+
|
44
|
+
@abstractor.abstract!(@tokens) if @abstractor
|
45
|
+
end
|
46
|
+
|
47
|
+
def reconstruct
|
48
|
+
@tokens.map { |t| t.value.to_s }.join("")
|
35
49
|
end
|
36
|
-
|
50
|
+
|
37
51
|
def token_lines
|
38
52
|
result = []
|
39
53
|
current_line = []
|
@@ -53,14 +67,73 @@ module CodeLexer
|
|
53
67
|
end
|
54
68
|
|
55
69
|
def token_stream(abstractor = nil)
|
56
|
-
abstractor.abstract!(@tokens) if abstractor
|
57
|
-
|
58
70
|
result = []
|
59
|
-
|
71
|
+
|
72
|
+
tokens = @tokens
|
73
|
+
if abstractor
|
74
|
+
tokens = tokens.map { |t| t.clone }
|
75
|
+
tokens.each { |t| t.reset_abstraction }
|
76
|
+
abstractor.abstract!(tokens)
|
77
|
+
end
|
78
|
+
|
79
|
+
tokens.each do |token|
|
60
80
|
result << token.abstracted_value
|
61
81
|
end
|
62
82
|
|
63
83
|
return result.join(" ")
|
64
84
|
end
|
85
|
+
|
86
|
+
def to_s
|
87
|
+
@tokens.map { |t| t.value }.join("")
|
88
|
+
end
|
89
|
+
|
90
|
+
def dump(filename, mode = "w", force = false)
|
91
|
+
if mode.downcase.include?("w") && !force
|
92
|
+
if FileTest.exist?(filename) || FileTest.exist?(lexdata(filename))
|
93
|
+
raise "Destination filename or lexdata filename already exist."
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
File.open(filename, mode) do |f|
|
98
|
+
f << self.token_stream + "\n"
|
99
|
+
end
|
100
|
+
|
101
|
+
File.open(lexdata(filename), "#{mode}b") do |f|
|
102
|
+
f << Marshal.dump(@abstractor)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def self.load(file_or_filename, lexdata_or_lexdata_filename = nil)
|
107
|
+
if file_or_filename.is_a?(String) && (lexdata_or_lexdata_filename.is_a?(String) || !lexdata_or_lexdata_filename)
|
108
|
+
unless lexdata_or_lexdata_filename
|
109
|
+
return self.load_filename(file_or_filename)
|
110
|
+
else
|
111
|
+
return self.load_filename(file_or_filename, lexdata_or_lexdata_filename)
|
112
|
+
end
|
113
|
+
elsif file_or_filename.is_a?(File) && lexdata_or_lexdata_filename.is_a?(File)
|
114
|
+
return self.load_file(file_or_filename, lexdata_or_lexdata_filename)
|
115
|
+
else
|
116
|
+
raise "Unable to call with the provided input types: expected (String, String), (String), or (File, File)"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def self.load_filename(filename, lexdata_filename = filename + ".lexdata")
|
121
|
+
File.open(filename, "r") do |file|
|
122
|
+
File.open(lexdata_filename, "rb") do |lexdata_file|
|
123
|
+
return LexedContent.load_file(file, lexdata_file)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def self.load_file(file, lexdata_file)
|
129
|
+
line = file.readline
|
130
|
+
abstractor = Marshal.load(lexdata_file)
|
131
|
+
return LexedContent.from_stream_string(line, abstractor)
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
def lexdata(filename)
|
136
|
+
filename + ".lexdata"
|
137
|
+
end
|
65
138
|
end
|
66
139
|
end
|
data/lib/code-lexer/token.rb
CHANGED
@@ -11,6 +11,19 @@ module CodeLexer
|
|
11
11
|
attr_accessor :value
|
12
12
|
attr_accessor :abstracted_value
|
13
13
|
|
14
|
+
def self.from_string(string)
|
15
|
+
unless string.start_with?(SPECIAL_TOKEN_OPEN)
|
16
|
+
value = string
|
17
|
+
else
|
18
|
+
value = nil
|
19
|
+
end
|
20
|
+
|
21
|
+
token = Token.new(:unknown, value)
|
22
|
+
token.abstracted_value = string
|
23
|
+
|
24
|
+
return token
|
25
|
+
end
|
26
|
+
|
14
27
|
def initialize(type, value)
|
15
28
|
@type = type
|
16
29
|
self.value = value
|
@@ -18,13 +31,7 @@ module CodeLexer
|
|
18
31
|
|
19
32
|
def value=(v)
|
20
33
|
@value = v
|
21
|
-
|
22
|
-
@abstracted_value = Token.special("NEWLINE")
|
23
|
-
elsif v =~ /\s/
|
24
|
-
@abstracted_value = Token.special(v.gsub(/\s/, "·"))
|
25
|
-
else
|
26
|
-
@abstracted_value = v
|
27
|
-
end
|
34
|
+
self.reset_abstraction
|
28
35
|
end
|
29
36
|
|
30
37
|
def to_s
|
@@ -38,5 +45,15 @@ module CodeLexer
|
|
38
45
|
def ==(oth)
|
39
46
|
@type == oth.type && @value == oth.value && @abstracted_value == oth.abstracted_value
|
40
47
|
end
|
48
|
+
|
49
|
+
def reset_abstraction
|
50
|
+
if @type == :newline
|
51
|
+
@abstracted_value = Token.special("NEWLINE")
|
52
|
+
elsif @value =~ /\s/
|
53
|
+
@abstracted_value = Token.special(@value.gsub(/\s/, "·"))
|
54
|
+
else
|
55
|
+
@abstracted_value = @value.clone
|
56
|
+
end
|
57
|
+
end
|
41
58
|
end
|
42
59
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code-lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.6'
|
4
|
+
version: '0.8'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: code-assertions
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- lib/code-lexer.rb
|
41
41
|
- lib/code-lexer/abstractor.rb
|
42
42
|
- lib/code-lexer/config.rb
|
43
|
+
- lib/code-lexer/languages/java.yml
|
43
44
|
- lib/code-lexer/languages/javascript.yml
|
44
45
|
- lib/code-lexer/lexer.rb
|
45
46
|
- lib/code-lexer/token.rb
|
@@ -62,7 +63,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
63
|
- !ruby/object:Gem::Version
|
63
64
|
version: '0'
|
64
65
|
requirements: []
|
65
|
-
rubygems_version: 3.
|
66
|
+
rubygems_version: 3.3.7
|
66
67
|
signing_key:
|
67
68
|
specification_version: 4
|
68
69
|
summary: Simple source code lexer
|