sas-lexer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +111 -0
- data/Rakefile +208 -0
- data/ffi-wrapper/Cargo.lock +336 -0
- data/ffi-wrapper/Cargo.toml +12 -0
- data/ffi-wrapper/src/lib.rs +281 -0
- data/lib/native/arm64-darwin/libsas_lexer_ffi.dylib +0 -0
- data/lib/native/x86_64-linux/libsas_lexer_ffi.so +0 -0
- data/lib/sas_lexer/error.rb +5 -0
- data/lib/sas_lexer/lexer.rb +489 -0
- data/lib/sas_lexer/version.rb +5 -0
- data/lib/sas_lexer.rb +5 -0
- metadata +73 -0
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ffi"
|
|
4
|
+
|
|
5
|
+
module SasLexer
|
|
6
|
+
# Ruby wrapper around the `sas-lexer` Rust crate, accessed through
|
|
7
|
+
# the small C ABI shim built from `ffi-wrapper/`.
|
|
8
|
+
#
|
|
9
|
+
# Loads the prebuilt shared library shipped under `lib/native/` (or
|
|
10
|
+
# the dev-build flat path produced by `rake sas_lexer:install`).
|
|
11
|
+
class Lexer
|
|
12
|
+
extend FFI::Library
|
|
13
|
+
|
|
14
|
+
# Directory holding the native libraries: `<gem root>/lib/native`, where the
# gem root is two levels above this file's directory.
native_dir = File.join(File.expand_path("../..", __dir__), "lib", "native")

# Each row is [pattern tested against RbConfig's host_os, normalized OS name,
# shared-library extension]. Rows are tried in order; the first match wins.
os_entry = [
  [/darwin/, "darwin", "dylib"],
  [/linux/, "linux", "so"],
  [/mswin|mingw|cygwin/, "windows", "dll"],
].find { |pattern, _name, _ext| pattern.match?(RbConfig::CONFIG["host_os"]) }

if os_entry.nil?
  raise SasLexer::Error,
        "Unsupported host OS: #{RbConfig::CONFIG["host_os"]}"
end

_pattern, host_os, library_ext = os_entry
host_platform = "#{RbConfig::CONFIG["host_cpu"]}-#{host_os}"
library_file = "libsas_lexer_ffi.#{library_ext}"

# Candidate locations, most specific first:
#   1. `lib/native/<host_cpu>-<host_os>/libsas_lexer_ffi.<ext>` — the
#      prebuilt artifact for the host platform shipped inside the gem.
#   2. `lib/native/libsas_lexer_ffi.<ext>` — the flat path produced by
#      `bundle exec rake sas_lexer:install` for local development.
candidates = [
  File.join(native_dir, host_platform, library_file),
  File.join(native_dir, library_file),
]

# First candidate that exists on disk, or nil when none does.
LIBRARY_PATH = candidates.find { |candidate| File.exist?(candidate) }

unless LIBRARY_PATH
  raise SasLexer::Error,
        "Could not find a prebuilt sas-lexer FFI library at " \
        "lib/native/#{host_platform}/#{library_file}. " \
        "Build one with `bundle exec rake sas_lexer:install` " \
        "or add a prebuilt for #{host_platform} under lib/native/."
end

ffi_lib LIBRARY_PATH
|
|
50
|
+
|
|
51
|
+
# FFI struct mirroring the `SasToken` C struct in `ffi-wrapper/src/lib.rs`.
|
|
52
|
+
class Token < FFI::Struct
  # Field order and types must stay in sync with the native struct this
  # mirrors; FFI derives the memory offsets from this list.
  layout(
    :token_type,   :uint32,
    :channel,      :uint8,
    :start,        :size_t,
    :end,          :size_t,
    :start_line,   :uint32,
    :end_line,     :uint32,
    :start_column, :uint32,
    :end_column,   :uint32
  )
end
|
|
62
|
+
|
|
63
|
+
# Lexer handle lifecycle.
attach_function(:sas_lexer_new, [], :pointer)
attach_function(:sas_lexer_free, [:pointer], :void)

# Tokenization and token access (handle, then index where applicable).
attach_function(:sas_lexer_tokenize, [:pointer, :string], :int)
attach_function(:sas_lexer_token_count, [:pointer], :size_t)
attach_function(:sas_lexer_get_token, [:pointer, :size_t, :pointer], :int)
attach_function(:sas_lexer_get_token_text, [:pointer, :size_t], :pointer)

# Strings handed out by the native side are released with this call.
attach_function(:sas_lexer_free_string, [:pointer], :void)

# Last-error reporting.
attach_function(:sas_lexer_get_last_error, [], :pointer)
attach_function(:sas_lexer_clear_error, [], :void)
|
|
72
|
+
|
|
73
|
+
# Error code values returned by the C ABI.
|
|
74
|
+
module ErrorCode
  # Integer status codes; SUCCESS is the only non-failure value.
  SUCCESS                = 0
  NULL_POINTER           = 1
  INVALID_UTF8           = 2
  LEXING_ERROR           = 3
  INDEX_OUT_OF_BOUNDS    = 4
  TOKEN_NOT_FOUND        = 5
  BUFFER_NOT_INITIALIZED = 6
end
|
|
83
|
+
|
|
84
|
+
# Token channels — match the `TokenChannel` enum in the Rust crate.
|
|
85
|
+
module TokenChannel
  # Most tokens (keywords, identifiers, operators, etc.)
  DEFAULT = 0
  # Whitespace and other insignificant tokens
  HIDDEN  = 1
  # All comment tokens
  COMMENT = 2
end
|
|
90
|
+
|
|
91
|
+
# Token type constants — match the `TokenType` enum in the Rust crate.
|
|
92
|
+
# Full enum: https://github.com/mishamsk/sas-lexer/blob/main/crates/sas-lexer/src/lexer/token_type.rs
|
|
93
|
+
module TokenType
  # Values are consecutive integers starting at 0. Two names are aliases
  # that share a value with the constant directly above them.

  # --- Special tokens ---
  EOF = 0
  MACRO_SEP = 1
  CATCH_ALL = 2
  WS = 3
  WHITESPACE = 3 # Alias for WS

  # --- Punctuation and operators ---
  SEMI = 4          # ';'
  AMP = 5           # '&'
  PERCENT = 6       # '%'
  LPAREN = 7        # '('
  RPAREN = 8        # ')'
  LCURLY = 9        # '{'
  RCURLY = 10       # '}'
  LBRACK = 11       # '['
  RBRACK = 12       # ']'
  STAR = 13         # '*'
  EXCL = 14         # '!'
  EXCL2 = 15        # '!!'
  BPIPE = 16        # '!|'
  BPIPE2 = 17       # '!|!'
  PIPE2 = 18        # '||'
  STAR2 = 19        # '**'
  NOT = 20          # '¬' or '^'
  FSLASH = 21       # '/'
  PLUS = 22         # '+'
  MINUS = 23        # '-'
  GTLT = 24         # '><'
  LTGT = 25         # '<>'
  LT = 26           # '<'
  LE = 27           # '<='
  NE = 28           # '!=' or '^='
  GT = 29           # '>'
  GE = 30           # '>='
  SOUNDS_LIKE = 31  # '=*'
  PIPE = 32         # '|'
  DOT = 33          # '.'
  COMMA = 34        # ','
  COLON = 35        # ':'
  ASSIGN = 36       # '='
  DOLLAR = 37       # '$'
  AT = 38           # '@'
  HASH = 39         # '#'
  QUESTION = 40     # '?'

  # --- Logical operators (mnemonic keywords) ---
  KW_LT = 41   # LT
  KW_LE = 42   # LE
  KW_EQ = 43   # EQ
  KW_IN = 44   # IN
  KW_NE = 45   # NE
  KW_GT = 46   # GT
  KW_GE = 47   # GE
  KW_AND = 48  # AND
  KW_OR = 49   # OR
  KW_NOT = 50  # NOT

  # --- Literals and string-expression parts ---
  INTEGER_LITERAL = 51
  FLOAT_LITERAL = 52
  FLOAT_EXPONENT_LITERAL = 53
  STRING_LITERAL = 54
  BIT_TESTING_LITERAL = 55
  DATE_LITERAL = 56
  DATE_TIME_LITERAL = 57
  NAME_LITERAL = 58
  TIME_LITERAL = 59
  HEX_STRING_LITERAL = 60
  STRING_EXPR_START = 61
  STRING_EXPR_TEXT = 62
  STRING_EXPR_END = 63
  BIT_TESTING_LITERAL_EXPR_END = 64
  DATE_LITERAL_EXPR_END = 65
  DATE_TIME_LITERAL_EXPR_END = 66
  NAME_LITERAL_EXPR_END = 67
  TIME_LITERAL_EXPR_END = 68
  HEX_STRING_LITERAL_EXPR_END = 69

  # --- Comments ---
  C_STYLE_COMMENT = 70         # /* ... */
  PREDICTED_COMMENT_STAT = 71  # * ...; and ** ... **;
  COMMENT_STAT = 71            # Alias for PREDICTED_COMMENT_STAT

  # --- Datalines and formats ---
  DATALINES_START = 72
  DATALINES_DATA = 73
  CHAR_FORMAT = 74

  # --- Macro comments, variables, strings, labels and identifiers ---
  MACRO_COMMENT = 75 # %* ...;
  MACRO_VAR_RESOLVE = 76
  MACRO_VAR_TERM = 77
  MACRO_STRING = 78
  MACRO_STRING_EMPTY = 79
  MACRO_LABEL = 80
  MACRO_IDENTIFIER = 81

  # --- Macro functions ---
  KWM_CMPRES = 82
  KWM_COMPSTOR = 83
  KWM_DATATYP = 84
  KWM_EVAL = 85
  KWM_INDEX = 86
  KWM_LEFT = 87
  KWM_LENGTH = 88
  KWM_LOWCASE = 89
  KWM_SCAN = 90
  KWM_SUBSTR = 91
  KWM_SYM_EXIST = 92
  KWM_SYM_GLOBL = 93
  KWM_SYM_LOCAL = 94
  KWM_SYSEVALF = 95
  KWM_SYSFUNC = 96
  KWM_SYSGET = 97
  KWM_SYSMACEXEC = 98
  KWM_SYSMACEXIST = 99
  KWM_SYSMEXECDEPTH = 100
  KWM_SYSMEXECNAME = 101
  KWM_SYSPROD = 102
  KWM_TRIM = 103
  KWM_UNQUOTE = 104
  KWM_UPCASE = 105
  KWM_VERIFY = 106
  KWM_K_CMPRES = 107
  KWM_K_INDEX = 108
  KWM_K_LEFT = 109
  KWM_K_LENGTH = 110
  KWM_K_LOWCASE = 111
  KWM_K_SCAN = 112
  KWM_K_SUBSTR = 113
  KWM_K_TRIM = 114
  KWM_K_UPCASE = 115
  KWM_K_VERIFY = 116
  KWM_VALIDCHS = 117
  KWM_Q_CMPRES = 118
  KWM_Q_LEFT = 119
  KWM_Q_LOWCASE = 120
  KWM_Q_SCAN = 121
  KWM_Q_SUBSTR = 122
  KWM_Q_TRIM = 123
  KWM_Q_SYSFUNC = 124
  KWM_Q_UPCASE = 125
  KWM_QK_CMPRES = 126
  KWM_QK_LEFT = 127
  KWM_QK_LOWCASE = 128
  KWM_QK_SCAN = 129
  KWM_QK_SUBSTR = 130
  KWM_QK_TRIM = 131
  KWM_QK_UPCASE = 132
  KWM_BQUOTE = 133
  KWM_NR_BQUOTE = 134
  KWM_NR_QUOTE = 135
  KWM_QUOTE = 136
  KWM_SUPERQ = 137
  KWM_STR = 138
  KWM_NR_STR = 139

  # --- Macro statements ---
  KWM_ABORT = 140
  KWM_COPY = 141
  KWM_DISPLAY = 142
  KWM_DO = 143
  KWM_TO = 144
  KWM_BY = 145
  KWM_UNTIL = 146
  KWM_WHILE = 147
  KWM_END = 148
  KWM_GLOBAL = 149
  KWM_GOTO = 150
  KWM_IF = 151
  KWM_THEN = 152
  KWM_ELSE = 153
  KWM_INPUT = 154
  KWM_LET = 155
  KWM_LOCAL = 156
  KWM_MACRO = 157
  KWM_MEND = 158
  KWM_PUT = 159
  KWM_RETURN = 160
  KWM_SYMDEL = 161
  KWM_SYSCALL = 162
  KWM_SYSEXEC = 163
  KWM_SYSLPUT = 164
  KWM_SYSMACDELETE = 165
  KWM_SYSMSTORECLEAR = 166
  KWM_SYSRPUT = 167
  KWM_WINDOW = 168
  KWM_INCLUDE = 169
  KWM_LIST = 170
  KWM_RUN = 171

  # --- Identifiers (variable names, dataset names, etc.) ---
  IDENTIFIER = 172

  # --- Additional keyword operators ---
  KW_EQT = 173
  KW_GTT = 174
  KW_LTT = 175
  KW_GET = 176
  KW_LET = 177
  KW_NET = 178

  # --- SAS statement keywords ---
  KW_LIBNAME = 179
  KW_FILENAME = 180
  KW_CLEAR = 181
  KW_LIST = 182
  KW_CANCEL = 183
  KW_ALL_VAR = 184
  KW_ARRAY = 185
  KW_ATTRIB = 186
  KW_CALL = 187
  KW_DATA = 188
  KW_DEFAULT = 189
  KW_DESCENDING = 190
  KW_FORMAT = 191
  KW_GROUPFORMAT = 192
  KW_ID = 193
  KW_IF = 194
  KW_INFILE = 195
  KW_INFORMAT = 196
  KW_KEEP = 197
  KW_LABEL = 198
  KW_LENGTH = 199
  KW_MERGE = 200
  KW_NULL_DATASET = 201
  KW_OUTPUT = 202
  KW_PGM = 203
  KW_RENAME = 204
  KW_RUN = 205
  KW_SET = 206
  KW_STOP = 207
  KW_VAR = 208
  KW_VIEW = 209
  KW_WITH = 210
  KW_DELETE = 211
  KW_NOTSORTED = 212
  KW_PROC = 213
  KW_QUIT = 214
  KW_RANKS = 215

  # --- SQL and other SAS keywords ---
  KW_ALL = 216
  KW_ANY = 217
  KW_AS = 218
  KW_ASC = 219
  KW_BETWEEN = 220
  KW_BOTH = 221
  KW_BTRIM = 222
  KW_BY = 223
  KW_CALCULATED = 224
  KW_CASE = 225
  KW_CONNECT = 226
  KW_CONNECTION = 227
  KW_CONTAINS = 228
  KW_CORR = 229
  KW_CREATE = 230
  KW_CROSS = 231
  KW_DESC = 232
  KW_DISCONNECT = 233
  KW_DISTINCT = 234
  KW_DO = 235
  KW_DROP = 236
  KW_ELSE = 237
  KW_END = 238
  KW_ESCAPE = 239
  KW_EXCEPT = 240
  KW_EXECUTE = 241
  KW_EXISTS = 242
  KW_FOR = 243
  KW_FROM = 244
  KW_FULL = 245
  KW_GROUP = 246
  KW_HAVING = 247
  KW_INDEX = 248
  KW_INNER = 249
  KW_INSERT = 250
  KW_INTERSECT = 251
  KW_INTO = 252
  KW_IS = 253
  KW_JOIN = 254
  KW_KEY = 255
  KW_LEADING = 256
  KW_LEFT = 257
  KW_LIKE = 258
  KW_MISSING = 259
  KW_NATURAL = 260
  KW_NOTRIM = 261
  KW_NULL = 262
  KW_ON = 263
  KW_ORDER = 264
  KW_OUTER = 265
  KW_PRIMARY = 266
  KW_RIGHT = 267
  KW_SELECT = 268
  KW_SEPARATED = 269
  KW_SUBSTRING = 270
  KW_TABLE = 271
  KW_THEN = 272
  KW_TO = 273
  KW_TRAILING = 274
  KW_TRIMMED = 275
  KW_UNION = 276
  KW_UNIQUE = 277
  KW_UPDATE = 278
  KW_USING = 279
  KW_VALUES = 280
  KW_WHEN = 281
  KW_WHERE = 282
  KW_DECLARE = 283
  KW_HASH = 284
  KW_HITER = 285
  KW_INPUT = 286
  KW_PUT = 287
end
|
|
406
|
+
|
|
407
|
+
# Creates a native lexer through the C ABI.
#
# The raw handle is wrapped in an FFI::AutoPointer whose releaser is
# `sas_lexer_free`, so the native lexer is released when this object is
# garbage collected even if #free is never called.
#
# @raise [SasLexer::Error] if the native constructor returns a null pointer
def initialize
  raw_ptr = self.class.sas_lexer_new
  raise SasLexer::Error, "Failed to create SAS lexer" if raw_ptr.null?

  @lexer_ptr = FFI::AutoPointer.new(raw_ptr, self.class.method(:sas_lexer_free))
end

# Tokenizes SAS source code.
#
# Each returned token is a Hash with the keys :index, :text, :type,
# :channel, :start, :end, :start_line, :end_line, :start_column and
# :end_column, read from the native Token struct. If the struct lookup for
# a token fails, its Hash carries only :index and :text, with :type and
# :channel set to nil. Tokens whose text pointer is null are omitted.
#
# @param sas_code [String] SAS source to tokenize
# @return [Array<Hash>] the tokens, in index order
# @raise [SasLexer::Error] if the lexer was freed, `sas_code` is nil, or the
#   native tokenizer reports a non-success status
def tokenize(sas_code)
  raise SasLexer::Error, "Lexer has been freed" if @lexer_ptr.null?
  raise SasLexer::Error, "Null pointer provided" if sas_code.nil?

  status = self.class.sas_lexer_tokenize(@lexer_ptr, sas_code)
  unless status == ErrorCode::SUCCESS
    raise SasLexer::Error,
          get_last_error_message || "Failed to tokenize SAS code (error code: #{status})"
  end

  token_count = self.class.sas_lexer_token_count(@lexer_ptr)
  (0...token_count).filter_map { |index| read_token(index) }
end

# Reads the token at `index` into a Hash, or returns nil when the native
# side hands back a null text pointer for it.
private def read_token(index)
  text_ptr = self.class.sas_lexer_get_token_text(@lexer_ptr, index)
  return nil if text_ptr.null?

  begin
    # Pointer reads come back as raw bytes; tag them as UTF-8 so the text
    # compares equal to ordinary Ruby strings. NOTE(review): assumes the
    # native side emits UTF-8 (it is built from Rust strings) — confirm
    # against ffi-wrapper/src/lib.rs.
    text = text_ptr.read_string.force_encoding(Encoding::UTF_8)

    token_struct = Token.new
    status = self.class.sas_lexer_get_token(@lexer_ptr, index, token_struct)
    return { index: index, text: text, type: nil, channel: nil } unless status == ErrorCode::SUCCESS

    {
      index: index,
      text: text,
      type: token_struct[:token_type],
      channel: token_struct[:channel],
      start: token_struct[:start],
      end: token_struct[:end],
      start_line: token_struct[:start_line],
      end_line: token_struct[:end_line],
      start_column: token_struct[:start_column],
      end_column: token_struct[:end_column]
    }
  ensure
    # Release the native string even when reading the token raises.
    self.class.sas_lexer_free_string(text_ptr)
  end
end

# Releases the native lexer immediately. Safe to call more than once;
# after the first call #tokenize raises SasLexer::Error.
#
# @return [void]
def free
  return if @lexer_ptr.null?

  # AutoPointer#free runs the releaser now and cancels the GC-time release,
  # so the native handle is never freed twice.
  @lexer_ptr.free
  @lexer_ptr = FFI::Pointer::NULL
end

# Alias for #free. Ruby does not call this method automatically; release
# on garbage collection is handled by the AutoPointer set up in #initialize.
#
# @return [void]
def finalize
  free
end
|
|
475
|
+
|
|
476
|
+
private
|
|
477
|
+
|
|
478
|
+
# Fetches the most recent error message from the native library and
# releases the native string that carried it.
#
# @return [String, nil] the message tagged as UTF-8, or nil when the native
#   side returns a null pointer
def get_last_error_message
  error_ptr = self.class.sas_lexer_get_last_error
  return nil if error_ptr.null?

  begin
    # Pointer reads come back as raw bytes; tag them as UTF-8 so the message
    # behaves like an ordinary Ruby string. NOTE(review): assumes the native
    # side emits UTF-8 — confirm against ffi-wrapper/src/lib.rs.
    error_ptr.read_string.force_encoding(Encoding::UTF_8)
  ensure
    # Always hand the string back, even if reading it raises.
    self.class.sas_lexer_free_string(error_ptr)
  end
end
|
|
488
|
+
end
|
|
489
|
+
end
|
data/lib/sas_lexer.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: sas-lexer
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Craig McNamara
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: ffi
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '1.15'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '1.15'
|
|
26
|
+
description: |
|
|
27
|
+
A Ruby FFI binding to the `sas-lexer` Rust crate by Misha Perlov
|
|
28
|
+
(https://github.com/mishamsk/sas-lexer). Tokenizes SAS source code
|
|
29
|
+
into a stream of typed tokens with full position metadata. Ships
|
|
30
|
+
prebuilt native libraries for supported platforms; a runtime FFI
|
|
31
|
+
loader picks the matching one for the host.
|
|
32
|
+
email:
|
|
33
|
+
- craig@monami.io
|
|
34
|
+
executables: []
|
|
35
|
+
extensions: []
|
|
36
|
+
extra_rdoc_files: []
|
|
37
|
+
files:
|
|
38
|
+
- LICENSE
|
|
39
|
+
- README.md
|
|
40
|
+
- Rakefile
|
|
41
|
+
- ffi-wrapper/Cargo.lock
|
|
42
|
+
- ffi-wrapper/Cargo.toml
|
|
43
|
+
- ffi-wrapper/src/lib.rs
|
|
44
|
+
- lib/native/arm64-darwin/libsas_lexer_ffi.dylib
|
|
45
|
+
- lib/native/x86_64-linux/libsas_lexer_ffi.so
|
|
46
|
+
- lib/sas_lexer.rb
|
|
47
|
+
- lib/sas_lexer/error.rb
|
|
48
|
+
- lib/sas_lexer/lexer.rb
|
|
49
|
+
- lib/sas_lexer/version.rb
|
|
50
|
+
homepage: https://github.com/mes-amis/sas-lexer-rb
|
|
51
|
+
licenses:
|
|
52
|
+
- AGPL-3.0-or-later
|
|
53
|
+
metadata:
|
|
54
|
+
homepage_uri: https://github.com/mes-amis/sas-lexer-rb
|
|
55
|
+
rubygems_mfa_required: 'true'
|
|
56
|
+
rdoc_options: []
|
|
57
|
+
require_paths:
|
|
58
|
+
- lib
|
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
60
|
+
requirements:
|
|
61
|
+
- - ">="
|
|
62
|
+
- !ruby/object:Gem::Version
|
|
63
|
+
version: 3.4.0
|
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
requirements: []
|
|
70
|
+
rubygems_version: 4.0.6
|
|
71
|
+
specification_version: 4
|
|
72
|
+
summary: Ruby FFI wrapper around the sas-lexer Rust crate.
|
|
73
|
+
test_files: []
|