sas-lexer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +111 -0
- data/Rakefile +208 -0
- data/ffi-wrapper/Cargo.lock +336 -0
- data/ffi-wrapper/Cargo.toml +12 -0
- data/ffi-wrapper/src/lib.rs +281 -0
- data/lib/native/arm64-darwin/libsas_lexer_ffi.dylib +0 -0
- data/lib/native/x86_64-linux/libsas_lexer_ffi.so +0 -0
- data/lib/sas_lexer/error.rb +5 -0
- data/lib/sas_lexer/lexer.rb +489 -0
- data/lib/sas_lexer/version.rb +5 -0
- data/lib/sas_lexer.rb +5 -0
- metadata +73 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
use std::ffi::{CStr, CString};
|
|
2
|
+
use std::os::raw::c_char;
|
|
3
|
+
use std::ptr;
|
|
4
|
+
use std::sync::Mutex;
|
|
5
|
+
use sas_lexer::{lex_program, TokenIdx};
|
|
6
|
+
|
|
7
|
+
// Per-thread slot holding the most recent error message.
// The `Mutex` is not needed for cross-thread safety (the slot is thread-local),
// but the accessor functions in this file call `.lock()` on it, so the type is kept.
thread_local! {
    static LAST_ERROR: Mutex<Option<String>> = Mutex::new(None);
}

/// Error codes returned across the FFI boundary.
///
/// The numeric values are part of the C interface and must stay stable.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SasLexerError {
    /// The call completed without error.
    Success = 0,
    /// A required pointer argument was null.
    NullPointer = 1,
    /// The input C string was not valid UTF-8.
    InvalidUtf8 = 2,
    /// The lexer reported a failure.
    LexingError = 3,
    /// The requested token index is not below the token count.
    IndexOutOfBounds = 4,
    /// The token buffer could not answer a query for the token.
    TokenNotFound = 5,
    /// No tokenized buffer is stored in the lexer yet.
    BufferNotInitialized = 6,
}

impl SasLexerError {
    /// Records a human-readable message for this error in the calling thread's
    /// `LAST_ERROR` slot.
    ///
    /// `Success` is a no-op and leaves any previously stored message untouched.
    /// When `details` is provided it is appended as `": <details>"` for every
    /// error variant, so context supplied by callers is never silently lost.
    fn set_last_error(&self, details: Option<String>) {
        let summary = match self {
            // Success is not an error; keep whatever message is already stored.
            SasLexerError::Success => return,
            SasLexerError::NullPointer => "Null pointer provided",
            SasLexerError::InvalidUtf8 => "Invalid UTF-8 in input string",
            SasLexerError::LexingError => "Failed to lex SAS code",
            SasLexerError::IndexOutOfBounds => "Token index out of bounds",
            SasLexerError::TokenNotFound => "Token not found in buffer",
            SasLexerError::BufferNotInitialized => {
                "Token buffer not initialized - call tokenize first"
            }
        };

        let message = match details {
            Some(extra) => format!("{}: {}", summary, extra),
            None => summary.to_string(),
        };

        LAST_ERROR.with(|slot| {
            *slot.lock().unwrap() = Some(message);
        });
    }
}
|
|
44
|
+
|
|
45
|
+
/// Get the last error message as a C string
|
|
46
|
+
/// The caller must free the returned string using sas_lexer_free_string
|
|
47
|
+
#[no_mangle]
|
|
48
|
+
pub extern "C" fn sas_lexer_get_last_error() -> *mut c_char {
|
|
49
|
+
LAST_ERROR.with(|e| {
|
|
50
|
+
let error = e.lock().unwrap();
|
|
51
|
+
match &*error {
|
|
52
|
+
Some(msg) => {
|
|
53
|
+
match CString::new(msg.as_str()) {
|
|
54
|
+
Ok(c_string) => c_string.into_raw(),
|
|
55
|
+
Err(_) => ptr::null_mut(),
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
None => ptr::null_mut(),
|
|
59
|
+
}
|
|
60
|
+
})
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/// Clear the last error message
|
|
64
|
+
#[no_mangle]
|
|
65
|
+
pub extern "C" fn sas_lexer_clear_error() {
|
|
66
|
+
LAST_ERROR.with(|e| {
|
|
67
|
+
*e.lock().unwrap() = None;
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/// Plain-data description of a single token, laid out for C callers.
///
/// Filled in by `sas_lexer_get_token`. All fields are integers, so the struct
/// is freely copyable and comparable.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SasToken {
    /// Numeric value of the lexer's token type.
    pub token_type: u32,
    /// Numeric value of the lexer's token channel.
    pub channel: u8,
    /// Start offset of the token as reported by the token buffer.
    /// NOTE(review): unit (bytes vs. characters) is not visible here — confirm
    /// against the `sas_lexer` crate.
    pub start: usize,
    /// End offset of the token, in the same unit as `start`.
    pub end: usize,
    /// Line on which the token starts, as reported by the token buffer.
    pub start_line: u32,
    /// Line on which the token ends, as reported by the token buffer.
    pub end_line: u32,
    /// Column at which the token starts, as reported by the token buffer.
    pub start_column: u32,
    /// Column at which the token ends, as reported by the token buffer.
    pub end_column: u32,
}
|
|
83
|
+
|
|
84
|
+
/// A structure to hold the lexer state and results
|
|
85
|
+
#[repr(C)]
|
|
86
|
+
pub struct SasLexer {
|
|
87
|
+
buffer: Option<sas_lexer::TokenizedBuffer>,
|
|
88
|
+
source: Option<String>,
|
|
89
|
+
tokens: Vec<TokenIdx>,
|
|
90
|
+
current_index: usize,
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/// Create a new SAS lexer instance
|
|
94
|
+
#[no_mangle]
|
|
95
|
+
pub extern "C" fn sas_lexer_new() -> *mut SasLexer {
|
|
96
|
+
let lexer = Box::new(SasLexer {
|
|
97
|
+
buffer: None,
|
|
98
|
+
source: None,
|
|
99
|
+
tokens: Vec::new(),
|
|
100
|
+
current_index: 0,
|
|
101
|
+
});
|
|
102
|
+
Box::into_raw(lexer)
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/// Free a SAS lexer instance
|
|
106
|
+
#[no_mangle]
|
|
107
|
+
pub extern "C" fn sas_lexer_free(lexer: *mut SasLexer) {
|
|
108
|
+
if !lexer.is_null() {
|
|
109
|
+
unsafe {
|
|
110
|
+
drop(Box::from_raw(lexer));
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/// Tokenize SAS code
|
|
116
|
+
/// Returns SasLexerError enum value
|
|
117
|
+
#[no_mangle]
|
|
118
|
+
pub extern "C" fn sas_lexer_tokenize(lexer: *mut SasLexer, code: *const c_char) -> SasLexerError {
|
|
119
|
+
if lexer.is_null() || code.is_null() {
|
|
120
|
+
let error = SasLexerError::NullPointer;
|
|
121
|
+
error.set_last_error(None);
|
|
122
|
+
return error;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
let code_str = unsafe {
|
|
126
|
+
match CStr::from_ptr(code).to_str() {
|
|
127
|
+
Ok(s) => s,
|
|
128
|
+
Err(e) => {
|
|
129
|
+
let error = SasLexerError::InvalidUtf8;
|
|
130
|
+
error.set_last_error(Some(e.to_string()));
|
|
131
|
+
return error;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
let lexer_ref = unsafe { &mut *lexer };
|
|
137
|
+
|
|
138
|
+
match lex_program(&code_str) {
|
|
139
|
+
Ok(result) => {
|
|
140
|
+
let tokens: Vec<TokenIdx> = result.buffer.iter_tokens().collect();
|
|
141
|
+
lexer_ref.buffer = Some(result.buffer);
|
|
142
|
+
lexer_ref.source = Some(code_str.to_string());
|
|
143
|
+
lexer_ref.tokens = tokens;
|
|
144
|
+
lexer_ref.current_index = 0;
|
|
145
|
+
SasLexerError::Success
|
|
146
|
+
}
|
|
147
|
+
Err(e) => {
|
|
148
|
+
let error = SasLexerError::LexingError;
|
|
149
|
+
error.set_last_error(Some(format!("{:?}", e)));
|
|
150
|
+
error
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/// Get the number of tokens
|
|
156
|
+
#[no_mangle]
|
|
157
|
+
pub extern "C" fn sas_lexer_token_count(lexer: *const SasLexer) -> usize {
|
|
158
|
+
if lexer.is_null() {
|
|
159
|
+
return 0;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
let lexer_ref = unsafe { &*lexer };
|
|
163
|
+
lexer_ref.tokens.len()
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/// Get a token by index with full metadata
|
|
167
|
+
#[no_mangle]
|
|
168
|
+
pub extern "C" fn sas_lexer_get_token(
|
|
169
|
+
lexer: *const SasLexer,
|
|
170
|
+
index: usize,
|
|
171
|
+
token_out: *mut SasToken
|
|
172
|
+
) -> SasLexerError {
|
|
173
|
+
if lexer.is_null() || token_out.is_null() {
|
|
174
|
+
let error = SasLexerError::NullPointer;
|
|
175
|
+
error.set_last_error(None);
|
|
176
|
+
return error;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
let lexer_ref = unsafe { &*lexer };
|
|
180
|
+
|
|
181
|
+
if index >= lexer_ref.tokens.len() {
|
|
182
|
+
let error = SasLexerError::IndexOutOfBounds;
|
|
183
|
+
error.set_last_error(Some(format!("index {} >= token count {}", index, lexer_ref.tokens.len())));
|
|
184
|
+
return error;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
if let Some(buffer) = &lexer_ref.buffer {
|
|
188
|
+
let token_idx = lexer_ref.tokens[index];
|
|
189
|
+
|
|
190
|
+
// Get all token metadata
|
|
191
|
+
let token_type = buffer.get_token_type(token_idx);
|
|
192
|
+
let channel = buffer.get_token_channel(token_idx);
|
|
193
|
+
let start = buffer.get_token_start(token_idx);
|
|
194
|
+
let end = buffer.get_token_end(token_idx);
|
|
195
|
+
let start_line = buffer.get_token_start_line(token_idx);
|
|
196
|
+
let end_line = buffer.get_token_end_line(token_idx);
|
|
197
|
+
let start_column = buffer.get_token_start_column(token_idx);
|
|
198
|
+
let end_column = buffer.get_token_end_column(token_idx);
|
|
199
|
+
|
|
200
|
+
// Check if all queries succeeded
|
|
201
|
+
if let (Ok(tt), Ok(ch), Ok(s), Ok(e), Ok(sl), Ok(el), Ok(sc), Ok(ec)) =
|
|
202
|
+
(token_type, channel, start, end, start_line, end_line, start_column, end_column) {
|
|
203
|
+
unsafe {
|
|
204
|
+
(*token_out).token_type = tt as u32;
|
|
205
|
+
(*token_out).channel = ch as u8;
|
|
206
|
+
(*token_out).start = s.get() as usize;
|
|
207
|
+
(*token_out).end = e.get() as usize;
|
|
208
|
+
(*token_out).start_line = sl;
|
|
209
|
+
(*token_out).end_line = el;
|
|
210
|
+
(*token_out).start_column = sc;
|
|
211
|
+
(*token_out).end_column = ec;
|
|
212
|
+
}
|
|
213
|
+
return SasLexerError::Success;
|
|
214
|
+
} else {
|
|
215
|
+
let error = SasLexerError::TokenNotFound;
|
|
216
|
+
error.set_last_error(None);
|
|
217
|
+
return error;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
let error = SasLexerError::BufferNotInitialized;
|
|
222
|
+
error.set_last_error(None);
|
|
223
|
+
error
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/// Get token text by index
|
|
227
|
+
#[no_mangle]
|
|
228
|
+
pub extern "C" fn sas_lexer_get_token_text(
|
|
229
|
+
lexer: *const SasLexer,
|
|
230
|
+
index: usize
|
|
231
|
+
) -> *mut c_char {
|
|
232
|
+
if lexer.is_null() {
|
|
233
|
+
let error = SasLexerError::NullPointer;
|
|
234
|
+
error.set_last_error(Some("lexer pointer is null".to_string()));
|
|
235
|
+
return ptr::null_mut();
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
let lexer_ref = unsafe { &*lexer };
|
|
239
|
+
|
|
240
|
+
if index >= lexer_ref.tokens.len() {
|
|
241
|
+
let error = SasLexerError::IndexOutOfBounds;
|
|
242
|
+
error.set_last_error(Some(format!("index {} >= token count {}", index, lexer_ref.tokens.len())));
|
|
243
|
+
return ptr::null_mut();
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
if let (Some(buffer), Some(source)) = (&lexer_ref.buffer, &lexer_ref.source) {
|
|
247
|
+
let token_idx = lexer_ref.tokens[index];
|
|
248
|
+
match buffer.get_token_raw_text(token_idx, source) {
|
|
249
|
+
Ok(Some(text)) => {
|
|
250
|
+
if let Ok(c_string) = CString::new(text) {
|
|
251
|
+
return c_string.into_raw();
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
Ok(None) => {
|
|
255
|
+
// Empty range, return empty string
|
|
256
|
+
if let Ok(c_string) = CString::new("") {
|
|
257
|
+
return c_string.into_raw();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
Err(e) => {
|
|
261
|
+
let error = SasLexerError::TokenNotFound;
|
|
262
|
+
error.set_last_error(Some(format!("Failed to get token text: {:?}", e)));
|
|
263
|
+
return ptr::null_mut();
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
let error = SasLexerError::BufferNotInitialized;
|
|
269
|
+
error.set_last_error(None);
|
|
270
|
+
ptr::null_mut()
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
/// Releases a string previously returned by `sas_lexer_get_token_text` or
/// `sas_lexer_get_last_error`.
///
/// Passing a null pointer is allowed and does nothing.
#[no_mangle]
pub extern "C" fn sas_lexer_free_string(s: *mut c_char) {
    if s.is_null() {
        return;
    }

    // SAFETY: `s` is non-null; the caller must pass a pointer produced by
    // `CString::into_raw` in this library that has not been freed already.
    let reclaimed = unsafe { CString::from_raw(s) };
    drop(reclaimed);
}
|
|
Binary file
|
|
Binary file
|