sas-lexer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,281 @@
1
+ use std::ffi::{CStr, CString};
2
+ use std::os::raw::c_char;
3
+ use std::ptr;
4
+ use std::sync::Mutex;
5
+ use sas_lexer::{lex_program, TokenIdx};
6
+
7
// Per-thread slot holding the most recent error message recorded by this module.
thread_local! {
    static LAST_ERROR: Mutex<Option<String>> = Mutex::new(None);
}

/// Error codes returned across the FFI boundary.
///
/// The numeric values are part of the C interface and must stay stable.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SasLexerError {
    Success = 0,
    NullPointer = 1,
    InvalidUtf8 = 2,
    LexingError = 3,
    IndexOutOfBounds = 4,
    TokenNotFound = 5,
    BufferNotInitialized = 6,
}

impl SasLexerError {
    /// Records a human-readable message for this error in the calling thread's
    /// `LAST_ERROR` slot.
    ///
    /// `details`, when present, is appended to the variant's summary as
    /// `"<summary>: <details>"` for every variant, so context supplied by the
    /// caller is never discarded. `Success` leaves the stored message untouched.
    fn set_last_error(&self, details: Option<String>) {
        let summary = match self {
            // Success is not an error: keep whatever message is already stored.
            SasLexerError::Success => return,
            SasLexerError::NullPointer => "Null pointer provided",
            SasLexerError::InvalidUtf8 => "Invalid UTF-8 in input string",
            SasLexerError::LexingError => "Failed to lex SAS code",
            SasLexerError::IndexOutOfBounds => "Token index out of bounds",
            SasLexerError::TokenNotFound => "Token not found in buffer",
            SasLexerError::BufferNotInitialized => {
                "Token buffer not initialized - call tokenize first"
            }
        };

        let message = match details {
            Some(extra) => format!("{}: {}", summary, extra),
            None => summary.to_string(),
        };

        LAST_ERROR.with(|slot| {
            // Recover the guard from a poisoned lock instead of panicking: this
            // runs underneath `extern "C"` entry points, where unwinding must
            // not escape.
            let mut stored = slot.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
            *stored = Some(message);
        });
    }
}
44
+
45
+ /// Get the last error message as a C string
46
+ /// The caller must free the returned string using sas_lexer_free_string
47
+ #[no_mangle]
48
+ pub extern "C" fn sas_lexer_get_last_error() -> *mut c_char {
49
+ LAST_ERROR.with(|e| {
50
+ let error = e.lock().unwrap();
51
+ match &*error {
52
+ Some(msg) => {
53
+ match CString::new(msg.as_str()) {
54
+ Ok(c_string) => c_string.into_raw(),
55
+ Err(_) => ptr::null_mut(),
56
+ }
57
+ }
58
+ None => ptr::null_mut(),
59
+ }
60
+ })
61
+ }
62
+
63
+ /// Clear the last error message
64
+ #[no_mangle]
65
+ pub extern "C" fn sas_lexer_clear_error() {
66
+ LAST_ERROR.with(|e| {
67
+ *e.lock().unwrap() = None;
68
+ });
69
+ }
70
+
71
/// Plain-data description of a single token, laid out for C callers.
///
/// Instances are filled in by `sas_lexer_get_token`. The type is `Copy` and
/// `Default` so callers on the Rust side can zero-initialise an out-parameter,
/// compare tokens, and print them while debugging.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SasToken {
    /// Token type reported by the lexer, cast to `u32`.
    pub token_type: u32,
    /// Token channel reported by the lexer, cast to `u8`.
    pub channel: u8,
    /// Start position of the token in the source.
    /// NOTE(review): unit (byte vs. char offset) is defined by `sas_lexer` - confirm.
    pub start: usize,
    /// End position of the token in the source (same unit as `start`).
    pub end: usize,
    /// Line on which the token starts, as reported by the lexer.
    pub start_line: u32,
    /// Line on which the token ends, as reported by the lexer.
    pub end_line: u32,
    /// Column at which the token starts, as reported by the lexer.
    pub start_column: u32,
    /// Column at which the token ends, as reported by the lexer.
    pub end_column: u32,
}
83
+
84
/// A structure to hold the lexer state and results
///
/// Handed to C callers only as a pointer (see `sas_lexer_new` /
/// `sas_lexer_free`); all fields are private to this module.
#[repr(C)]
pub struct SasLexer {
    // Tokenized buffer from the last successful `sas_lexer_tokenize`; `None` until then.
    buffer: Option<sas_lexer::TokenizedBuffer>,
    // Owned copy of the source text that produced `buffer`; `None` until then.
    source: Option<String>,
    // Token indices collected from `buffer.iter_tokens()`; empty before tokenizing.
    tokens: Vec<TokenIdx>,
    // Reset to 0 on every successful tokenize; not read anywhere in the visible code.
    current_index: usize,
}
92
+
93
+ /// Create a new SAS lexer instance
94
+ #[no_mangle]
95
+ pub extern "C" fn sas_lexer_new() -> *mut SasLexer {
96
+ let lexer = Box::new(SasLexer {
97
+ buffer: None,
98
+ source: None,
99
+ tokens: Vec::new(),
100
+ current_index: 0,
101
+ });
102
+ Box::into_raw(lexer)
103
+ }
104
+
105
+ /// Free a SAS lexer instance
106
+ #[no_mangle]
107
+ pub extern "C" fn sas_lexer_free(lexer: *mut SasLexer) {
108
+ if !lexer.is_null() {
109
+ unsafe {
110
+ drop(Box::from_raw(lexer));
111
+ }
112
+ }
113
+ }
114
+
115
+ /// Tokenize SAS code
116
+ /// Returns SasLexerError enum value
117
+ #[no_mangle]
118
+ pub extern "C" fn sas_lexer_tokenize(lexer: *mut SasLexer, code: *const c_char) -> SasLexerError {
119
+ if lexer.is_null() || code.is_null() {
120
+ let error = SasLexerError::NullPointer;
121
+ error.set_last_error(None);
122
+ return error;
123
+ }
124
+
125
+ let code_str = unsafe {
126
+ match CStr::from_ptr(code).to_str() {
127
+ Ok(s) => s,
128
+ Err(e) => {
129
+ let error = SasLexerError::InvalidUtf8;
130
+ error.set_last_error(Some(e.to_string()));
131
+ return error;
132
+ }
133
+ }
134
+ };
135
+
136
+ let lexer_ref = unsafe { &mut *lexer };
137
+
138
+ match lex_program(&code_str) {
139
+ Ok(result) => {
140
+ let tokens: Vec<TokenIdx> = result.buffer.iter_tokens().collect();
141
+ lexer_ref.buffer = Some(result.buffer);
142
+ lexer_ref.source = Some(code_str.to_string());
143
+ lexer_ref.tokens = tokens;
144
+ lexer_ref.current_index = 0;
145
+ SasLexerError::Success
146
+ }
147
+ Err(e) => {
148
+ let error = SasLexerError::LexingError;
149
+ error.set_last_error(Some(format!("{:?}", e)));
150
+ error
151
+ }
152
+ }
153
+ }
154
+
155
+ /// Get the number of tokens
156
+ #[no_mangle]
157
+ pub extern "C" fn sas_lexer_token_count(lexer: *const SasLexer) -> usize {
158
+ if lexer.is_null() {
159
+ return 0;
160
+ }
161
+
162
+ let lexer_ref = unsafe { &*lexer };
163
+ lexer_ref.tokens.len()
164
+ }
165
+
166
+ /// Get a token by index with full metadata
167
+ #[no_mangle]
168
+ pub extern "C" fn sas_lexer_get_token(
169
+ lexer: *const SasLexer,
170
+ index: usize,
171
+ token_out: *mut SasToken
172
+ ) -> SasLexerError {
173
+ if lexer.is_null() || token_out.is_null() {
174
+ let error = SasLexerError::NullPointer;
175
+ error.set_last_error(None);
176
+ return error;
177
+ }
178
+
179
+ let lexer_ref = unsafe { &*lexer };
180
+
181
+ if index >= lexer_ref.tokens.len() {
182
+ let error = SasLexerError::IndexOutOfBounds;
183
+ error.set_last_error(Some(format!("index {} >= token count {}", index, lexer_ref.tokens.len())));
184
+ return error;
185
+ }
186
+
187
+ if let Some(buffer) = &lexer_ref.buffer {
188
+ let token_idx = lexer_ref.tokens[index];
189
+
190
+ // Get all token metadata
191
+ let token_type = buffer.get_token_type(token_idx);
192
+ let channel = buffer.get_token_channel(token_idx);
193
+ let start = buffer.get_token_start(token_idx);
194
+ let end = buffer.get_token_end(token_idx);
195
+ let start_line = buffer.get_token_start_line(token_idx);
196
+ let end_line = buffer.get_token_end_line(token_idx);
197
+ let start_column = buffer.get_token_start_column(token_idx);
198
+ let end_column = buffer.get_token_end_column(token_idx);
199
+
200
+ // Check if all queries succeeded
201
+ if let (Ok(tt), Ok(ch), Ok(s), Ok(e), Ok(sl), Ok(el), Ok(sc), Ok(ec)) =
202
+ (token_type, channel, start, end, start_line, end_line, start_column, end_column) {
203
+ unsafe {
204
+ (*token_out).token_type = tt as u32;
205
+ (*token_out).channel = ch as u8;
206
+ (*token_out).start = s.get() as usize;
207
+ (*token_out).end = e.get() as usize;
208
+ (*token_out).start_line = sl;
209
+ (*token_out).end_line = el;
210
+ (*token_out).start_column = sc;
211
+ (*token_out).end_column = ec;
212
+ }
213
+ return SasLexerError::Success;
214
+ } else {
215
+ let error = SasLexerError::TokenNotFound;
216
+ error.set_last_error(None);
217
+ return error;
218
+ }
219
+ }
220
+
221
+ let error = SasLexerError::BufferNotInitialized;
222
+ error.set_last_error(None);
223
+ error
224
+ }
225
+
226
+ /// Get token text by index
227
+ #[no_mangle]
228
+ pub extern "C" fn sas_lexer_get_token_text(
229
+ lexer: *const SasLexer,
230
+ index: usize
231
+ ) -> *mut c_char {
232
+ if lexer.is_null() {
233
+ let error = SasLexerError::NullPointer;
234
+ error.set_last_error(Some("lexer pointer is null".to_string()));
235
+ return ptr::null_mut();
236
+ }
237
+
238
+ let lexer_ref = unsafe { &*lexer };
239
+
240
+ if index >= lexer_ref.tokens.len() {
241
+ let error = SasLexerError::IndexOutOfBounds;
242
+ error.set_last_error(Some(format!("index {} >= token count {}", index, lexer_ref.tokens.len())));
243
+ return ptr::null_mut();
244
+ }
245
+
246
+ if let (Some(buffer), Some(source)) = (&lexer_ref.buffer, &lexer_ref.source) {
247
+ let token_idx = lexer_ref.tokens[index];
248
+ match buffer.get_token_raw_text(token_idx, source) {
249
+ Ok(Some(text)) => {
250
+ if let Ok(c_string) = CString::new(text) {
251
+ return c_string.into_raw();
252
+ }
253
+ }
254
+ Ok(None) => {
255
+ // Empty range, return empty string
256
+ if let Ok(c_string) = CString::new("") {
257
+ return c_string.into_raw();
258
+ }
259
+ }
260
+ Err(e) => {
261
+ let error = SasLexerError::TokenNotFound;
262
+ error.set_last_error(Some(format!("Failed to get token text: {:?}", e)));
263
+ return ptr::null_mut();
264
+ }
265
+ }
266
+ }
267
+
268
+ let error = SasLexerError::BufferNotInitialized;
269
+ error.set_last_error(None);
270
+ ptr::null_mut()
271
+ }
272
+
273
/// Releases a string previously handed out by `sas_lexer_get_token_text` or
/// `sas_lexer_get_last_error`.
///
/// Passing a null pointer is a no-op.
#[no_mangle]
pub extern "C" fn sas_lexer_free_string(s: *mut c_char) {
    if s.is_null() {
        return;
    }
    // SAFETY: the caller promises `s` was produced by `CString::into_raw` in
    // this library and has not been freed or resized since.
    let reclaimed = unsafe { CString::from_raw(s) };
    drop(reclaimed);
}
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace for the SasLexer Ruby code.
module SasLexer
  # Error class for this namespace; inherits from StandardError.
  class Error < StandardError; end
end