rfmt 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,347 @@
1
+ use crate::ast::{Comment, Node, NodeType};
2
+ use crate::config::{Config, IndentStyle};
3
+ use crate::error::Result;
4
+ use std::fmt::Write;
5
+
6
+ /// Code emitter that converts AST back to Ruby source code
7
+ pub struct Emitter {
8
+ config: Config,
9
+ source: String,
10
+ buffer: String,
11
+ all_comments: Vec<Comment>,
12
+ emitted_comment_indices: Vec<usize>,
13
+ }
14
+
15
+ impl Emitter {
16
+ pub fn new(config: Config) -> Self {
17
+ Self {
18
+ config,
19
+ source: String::new(),
20
+ buffer: String::new(),
21
+ all_comments: Vec::new(),
22
+ emitted_comment_indices: Vec::new(),
23
+ }
24
+ }
25
+
26
+ /// Create emitter with source code for fallback extraction
27
+ pub fn with_source(config: Config, source: String) -> Self {
28
+ Self {
29
+ config,
30
+ source,
31
+ buffer: String::new(),
32
+ all_comments: Vec::new(),
33
+ emitted_comment_indices: Vec::new(),
34
+ }
35
+ }
36
+
37
+ /// Emit Ruby source code from an AST
38
+ pub fn emit(&mut self, ast: &Node) -> Result<String> {
39
+ self.buffer.clear();
40
+ self.emitted_comment_indices.clear();
41
+
42
+ // Collect all comments from the AST
43
+ self.collect_comments(ast);
44
+
45
+ self.emit_node(ast, 0)?;
46
+ Ok(self.buffer.clone())
47
+ }
48
+
49
+ /// Recursively collect all comments from the AST
50
+ fn collect_comments(&mut self, node: &Node) {
51
+ self.all_comments.extend(node.comments.clone());
52
+ for child in &node.children {
53
+ self.collect_comments(child);
54
+ }
55
+ }
56
+
57
+ /// Emit comments that appear before a given line
58
+ fn emit_comments_before(&mut self, line: usize, indent_level: usize) -> Result<()> {
59
+ let indent_str = match self.config.formatting.indent_style {
60
+ IndentStyle::Spaces => " ".repeat(self.config.formatting.indent_width * indent_level),
61
+ IndentStyle::Tabs => "\t".repeat(indent_level),
62
+ };
63
+
64
+ let mut indices_to_emit = Vec::new();
65
+ for (idx, comment) in self.all_comments.iter().enumerate() {
66
+ if self.emitted_comment_indices.contains(&idx) {
67
+ continue;
68
+ }
69
+
70
+ // Collect comments that end before this line
71
+ if comment.location.end_line < line {
72
+ indices_to_emit.push((idx, comment.text.clone()));
73
+ }
74
+ }
75
+
76
+ // Now emit the collected comments
77
+ for (idx, text) in indices_to_emit {
78
+ writeln!(self.buffer, "{}{}", indent_str, text)?;
79
+ self.emitted_comment_indices.push(idx);
80
+ }
81
+
82
+ Ok(())
83
+ }
84
+
85
+ /// Emit comments that appear on the same line (trailing comments)
86
+ fn emit_trailing_comments(&mut self, line: usize) -> Result<()> {
87
+ let mut indices_to_emit = Vec::new();
88
+ for (idx, comment) in self.all_comments.iter().enumerate() {
89
+ if self.emitted_comment_indices.contains(&idx) {
90
+ continue;
91
+ }
92
+
93
+ // Collect comments on the same line (trailing)
94
+ if comment.location.start_line == line {
95
+ indices_to_emit.push((idx, comment.text.clone()));
96
+ }
97
+ }
98
+
99
+ // Now emit the collected comments
100
+ for (idx, text) in indices_to_emit {
101
+ write!(self.buffer, " {}", text)?;
102
+ self.emitted_comment_indices.push(idx);
103
+ }
104
+
105
+ Ok(())
106
+ }
107
+
108
+ /// Emit a node with given indentation level
109
+ fn emit_node(&mut self, node: &Node, indent_level: usize) -> Result<()> {
110
+ match &node.node_type {
111
+ NodeType::ProgramNode => self.emit_program(node, indent_level)?,
112
+ NodeType::StatementsNode => self.emit_statements(node, indent_level)?,
113
+ NodeType::ClassNode => self.emit_class(node, indent_level)?,
114
+ NodeType::ModuleNode => self.emit_module(node, indent_level)?,
115
+ NodeType::DefNode => self.emit_method(node, indent_level)?,
116
+ _ => self.emit_generic(node, indent_level)?,
117
+ }
118
+ Ok(())
119
+ }
120
+
121
+ /// Emit program node (root)
122
+ fn emit_program(&mut self, node: &Node, indent_level: usize) -> Result<()> {
123
+ for (i, child) in node.children.iter().enumerate() {
124
+ self.emit_node(child, indent_level)?;
125
+
126
+ // Add newline between top-level statements
127
+ if i < node.children.len() - 1 {
128
+ self.buffer.push('\n');
129
+ }
130
+ }
131
+ Ok(())
132
+ }
133
+
134
+ /// Emit statements node (body of class/module/def)
135
+ fn emit_statements(&mut self, node: &Node, indent_level: usize) -> Result<()> {
136
+ for (i, child) in node.children.iter().enumerate() {
137
+ self.emit_node(child, indent_level)?;
138
+
139
+ // Add newline between statements
140
+ if i < node.children.len() - 1 {
141
+ self.buffer.push('\n');
142
+ }
143
+ }
144
+ Ok(())
145
+ }
146
+
147
+ /// Emit class definition
148
+ fn emit_class(&mut self, node: &Node, indent_level: usize) -> Result<()> {
149
+ // Emit any comments before this class
150
+ self.emit_comments_before(node.location.start_line, indent_level)?;
151
+
152
+ self.emit_indent(indent_level)?;
153
+ write!(self.buffer, "class ")?;
154
+
155
+ if let Some(name) = node.metadata.get("name") {
156
+ write!(self.buffer, "{}", name)?;
157
+ }
158
+
159
+ if let Some(superclass) = node.metadata.get("superclass") {
160
+ write!(self.buffer, " < {}", superclass)?;
161
+ }
162
+
163
+ self.buffer.push('\n');
164
+
165
+ // Emit body (children), but skip structural nodes like constant_read_node
166
+ for child in &node.children {
167
+ if self.is_structural_node(&child.node_type) {
168
+ continue;
169
+ }
170
+ self.emit_node(child, indent_level + 1)?;
171
+ // Note: don't add newline here, statements node will handle it
172
+ }
173
+
174
+ // Add newline before end if there was body content
175
+ if node
176
+ .children
177
+ .iter()
178
+ .any(|c| !self.is_structural_node(&c.node_type))
179
+ {
180
+ self.buffer.push('\n');
181
+ }
182
+
183
+ self.emit_indent(indent_level)?;
184
+ write!(self.buffer, "end")?;
185
+
186
+ Ok(())
187
+ }
188
+
189
+ /// Emit module definition
190
+ fn emit_module(&mut self, node: &Node, indent_level: usize) -> Result<()> {
191
+ // Emit any comments before this module
192
+ self.emit_comments_before(node.location.start_line, indent_level)?;
193
+
194
+ self.emit_indent(indent_level)?;
195
+ write!(self.buffer, "module ")?;
196
+
197
+ if let Some(name) = node.metadata.get("name") {
198
+ write!(self.buffer, "{}", name)?;
199
+ }
200
+
201
+ self.buffer.push('\n');
202
+
203
+ // Emit body (children), but skip structural nodes
204
+ for child in &node.children {
205
+ if self.is_structural_node(&child.node_type) {
206
+ continue;
207
+ }
208
+ self.emit_node(child, indent_level + 1)?;
209
+ }
210
+
211
+ // Add newline before end if there was body content
212
+ if node
213
+ .children
214
+ .iter()
215
+ .any(|c| !self.is_structural_node(&c.node_type))
216
+ {
217
+ self.buffer.push('\n');
218
+ }
219
+
220
+ self.emit_indent(indent_level)?;
221
+ write!(self.buffer, "end")?;
222
+
223
+ Ok(())
224
+ }
225
+
226
+ /// Emit method definition
227
+ fn emit_method(&mut self, node: &Node, indent_level: usize) -> Result<()> {
228
+ // Emit any comments before this method
229
+ self.emit_comments_before(node.location.start_line, indent_level)?;
230
+
231
+ self.emit_indent(indent_level)?;
232
+ write!(self.buffer, "def ")?;
233
+
234
+ if let Some(name) = node.metadata.get("name") {
235
+ write!(self.buffer, "{}", name)?;
236
+ }
237
+
238
+ // TODO: Handle parameters properly
239
+ // For now, extract from source if method has parameters
240
+ if node
241
+ .metadata
242
+ .get("parameters_count")
243
+ .and_then(|s| s.parse::<usize>().ok())
244
+ .unwrap_or(0)
245
+ > 0
246
+ {
247
+ // Extract parameter part from source
248
+ if !self.source.is_empty() && node.location.end_offset <= self.source.len() {
249
+ if let Some(source_text) = self
250
+ .source
251
+ .get(node.location.start_offset..node.location.end_offset)
252
+ {
253
+ // Find parameters in source (between def name and \n or ;)
254
+ if let Some(def_line) = source_text.lines().next() {
255
+ if let Some(params_start) = def_line.find('(') {
256
+ if let Some(params_end) = def_line.find(')') {
257
+ let params = &def_line[params_start..=params_end];
258
+ write!(self.buffer, "{}", params)?;
259
+ }
260
+ }
261
+ }
262
+ }
263
+ }
264
+ }
265
+
266
+ self.buffer.push('\n');
267
+
268
+ // Emit body (children), but skip structural nodes like parameter nodes
269
+ for child in &node.children {
270
+ if self.is_structural_node(&child.node_type) {
271
+ continue;
272
+ }
273
+ self.emit_node(child, indent_level + 1)?;
274
+ }
275
+
276
+ // Add newline before end if there was body content
277
+ if node
278
+ .children
279
+ .iter()
280
+ .any(|c| !self.is_structural_node(&c.node_type))
281
+ {
282
+ self.buffer.push('\n');
283
+ }
284
+
285
+ self.emit_indent(indent_level)?;
286
+ write!(self.buffer, "end")?;
287
+
288
+ Ok(())
289
+ }
290
+
291
+ /// Emit generic node by extracting from source
292
+ fn emit_generic(&mut self, node: &Node, indent_level: usize) -> Result<()> {
293
+ // Emit any comments before this node
294
+ self.emit_comments_before(node.location.start_line, indent_level)?;
295
+
296
+ if !self.source.is_empty() {
297
+ let start = node.location.start_offset;
298
+ let end = node.location.end_offset;
299
+
300
+ // Clone text first to avoid borrow conflict
301
+ let text_owned = self.source.get(start..end).map(|s| s.to_string());
302
+
303
+ if let Some(text) = text_owned {
304
+ // Add indentation before the extracted text
305
+ self.emit_indent(indent_level)?;
306
+ write!(self.buffer, "{}", text)?;
307
+
308
+ // Emit any trailing comments on the same line
309
+ self.emit_trailing_comments(node.location.end_line)?;
310
+ }
311
+ }
312
+ Ok(())
313
+ }
314
+
315
+ /// Emit indentation
316
+ fn emit_indent(&mut self, level: usize) -> Result<()> {
317
+ let indent_str = match self.config.formatting.indent_style {
318
+ IndentStyle::Spaces => " ".repeat(self.config.formatting.indent_width * level),
319
+ IndentStyle::Tabs => "\t".repeat(level),
320
+ };
321
+
322
+ write!(self.buffer, "{}", indent_str)?;
323
+ Ok(())
324
+ }
325
+
326
+ /// Check if node is structural (part of definition syntax, not body)
327
+ fn is_structural_node(&self, node_type: &NodeType) -> bool {
328
+ matches!(
329
+ node_type,
330
+ NodeType::ConstantReadNode
331
+ | NodeType::ConstantWriteNode
332
+ | NodeType::ConstantPathNode
333
+ | NodeType::RequiredParameterNode
334
+ | NodeType::OptionalParameterNode
335
+ | NodeType::RestParameterNode
336
+ | NodeType::KeywordParameterNode
337
+ | NodeType::KeywordRestParameterNode
338
+ | NodeType::BlockParameterNode
339
+ )
340
+ }
341
+ }
342
+
343
+ impl Default for Emitter {
344
+ fn default() -> Self {
345
+ Self::new(Config::default())
346
+ }
347
+ }
@@ -0,0 +1,48 @@
1
+ use magnus::{Error as MagnusError, Ruby};
2
+ use thiserror::Error;
3
+
4
+ pub type Result<T> = std::result::Result<T, RfmtError>;
5
+
6
+ #[derive(Error, Debug)]
7
+ pub enum RfmtError {
8
+ #[error("Prism integration error: {0}")]
9
+ PrismError(String),
10
+
11
+ #[error("Format error: {0}")]
12
+ FormatError(String),
13
+
14
+ #[error("Unsupported feature: {feature}\n{explanation}")]
15
+ UnsupportedFeature {
16
+ feature: String,
17
+ explanation: String,
18
+ },
19
+
20
+ #[error("Configuration error: {message}")]
21
+ #[cfg(test)]
22
+ ConfigError { message: String },
23
+ }
24
+
25
+ // Implement From for std::fmt::Error
26
+ impl From<std::fmt::Error> for RfmtError {
27
+ fn from(err: std::fmt::Error) -> Self {
28
+ RfmtError::FormatError(err.to_string())
29
+ }
30
+ }
31
+
32
+ impl RfmtError {
33
+ /// Convert RfmtError to Magnus Error for Ruby interop
34
+ pub fn to_magnus_error(&self, ruby: &Ruby) -> MagnusError {
35
+ let exception_class = match self {
36
+ RfmtError::PrismError(_) => "PrismError",
37
+ RfmtError::FormatError(_) => "FormatError",
38
+ RfmtError::UnsupportedFeature { .. } => "UnsupportedFeature",
39
+ #[cfg(test)]
40
+ RfmtError::ConfigError { .. } => "ConfigError",
41
+ };
42
+
43
+ MagnusError::new(
44
+ ruby.exception_standard_error(),
45
+ format!("[Rfmt::{}] {}", exception_class, self),
46
+ )
47
+ }
48
+ }
data/ext/rfmt/src/lib.rs CHANGED
@@ -1,48 +1,71 @@
1
+ mod ast;
2
+ mod config;
3
+ mod emitter;
4
+ mod error;
5
+ mod logging;
6
+ mod parser;
7
+ mod policy;
8
+
9
+ use policy::SecurityPolicy;
10
+
11
+ use config::Config;
12
+ use emitter::Emitter;
1
13
  use magnus::{define_module, function, prelude::*, Error, Ruby};
14
+ use parser::{PrismAdapter, RubyParser};
15
+
16
+ fn format_ruby_code(ruby: &Ruby, source: String, json: String) -> Result<String, Error> {
17
+ let policy = SecurityPolicy::default();
18
+
19
+ policy
20
+ .validate_source_size(&source)
21
+ .map_err(|e| e.to_magnus_error(ruby))?;
22
+
23
+ log::debug!("Source code validated, size: {} bytes", source.len());
24
+
25
+ let parser = PrismAdapter::new();
26
+ let ast = parser.parse(&json).map_err(|e| e.to_magnus_error(ruby))?;
27
+
28
+ let config = Config::default();
29
+ let mut emitter = Emitter::with_source(config, source);
30
+
31
+ let formatted = emitter.emit(&ast).map_err(|e| e.to_magnus_error(ruby))?;
2
32
 
3
- fn format_ruby_code(source: String) -> String {
4
- // indent with 2 spaces
5
- let lines: Vec<&str> = source.lines().collect();
6
- let mut result = Vec::new();
7
- let mut indent_level: i32 = 0;
8
-
9
- for line in lines {
10
- let trimmed = line.trim();
11
-
12
- if trimmed == "end" || trimmed.starts_with("end ")
13
- || trimmed == "}" || trimmed == "]" {
14
- indent_level = indent_level.saturating_sub(1);
15
- }
16
-
17
- // eval indent
18
- if !trimmed.is_empty() {
19
- result.push(format!("{}{}", " ".repeat(indent_level as usize), trimmed));
20
- } else {
21
- result.push(String::new());
22
- }
23
-
24
- // add indent lebel
25
- if trimmed.starts_with("def ") || trimmed.starts_with("class ")
26
- || trimmed.starts_with("module ") || trimmed.starts_with("if ")
27
- || trimmed.starts_with("unless ") || trimmed.starts_with("while ")
28
- || trimmed.starts_with("for ") || trimmed.starts_with("do ")
29
- || trimmed == "do" || trimmed.ends_with(" do")
30
- || trimmed == "{" || trimmed == "[" {
31
- indent_level += 1;
32
- }
33
- }
34
-
35
- result.join("\n")
33
+ Ok(formatted)
34
+ }
35
+
36
+ /// Parse Ruby source code and return JSON AST representation
37
+ /// This is useful for debugging and integration testing
38
+ fn parse_to_json(ruby: &Ruby, source: String) -> Result<String, Error> {
39
+ let parser = PrismAdapter::new();
40
+ let ast = parser.parse(&source).map_err(|e| e.to_magnus_error(ruby))?;
41
+
42
+ Ok(format!("{:#?}", ast))
36
43
  }
37
44
 
38
45
/// Version label of the Rust side of the extension.
fn rust_version() -> String {
    String::from("0.2.0 (Rust)")
}
41
48
 
42
49
  #[magnus::init]
43
- fn init(_ruby: &Ruby) -> Result<(), Error> {
50
+ fn init(ruby: &Ruby) -> Result<(), Error> {
51
+ logging::RfmtLogger::init();
52
+ log::info!("Initializing rfmt Rust extension");
53
+
44
54
  let module = define_module("Rfmt")?;
45
- module.define_singleton_method("format_code", function!(format_ruby_code, 1))?;
55
+
56
+ module.define_singleton_method("format_code", function!(format_ruby_code, 2))?;
57
+ module.define_singleton_method("parse_to_json", function!(parse_to_json, 1))?;
46
58
  module.define_singleton_method("rust_version", function!(rust_version, 0))?;
59
+
60
+ let rfmt_error = ruby.define_error("RfmtError", ruby.exception_standard_error())?;
61
+ ruby.define_error("ParseError", rfmt_error)?;
62
+ ruby.define_error("ConfigError", rfmt_error)?;
63
+ ruby.define_error("PrismError", rfmt_error)?;
64
+ ruby.define_error("RuleError", rfmt_error)?;
65
+ ruby.define_error("InternalError", rfmt_error)?;
66
+ ruby.define_error("FormattingError", rfmt_error)?;
67
+ ruby.define_error("UnsupportedFeature", rfmt_error)?;
68
+
69
+ log::info!("rfmt Rust extension initialized successfully");
47
70
  Ok(())
48
71
  }
@@ -0,0 +1,128 @@
1
+ use log::{LevelFilter, Log, Metadata, Record};
2
+ use std::io::Write;
3
+ use std::sync::Mutex;
4
+
5
+ pub struct RfmtLogger {
6
+ level: LevelFilter,
7
+ output: Mutex<Box<dyn Write + Send>>,
8
+ }
9
+
10
+ impl RfmtLogger {
11
+ pub fn new(level: LevelFilter) -> Self {
12
+ Self {
13
+ level,
14
+ output: Mutex::new(Box::new(std::io::stderr())),
15
+ }
16
+ }
17
+
18
+ #[cfg(test)]
19
+ pub fn with_output(mut self, output: Box<dyn Write + Send>) -> Self {
20
+ self.output = Mutex::new(output);
21
+ self
22
+ }
23
+
24
+ pub fn init() {
25
+ let logger = Self::new(LevelFilter::Info);
26
+ log::set_boxed_logger(Box::new(logger)).expect("Failed to initialize logger");
27
+ log::set_max_level(LevelFilter::Trace);
28
+ }
29
+ }
30
+
31
+ impl Log for RfmtLogger {
32
+ fn enabled(&self, metadata: &Metadata) -> bool {
33
+ metadata.level() <= self.level
34
+ }
35
+
36
+ fn log(&self, record: &Record) {
37
+ if !self.enabled(record.metadata()) {
38
+ return;
39
+ }
40
+
41
+ let mut output = self.output.lock().unwrap();
42
+
43
+ writeln!(
44
+ output,
45
+ "[{}] {} - {}",
46
+ record.level(),
47
+ record.target(),
48
+ record.args()
49
+ )
50
+ .ok();
51
+ }
52
+
53
+ fn flush(&self) {
54
+ let mut output = self.output.lock().unwrap();
55
+ output.flush().ok();
56
+ }
57
+ }
58
+
59
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Arc, Mutex};

    /// Writer that appends into a shared byte vector so a test can inspect
    /// what the logger wrote.
    struct TestWriter {
        data: Arc<Mutex<Vec<u8>>>,
    }

    impl TestWriter {
        /// Returns the writer together with a handle to its backing buffer.
        fn new() -> (Self, Arc<Mutex<Vec<u8>>>) {
            let data = Arc::new(Mutex::new(Vec::new()));
            (
                Self {
                    data: Arc::clone(&data),
                },
                data,
            )
        }
    }

    impl Write for TestWriter {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.data.lock().unwrap().write(buf)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            self.data.lock().unwrap().flush()
        }
    }

    #[test]
    fn test_logger_creation() {
        let logger = RfmtLogger::new(LevelFilter::Info);
        assert!(logger.enabled(&Metadata::builder().level(log::Level::Info).build()));
        assert!(!logger.enabled(&Metadata::builder().level(log::Level::Debug).build()));
    }

    #[test]
    fn test_logger_level_filtering() {
        let logger = RfmtLogger::new(LevelFilter::Warn);

        assert!(logger.enabled(&Metadata::builder().level(log::Level::Error).build()));
        assert!(logger.enabled(&Metadata::builder().level(log::Level::Warn).build()));
        assert!(!logger.enabled(&Metadata::builder().level(log::Level::Info).build()));
        assert!(!logger.enabled(&Metadata::builder().level(log::Level::Debug).build()));
        assert!(!logger.enabled(&Metadata::builder().level(log::Level::Trace).build()));
    }

    #[test]
    fn test_logger_output() {
        let (writer, data) = TestWriter::new();
        let logger = RfmtLogger::new(LevelFilter::Info).with_output(Box::new(writer));

        let record = Record::builder()
            .level(log::Level::Info)
            .target("test")
            .args(format_args!("test message"))
            .build();

        logger.log(&record);
        logger.flush();

        let output = String::from_utf8(data.lock().unwrap().clone()).unwrap();
        assert!(output.contains("[INFO]"));
        assert!(output.contains("test"));
        assert!(output.contains("test message"));
    }
}
@@ -0,0 +1,3 @@
1
+ pub mod logger;
2
+
3
+ pub use logger::RfmtLogger;
@@ -0,0 +1,9 @@
1
+ use crate::ast::Node;
2
+ use crate::error::Result;
3
+
4
+ pub mod prism_adapter;
5
+ pub use prism_adapter::PrismAdapter;
6
+
7
+ pub trait RubyParser: Send + Sync {
8
+ fn parse(&self, source: &str) -> Result<Node>;
9
+ }