RubyGems - rfmt - Versions diffs - 1.5.3 → 1.6.0 - Mend

rfmt 1.5.3 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +31 -0
data/Cargo.lock +1 -1
data/README.md +22 -18
data/ext/rfmt/Cargo.toml +4 -1
data/ext/rfmt/src/doc/builders.rs +528 -0
data/ext/rfmt/src/doc/mod.rs +220 -0
data/ext/rfmt/src/doc/printer.rs +684 -0
data/ext/rfmt/src/format/context.rs +448 -0
data/ext/rfmt/src/format/formatter.rs +226 -0
data/ext/rfmt/src/format/mod.rs +35 -0
data/ext/rfmt/src/format/registry.rs +195 -0
data/ext/rfmt/src/format/rule.rs +555 -0
data/ext/rfmt/src/format/rules/begin.rs +295 -0
data/ext/rfmt/src/format/rules/body_end.rs +109 -0
data/ext/rfmt/src/format/rules/call.rs +409 -0
data/ext/rfmt/src/format/rules/case.rs +359 -0
data/ext/rfmt/src/format/rules/class.rs +160 -0
data/ext/rfmt/src/format/rules/def.rs +216 -0
data/ext/rfmt/src/format/rules/fallback.rs +116 -0
data/ext/rfmt/src/format/rules/if_unless.rs +407 -0
data/ext/rfmt/src/format/rules/loops.rs +325 -0
data/ext/rfmt/src/format/rules/mod.rs +31 -0
data/ext/rfmt/src/format/rules/module.rs +150 -0
data/ext/rfmt/src/format/rules/singleton_class.rs +202 -0
data/ext/rfmt/src/format/rules/statements.rs +122 -0
data/ext/rfmt/src/format/rules/variable_write.rs +296 -0
data/ext/rfmt/src/lib.rs +8 -5
data/ext/rfmt/src/parser/prism_adapter.rs +157 -2
data/lib/rfmt/version.rb +1 -1
data/lib/ruby_lsp/rfmt/formatter_runner.rb +2 -0
metadata +23 -2
data/ext/rfmt/src/emitter/mod.rs +0 -1844

data/ext/rfmt/src/format/context.rs ADDED Viewed

@@ -0,0 +1,448 @@
+//! FormatContext - State management for formatting
+//!
+//! FormatContext encapsulates all state needed during formatting:
+//! - Source code reference
+//! - Configuration
+//! - Comment tracking and emission
+//! - Group ID generation for Doc IR
+use crate::ast::{Comment, Node};
+use crate::config::Config;
+use std::collections::{BTreeMap, HashSet};
+/// Mutable state shared by the formatting rules while the AST is walked.
+///
+/// A `FormatContext` borrows the configuration and the original source for
+/// the lifetime `'a`, and owns everything needed to track comments:
+/// - the flat list of comments gathered from the AST,
+/// - which of those comments have already been emitted,
+/// - an index from start line to comment indices,
+/// - a counter used to hand out group IDs for the Doc IR.
+pub struct FormatContext<'a> {
+    /// Borrowed formatting configuration.
+    config: &'a Config,
+    /// Borrowed original source text.
+    source: &'a str,
+    /// The source split into lines once, so line lookups need no rescanning.
+    source_lines: Vec<&'a str>,
+    /// Every comment gathered by `collect_comments`, in collection order.
+    all_comments: Vec<Comment>,
+    /// Indices into `all_comments` of comments that were already emitted.
+    emitted_comment_indices: HashSet<usize>,
+    /// Ordered index for line-based lookups.
+    /// Key: a comment's start line. Value: indices into `all_comments` of the
+    /// comments that start on that line.
+    comments_by_line: BTreeMap<usize, Vec<usize>>,
+    /// The group ID that the next call to `next_group_id` will return.
+    next_group_id: u32,
+}
+impl<'a> FormatContext<'a> {
+    /// Builds an empty context over `config` and `source`.
+    ///
+    /// The source is split into lines up front; no comments are registered
+    /// and group IDs start at 0.
+    pub fn new(config: &'a Config, source: &'a str) -> Self {
+        let source_lines: Vec<&'a str> = source.lines().collect();
+        Self {
+            config,
+            source,
+            source_lines,
+            all_comments: Vec::new(),
+            emitted_comment_indices: HashSet::new(),
+            comments_by_line: BTreeMap::new(),
+            next_group_id: 0,
+        }
+    }
+    /// Returns the configuration this context was created with.
+    ///
+    /// The returned reference carries the context's `'a` lifetime rather than
+    /// the lifetime of the `&self` borrow, so a caller may keep it while
+    /// subsequently borrowing the context mutably.
+    pub fn config(&self) -> &'a Config {
+        self.config
+    }
+    /// Returns the original source text this context was created with.
+    ///
+    /// The returned slice carries the context's `'a` lifetime rather than the
+    /// lifetime of the `&self` borrow, so a caller may keep it while
+    /// subsequently borrowing the context mutably.
+    pub fn source(&self) -> &'a str {
+        self.source
+    }
+    /// Hands out the next group ID for the Doc IR and advances the counter.
+    ///
+    /// IDs start at 0 and increase by one per call.
+    pub fn next_group_id(&mut self) -> u32 {
+        let allocated = self.next_group_id;
+        self.next_group_id = allocated + 1;
+        allocated
+    }
+    /// Gathers every comment attached to `root` and its descendants.
+    ///
+    /// Any previously collected comments, emission marks and line index are
+    /// discarded first; the line index is rebuilt once the walk is complete.
+    pub fn collect_comments(&mut self, root: &Node) {
+        self.all_comments.clear();
+        self.emitted_comment_indices.clear();
+        self.comments_by_line.clear();
+        // Explicit work list instead of recursion, so deep trees cannot
+        // exhaust the call stack.
+        let mut pending: Vec<&Node> = vec![root];
+        while let Some(current) = pending.pop() {
+            let attached = &current.comments;
+            // The first non-empty batch doubles as a capacity hint.
+            if !attached.is_empty() && self.all_comments.is_empty() {
+                self.all_comments.reserve(attached.len() * 4);
+            }
+            self.all_comments.extend_from_slice(attached);
+            // Push children back-to-front so they are popped front-to-back.
+            for child in current.children.iter().rev() {
+                pending.push(child);
+            }
+        }
+        self.build_comment_index();
+    }
+    /// Populates `comments_by_line` from `all_comments`.
+    ///
+    /// Each comment's index is appended to the bucket keyed by its start line.
+    fn build_comment_index(&mut self) {
+        let Self {
+            all_comments,
+            comments_by_line,
+            ..
+        } = self;
+        for (position, entry) in all_comments.iter().enumerate() {
+            let bucket = comments_by_line
+                .entry(entry.location.start_line)
+                .or_default();
+            bucket.push(position);
+        }
+    }
+    /// Returns the unemitted comments that lie entirely before `line`.
+    ///
+    /// A comment qualifies when it starts before `line` and its end line is
+    /// also strictly less than `line`. Results are ordered by start line.
+    pub fn get_comments_before(&self, line: usize) -> Vec<&Comment> {
+        let mut found = Vec::new();
+        for (_, indices) in self.comments_by_line.range(..line) {
+            for &idx in indices {
+                if self.emitted_comment_indices.contains(&idx) {
+                    continue;
+                }
+                let candidate = &self.all_comments[idx];
+                if candidate.location.end_line < line {
+                    found.push(candidate);
+                }
+            }
+        }
+        found
+    }
+    /// Returns the unemitted comments that start on `line`.
+    ///
+    /// Callers use this to pick up comments sharing a line with code.
+    /// An empty vector is returned when no comment starts on that line.
+    pub fn get_trailing_comments(&self, line: usize) -> Vec<&Comment> {
+        match self.comments_by_line.get(&line) {
+            None => Vec::new(),
+            Some(indices) => indices
+                .iter()
+                .copied()
+                .filter(|idx| !self.emitted_comment_indices.contains(idx))
+                .map(|idx| &self.all_comments[idx])
+                .collect(),
+        }
+    }
+    /// Returns the unemitted comments inside the half-open range
+    /// `[start_line, end_line)`.
+    ///
+    /// A comment qualifies when it starts inside the range and its end line is
+    /// strictly less than `end_line`. An empty or inverted range yields an
+    /// empty vector.
+    pub fn get_comments_in_range(&self, start_line: usize, end_line: usize) -> Vec<&Comment> {
+        let mut found = Vec::new();
+        if start_line < end_line {
+            for (_, indices) in self.comments_by_line.range(start_line..end_line) {
+                for &idx in indices {
+                    if self.emitted_comment_indices.contains(&idx) {
+                        continue;
+                    }
+                    let candidate = &self.all_comments[idx];
+                    if candidate.location.end_line < end_line {
+                        found.push(candidate);
+                    }
+                }
+            }
+        }
+        found
+    }
+    /// Reports whether `[start_line, end_line)` holds at least one unemitted
+    /// comment that also ends before `end_line`.
+    ///
+    /// An empty or inverted range always yields `false`.
+    pub fn has_comments_in_range(&self, start_line: usize, end_line: usize) -> bool {
+        // The range guard short-circuits, so the map is never queried with an
+        // inverted range.
+        start_line < end_line
+            && self
+                .comments_by_line
+                .range(start_line..end_line)
+                .any(|(_, indices)| {
+                    indices.iter().any(|idx| {
+                        !self.emitted_comment_indices.contains(idx)
+                            && self.all_comments[*idx].location.end_line < end_line
+                    })
+                })
+    }
+    /// Records `comment` as emitted.
+    ///
+    /// The comment is located through the start-line index; the first stored
+    /// comment on that line with an equal location and equal text is marked.
+    /// Nothing happens when no stored comment matches.
+    pub fn mark_comment_emitted(&mut self, comment: &Comment) {
+        let matched = self
+            .comments_by_line
+            .get(&comment.location.start_line)
+            .and_then(|indices| {
+                indices.iter().copied().find(|&idx| {
+                    let stored = &self.all_comments[idx];
+                    stored.location == comment.location && stored.text == comment.text
+                })
+            });
+        if let Some(idx) = matched {
+            self.emitted_comment_indices.insert(idx);
+        }
+    }
+    /// Records the comment stored at `idx` as emitted.
+    ///
+    /// The index is not validated against the comment list.
+    #[inline]
+    pub fn mark_comment_emitted_by_index(&mut self, idx: usize) {
+        let _ = self.emitted_comment_indices.insert(idx);
+    }
+    /// Records every comment index yielded by `indices` as emitted.
+    ///
+    /// Convenience for marking a batch in a single call.
+    pub fn mark_comments_emitted(&mut self, indices: impl IntoIterator<Item = usize>) {
+        for idx in indices {
+            self.emitted_comment_indices.insert(idx);
+        }
+    }
+    /// Returns the slice of the original source covered by `node`.
+    ///
+    /// The slice spans `node.location.start_offset..node.location.end_offset`.
+    /// `None` is returned when that range is not a valid slice of the source
+    /// (out of bounds, inverted, or not on UTF-8 character boundaries).
+    ///
+    /// The returned slice carries the context's `'a` lifetime rather than the
+    /// lifetime of the `&self` borrow, so a caller may keep it while
+    /// subsequently borrowing the context mutably.
+    pub fn extract_source(&self, node: &Node) -> Option<&'a str> {
+        let span = node.location.start_offset..node.location.end_offset;
+        self.source.get(span)
+    }
+    /// Returns the slice of the original source between byte offsets `start`
+    /// (inclusive) and `end` (exclusive).
+    ///
+    /// `None` is returned when the range is not a valid slice of the source
+    /// (out of bounds, inverted, or not on UTF-8 character boundaries).
+    ///
+    /// The returned slice carries the context's `'a` lifetime rather than the
+    /// lifetime of the `&self` borrow, so a caller may keep it while
+    /// subsequently borrowing the context mutably.
+    pub fn extract_source_range(&self, start: usize, end: usize) -> Option<&'a str> {
+        self.source.get(start..end)
+    }
+    /// Reports whether `comment` sits alone on its source line.
+    ///
+    /// The comment's start line is treated as 1-based. The result is `true`
+    /// only when the text before the first `#` on that line consists solely
+    /// of spaces and tabs, and the rest of the line (ignoring trailing
+    /// whitespace) equals the comment's text. Lines outside the source, or
+    /// without any `#`, yield `false`.
+    pub fn is_standalone_comment(&self, comment: &Comment) -> bool {
+        // `checked_sub` rejects line 0; `get` rejects lines past the end.
+        let line = match comment
+            .location
+            .start_line
+            .checked_sub(1)
+            .and_then(|zero_based| self.source_lines.get(zero_based))
+        {
+            Some(text) => *text,
+            None => return false,
+        };
+        match line.find('#') {
+            None => false,
+            Some(hash_pos) => {
+                let (prefix, from_hash) = line.split_at(hash_pos);
+                let blank_prefix = prefix.bytes().all(|b| matches!(b, b' ' | b'\t'));
+                let same_text = from_hash.trim_end() == comment.text.trim_end();
+                blank_prefix && same_text
+            }
+        }
+    }
+    /// Returns every comment that has not been emitted yet, in collection
+    /// order.
+    ///
+    /// Intended for flushing leftover comments at the end of the file.
+    pub fn get_remaining_comments(&self) -> Vec<&Comment> {
+        self.all_comments
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, comment)| {
+                if self.emitted_comment_indices.contains(&idx) {
+                    None
+                } else {
+                    Some(comment)
+                }
+            })
+            .collect()
+    }
+    /// Lazily yields the indices of unemitted comments that lie entirely
+    /// before `line`.
+    ///
+    /// The indices can be passed to `get_comment` and
+    /// `mark_comment_emitted_by_index`; no comment data is copied.
+    pub fn get_comment_indices_before(&self, line: usize) -> impl Iterator<Item = usize> + '_ {
+        let emitted = &self.emitted_comment_indices;
+        let comments = &self.all_comments;
+        self.comments_by_line
+            .range(..line)
+            .flat_map(|(_, indices)| indices.iter().copied())
+            .filter(move |idx| {
+                !emitted.contains(idx) && comments[*idx].location.end_line < line
+            })
+    }
+    /// Lazily yields the indices of unemitted comments that start on `line`.
+    ///
+    /// Yields nothing when no comment starts on that line.
+    pub fn get_trailing_comment_indices(&self, line: usize) -> impl Iterator<Item = usize> + '_ {
+        let emitted = &self.emitted_comment_indices;
+        self.comments_by_line
+            .get(&line)
+            .map(Vec::as_slice)
+            .unwrap_or(&[])
+            .iter()
+            .copied()
+            .filter(move |idx| !emitted.contains(idx))
+    }
+    /// Lazily yields the indices of unemitted comments inside the half-open
+    /// range `[start_line, end_line)`.
+    ///
+    /// A comment qualifies when it starts inside the range and its end line is
+    /// strictly less than `end_line`. When `start_line >= end_line` the
+    /// iterator is empty, matching `get_comments_in_range` and
+    /// `has_comments_in_range`.
+    pub fn get_comment_indices_in_range(
+        &self,
+        start_line: usize,
+        end_line: usize,
+    ) -> impl Iterator<Item = usize> + '_ {
+        let is_valid = start_line < end_line;
+        // Clamp the upper bound so an inverted range becomes an empty one
+        // instead of being handed to the map as-is.
+        let upper = if is_valid { end_line } else { start_line };
+        let emitted = &self.emitted_comment_indices;
+        let comments = &self.all_comments;
+        self.comments_by_line
+            .range(start_line..upper)
+            .flat_map(|(_, indices)| indices.iter().copied())
+            .filter(move |idx| {
+                is_valid
+                    && !emitted.contains(idx)
+                    && comments[*idx].location.end_line < end_line
+            })
+    }
+    /// Lazily yields, in ascending order, the index of every comment that has
+    /// not been emitted yet.
+    pub fn get_remaining_comment_indices(&self) -> impl Iterator<Item = usize> + '_ {
+        let emitted = &self.emitted_comment_indices;
+        let total = self.all_comments.len();
+        (0..total).filter(move |idx| !emitted.contains(idx))
+    }
+    /// Looks up the collected comment stored at `idx`.
+    ///
+    /// Returns `None` when `idx` is past the end of the comment list.
+    #[inline]
+    pub fn get_comment(&self, idx: usize) -> Option<&Comment> {
+        let comments = self.all_comments.as_slice();
+        comments.get(idx)
+    }
+    /// Returns the largest `end_line` found on `ast` or any of its
+    /// descendants.
+    ///
+    /// Only node locations are inspected; comments attached to nodes do not
+    /// contribute.
+    pub fn find_last_code_line(ast: &Node) -> usize {
+        let mut last = ast.location.end_line;
+        // Explicit work list instead of recursion; visiting order does not
+        // matter for a maximum.
+        let mut pending: Vec<&Node> = vec![ast];
+        while let Some(current) = pending.pop() {
+            let end = current.location.end_line;
+            if end > last {
+                last = end;
+            }
+            for child in &current.children {
+                pending.push(child);
+            }
+        }
+        last
+    }
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ast::{CommentPosition, CommentType, FormattingInfo, Location, NodeType};
+    use std::collections::HashMap;
+    /// Builds a `Leading` line comment located on `start_line`.
+    fn make_comment(text: &str, start_line: usize) -> Comment {
+        let width = text.len();
+        Comment {
+            text: text.to_string(),
+            location: Location::new(start_line, 0, start_line, width, 0, width),
+            comment_type: CommentType::Line,
+            position: CommentPosition::Leading,
+        }
+    }
+    /// Builds a childless program node that carries `comments`.
+    fn make_node_with_comments(comments: Vec<Comment>) -> Node {
+        Node {
+            node_type: NodeType::ProgramNode,
+            location: Location::new(1, 0, 10, 0, 0, 100),
+            children: Vec::new(),
+            metadata: HashMap::new(),
+            comments,
+            formatting: FormattingInfo::default(),
+        }
+    }
+    #[test]
+    fn test_collect_comments() {
+        let config = Config::default();
+        let mut ctx = FormatContext::new(&config, "# comment\nclass Foo\nend");
+        let root = make_node_with_comments(vec![make_comment("# comment", 1)]);
+        ctx.collect_comments(&root);
+        let found = ctx.get_comments_before(10);
+        assert_eq!(found.len(), 1);
+        assert_eq!(found[0].text, "# comment");
+    }
+    #[test]
+    fn test_mark_comment_emitted() {
+        let config = Config::default();
+        let mut ctx = FormatContext::new(&config, "# comment\ncode");
+        let comment = make_comment("# comment", 1);
+        let root = make_node_with_comments(vec![comment.clone()]);
+        ctx.collect_comments(&root);
+        // The comment is visible until it is marked.
+        assert_eq!(ctx.get_comments_before(10).len(), 1);
+        ctx.mark_comment_emitted(&comment);
+        // Once marked it is no longer reported.
+        assert_eq!(ctx.get_comments_before(10).len(), 0);
+    }
+    #[test]
+    fn test_get_comments_in_range() {
+        let config = Config::default();
+        let mut ctx = FormatContext::new(&config, "# comment\ncode");
+        let root = make_node_with_comments(vec![
+            make_comment("# comment 1", 2),
+            make_comment("# comment 2", 5),
+            make_comment("# comment 3", 8),
+        ]);
+        ctx.collect_comments(&root);
+        let found = ctx.get_comments_in_range(3, 7);
+        assert_eq!(found.len(), 1);
+        assert_eq!(found[0].text, "# comment 2");
+    }
+    #[test]
+    fn test_trailing_comments() {
+        let config = Config::default();
+        let mut ctx = FormatContext::new(&config, "code # trailing");
+        let trailing_comment = Comment {
+            text: "# trailing".to_string(),
+            location: Location::new(1, 5, 1, 15, 5, 15),
+            comment_type: CommentType::Line,
+            position: CommentPosition::Trailing,
+        };
+        let root = make_node_with_comments(vec![trailing_comment]);
+        ctx.collect_comments(&root);
+        let found = ctx.get_trailing_comments(1);
+        assert_eq!(found.len(), 1);
+        assert_eq!(found[0].text, "# trailing");
+    }
+    #[test]
+    fn test_next_group_id() {
+        let config = Config::default();
+        let mut ctx = FormatContext::new(&config, "");
+        for expected in 0..3u32 {
+            assert_eq!(ctx.next_group_id(), expected);
+        }
+    }
+    #[test]
+    fn test_extract_source() {
+        let config = Config::default();
+        let source = "class Foo\nend";
+        let ctx = FormatContext::new(&config, source);
+        let class_node = Node {
+            node_type: NodeType::ClassNode,
+            location: Location::new(1, 0, 2, 3, 0, 13),
+            children: Vec::new(),
+            metadata: HashMap::new(),
+            comments: Vec::new(),
+            formatting: FormattingInfo::default(),
+        };
+        assert_eq!(ctx.extract_source(&class_node), Some("class Foo\nend"));
+    }
+}

data/ext/rfmt/src/format/formatter.rs ADDED Viewed

@@ -0,0 +1,226 @@
+//! Formatter - Main entry point for the rule-based formatting system
+//!
+//! The Formatter coordinates the formatting process:
+//! 1. Initialize FormatContext with source and config
+//! 2. Collect comments from AST
+//! 3. Apply rules to generate Doc IR
+//! 4. Print Doc IR to string using Printer
+use crate::ast::{Node, NodeType};
+use crate::config::Config;
+use crate::doc::{concat, hardline, Doc, Printer};
+use crate::error::Result;
+use super::context::FormatContext;
+use super::registry::RuleRegistry;
+use super::rule::format_remaining_comments;
+/// Entry point of the rule-based formatting pipeline.
+///
+/// A `Formatter` owns its configuration and a registry of formatting rules.
+/// Program and statements nodes are handled directly; every other node type
+/// is dispatched to the rule the registry returns for it.
+pub struct Formatter {
+    /// Formatting configuration handed to the context and the printer.
+    config: Config,
+    /// Rules consulted for node types the formatter does not handle itself.
+    registry: RuleRegistry,
+}
+impl Formatter {
+    /// Builds a formatter that uses `config` together with the default rule
+    /// registry.
+    pub fn new(config: Config) -> Self {
+        let registry = RuleRegistry::default_registry();
+        Self::with_registry(config, registry)
+    }
+    /// Builds a formatter that uses `config` together with a caller-supplied
+    /// rule registry.
+    pub fn with_registry(config: Config, registry: RuleRegistry) -> Self {
+        Formatter { config, registry }
+    }
+    /// Formats Ruby source code from its parsed AST.
+    ///
+    /// # Arguments
+    /// * `source` - The original Ruby source code
+    /// * `ast` - The root node of the parsed AST
+    ///
+    /// # Returns
+    /// The formatted source code as a string
+    ///
+    /// # Errors
+    /// Propagates any error produced while formatting a node.
+    pub fn format(&self, source: &str, ast: &Node) -> Result<String> {
+        // Set up the per-run state and register every comment in the tree.
+        let mut ctx = FormatContext::new(&self.config, source);
+        ctx.collect_comments(ast);
+        // Turn the tree into Doc IR.
+        let body = self.format_node(ast, &mut ctx)?;
+        // Append whatever comments the rules did not emit.
+        let last_code_line = FormatContext::find_last_code_line(ast);
+        let leftovers = format_remaining_comments(&mut ctx, last_code_line);
+        let document = if !leftovers.is_empty() {
+            concat(vec![body, leftovers])
+        } else {
+            body
+        };
+        // Render the Doc IR to text.
+        let mut printer = Printer::new(&self.config);
+        Ok(printer.print(&document))
+    }
+    /// Formats one node into Doc IR.
+    ///
+    /// Program and statements nodes are handled by the formatter itself; any
+    /// other node type is delegated to the rule registered for it.
+    pub fn format_node(&self, node: &Node, ctx: &mut FormatContext) -> Result<Doc> {
+        match &node.node_type {
+            NodeType::ProgramNode => return self.format_program(node, ctx),
+            NodeType::StatementsNode => return self.format_statements(node, ctx),
+            _ => {}
+        }
+        // Everything else goes through the rule registry.
+        let rule = self.registry.get_rule(&node.node_type);
+        rule.format(node, ctx, &self.registry)
+    }
+    /// Borrows the rule registry, e.g. so rules can format nested nodes.
+    pub fn registry(&self) -> &RuleRegistry {
+        let registry = &self.registry;
+        registry
+    }
+    /// Formats the root program node by laying out its children in sequence.
+    fn format_program(&self, node: &Node, ctx: &mut FormatContext) -> Result<Doc> {
+        let children = &node.children;
+        self.format_children_with_spacing(children, ctx)
+    }
+    /// Formats a statements node (e.g. the body of a class, module or def) by
+    /// laying out its children in sequence.
+    fn format_statements(&self, node: &Node, ctx: &mut FormatContext) -> Result<Doc> {
+        let children = &node.children;
+        self.format_children_with_spacing(children, ctx)
+    }
+    /// Formats `children` in order, separating neighbours with line breaks.
+    ///
+    /// Between two adjacent children one hard line break is emitted; a second
+    /// one (i.e. a blank line) is added when the next child starts more than
+    /// one line after the current child ends. An empty slice yields
+    /// `Doc::Empty`.
+    fn format_children_with_spacing(
+        &self,
+        children: &[Node],
+        ctx: &mut FormatContext,
+    ) -> Result<Doc> {
+        if children.is_empty() {
+            return Ok(Doc::Empty);
+        }
+        let mut parts: Vec<Doc> = Vec::with_capacity(children.len() * 2);
+        // Walks one element ahead of the main loop to expose each successor.
+        let mut successors = children.iter().skip(1);
+        for current in children {
+            parts.push(self.format_node(current, ctx)?);
+            if let Some(next) = successors.next() {
+                let gap = next
+                    .location
+                    .start_line
+                    .saturating_sub(current.location.end_line);
+                parts.push(hardline());
+                // A gap in the source is collapsed to a single blank line.
+                if gap > 1 {
+                    parts.push(hardline());
+                }
+            }
+        }
+        Ok(concat(parts))
+    }
+}
+impl Default for Formatter {
+    /// Builds a formatter from the default configuration.
+    fn default() -> Self {
+        let config = Config::default();
+        Formatter::new(config)
+    }
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ast::{FormattingInfo, Location};
+    use std::collections::HashMap;
+    /// Builds a program node spanning lines 1 to `end_line` with the given
+    /// children.
+    fn make_program_node(children: Vec<Node>, end_line: usize) -> Node {
+        Node {
+            node_type: NodeType::ProgramNode,
+            location: Location::new(1, 0, end_line, 0, 0, 100),
+            children,
+            metadata: HashMap::new(),
+            comments: Vec::new(),
+            formatting: FormattingInfo::default(),
+        }
+    }
+    /// Builds a childless class node whose metadata carries `name`.
+    fn make_class_node(
+        name: &str,
+        start_line: usize,
+        end_line: usize,
+        start_offset: usize,
+        end_offset: usize,
+    ) -> Node {
+        let metadata: HashMap<String, String> =
+            std::iter::once(("name".to_string(), name.to_string())).collect();
+        Node {
+            node_type: NodeType::ClassNode,
+            location: Location::new(start_line, 0, end_line, 3, start_offset, end_offset),
+            children: Vec::new(),
+            metadata,
+            comments: Vec::new(),
+            formatting: FormattingInfo::default(),
+        }
+    }
+    #[test]
+    fn test_format_simple_class() {
+        let source = "class Foo\nend";
+        let ast = make_program_node(vec![make_class_node("Foo", 1, 2, 0, 13)], 2);
+        let output = Formatter::default().format(source, &ast).unwrap();
+        assert_eq!(output, "class Foo\nend\n");
+    }
+    #[test]
+    fn test_format_multiple_classes() {
+        let source = "class Foo\nend\n\nclass Bar\nend";
+        let classes = vec![
+            make_class_node("Foo", 1, 2, 0, 13),
+            make_class_node("Bar", 4, 5, 15, 28),
+        ];
+        let ast = make_program_node(classes, 5);
+        let output = Formatter::default().format(source, &ast).unwrap();
+        // Both classes survive and the blank line between them is kept.
+        assert!(output.contains("class Foo\nend"));
+        assert!(output.contains("class Bar\nend"));
+        assert!(output.contains("\n\n"));
+    }
+    #[test]
+    fn test_formatter_with_custom_config() {
+        let mut config = Config::default();
+        config.formatting.indent_width = 4;
+        let source = "class Foo\nend";
+        let ast = make_program_node(vec![make_class_node("Foo", 1, 2, 0, 13)], 2);
+        let output = Formatter::new(config).format(source, &ast).unwrap();
+        assert_eq!(output, "class Foo\nend\n");
+    }
+}