RubyGems - tsql_parser - Versions diffs - 0.0.1 - Mend

tsql_parser 0.0.1

Files changed (11) hide show

checksums.yaml +7 -0
data/lib/parsing/formatter.rb +195 -0
data/lib/parsing/iterator.rb +54 -0
data/lib/parsing/keyword.rb +93 -0
data/lib/parsing/model/flat_sql_container.rb +77 -0
data/lib/parsing/model/sql_container.rb +48 -0
data/lib/parsing/parser.rb +145 -0
data/lib/parsing/text_formatter.rb +247 -0
data/lib/parsing/tokenizer.rb +151 -0
data/lib/tsql_parser.rb +38 -0
metadata +53 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: ddacc151a5ff2930d1bf0e7501a53711503811c123fdcc1fa74a5e3926435c94
+  data.tar.gz: 3ac0fa643de5be751c55544bb2b226c8e8e5b062f56d8910db01f853c5400597
+SHA512:
+  metadata.gz: eb1cd578f4346f3ed104d5a177ee1f7a3d67f5030fa748df64a85156a364ad38e0efe0e9fc48770d5f216d5a0857c2d65250caee3e28974392fd864a47873227
+  data.tar.gz: ace5d1a98579b17d3b48447691c61c3a0d346a090ba5462b54c6fa44c5ea7da1093825b71a6fb9b28e51f5e93785915f8e358e895011c236bf7ca3121865a84e

data/lib/parsing/formatter.rb ADDED Viewed

@@ -0,0 +1,195 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/formatter.rb
+# object:
+#   TSqlParser::Parsing::Formatter
+module TSqlParser::Parsing
+  require_relative "iterator"
+  require_relative "parser"
+  require_relative "text_formatter"
+  require_relative "model/sql_container"
+  require_relative "model/flat_sql_container"
+  class Formatter
+    def self.format(tokens, tab_count = 0, tab = "    ")
+      containers = self.as_containers(tokens)
+      lines = self.combine_containers(containers)
+      lines = self.cleanup_whitespace(lines)
+      lines = self.insert_indentation(lines, tab_count, tab)
+      lines = self.insert_newlines(lines)
+      text = lines.join("\n")
+      text = TextFormatter.format_inserts(text, tab)
+      text = TextFormatter.format_updates(text, tab)
+      text = TextFormatter.format_joins(text, tab)
+      text = TextFormatter.format_wheres(text, tab)
+      text = TextFormatter.format_selects(text, tab)
+      text = TextFormatter.format_sets(text, tab)
+      text
+    end
+    private
+    def self.insert_newlines(lines)
+      new_lines = []
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if first.nil?
+          new_lines << ""
+          next
+        end
+        if %w[IF RETURN INSERT DELETE WHILE].include? first or first.start_with? "/*"
+          new_lines << ""
+        end
+        new_lines << line
+      end
+      new_lines
+    end
+    def self.insert_indentation(lines, tab_count = 0, tab = "    ")
+      indented_lines = []
+      work_lines = []
+      lines.each do |line|
+        work_lines << line.split("\n")
+      end
+      sub_one = false
+      work_lines = work_lines.flatten
+      last = ''
+      work_lines.each_with_index do |line, index|
+        first = line.strip.split(" ").first
+        if %w[CASE BEGIN SELECT].include? first or line.strip.start_with? "CREATE PROCEDURE"
+          indented_lines << "#{tab * tab_count}#{line}"
+          tab_count += 1
+        elsif %w[END GO FROM].include? first and last != "DELETE"
+          tab_count -= 1 if tab_count > 0
+          indented_lines << "#{tab * tab_count}#{line}"
+        elsif %w[IF].include? first
+          indented_lines << "#{tab * tab_count}#{line}"
+          next_line = work_lines[index + 1] unless index + 1 > work_lines.size
+          sub_one = true unless next_line.start_with? "BEGIN"
+          tab_count += 1 if sub_one
+          last = first
+          next
+        else
+          indented_lines << "#{tab * tab_count}#{line}"
+        end
+        if sub_one
+          sub_one = false
+          tab_count -= 1
+        end
+        last = first
+      end
+      indented_lines
+    end
+    def self.cleanup_whitespace(combined)
+      lines = []
+      combined.each do |c|
+        lines << self.safe_ws_cleanup(c)
+      end
+      lines
+    end
+    def self.combine_containers(containers)
+      lines = []
+      containers.each do |c|
+        ct = c.get_token
+        builder = []
+        builder << ct[:value]
+        if c.has_siblings?
+          c.get_siblings.each do |sibling|
+            st = sibling.get_token
+            if st[:comment]
+              builder << "\n#{st[:value]}"
+              next
+            end
+            builder << st[:value]
+          end
+        end
+        lines << builder.join(" ")
+      end
+      lines
+    end
+    def self.as_containers(tokens)
+      containers = []
+      container = nil
+      tokens.each do |t|
+        if Parser.is_new_node_keyword? t[:value]
+          containers << container unless container.nil?
+          container = SqlContainer.new(t)
+        else
+          container.add t unless container.nil?
+        end
+      end
+      containers << container unless container.nil?
+      FlatSqlContainer.flatten_containers(containers)
+    end
+    def self.safe_ws_cleanup(line)
+      parts = []
+      builder = ""
+      in_string = false
+      line.split("").each do |c|
+        # if we run into a single-quote
+        # flip the in_string flag
+        if c == "'"
+          if not in_string
+            in_string = true
+            parts << builder unless builder.empty?
+            builder = ""
+            builder << c
+          else
+            in_string = false
+            builder << c
+            parts << builder
+            builder = ""
+          end
+        else
+          builder << c
+        end
+      end
+      parts << builder unless builder.empty?
+      parts.map do |p|
+        if p.start_with? "'" and p.end_with? "'"
+          p
+        else
+          self.fix_whitespace(p)
+        end
+      end.join
+    end
+    def self.fix_whitespace(line)
+      line.gsub(" , ", ", ")
+      #.gsub(' (', '(')
+          .gsub(" )", ")")
+          .gsub("( ", "(")
+          .gsub("AS(", "AS (")
+          .gsub("IN(", "IN (")
+          .gsub(",(", ", (")
+          .gsub("[ ", "[")
+          .gsub(" ]", "]")
+          .gsub("] .", "].")
+          .gsub(". [", ".[")
+          .gsub(" ;", ";")
+    end
+  end
+end

data/lib/parsing/iterator.rb ADDED Viewed

@@ -0,0 +1,54 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/iterator.rb
+# object:
+#   TSqlParser::Parsing::TokenIterator
+module TSqlParser::Parsing
+  class TokenIterator
+    def initialize(tokens)
+      @tokens = tokens
+      @size = tokens.size
+      @iter = -1
+    end
+    def has_next?
+      @iter < @size - 1
+    end
+    def get!
+      @tokens[@iter]
+    end
+    def peek!
+      @tokens[@iter + 1]
+    end
+    def peek_ahead!(length)
+      @tokens[@iter + length]
+    end
+    def peek_value!
+      self.peek![:value]
+    end
+    def peek_ahead_value!(length)
+      self.peek_ahead!(length)[:value]
+    end
+    def next!
+      @iter += 1
+      self.get!
+    end
+  end
+end

data/lib/parsing/keyword.rb ADDED Viewed

@@ -0,0 +1,93 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/keyword.rb
+# object:
+#   TSqlParser::Parsing::Keyword
+module TSqlParser::Parsing
+  # Word lists used to categorize tokens. Every method builds and returns a
+  # new array on each call.
+  class Keyword
+    # @return [Array<String>] reserved keywords, special variables, functions
+    #   and types, in that order (duplicates are not removed).
+    def self.get_keywords
+      get_reserved_keywords + get_special_variables + get_functions + get_types
+    end
+    # @return [Array<String>] keywords that start a new container; WHERE and
+    #   SET are listed but then excluded again.
+    def self.get_new_node_keywords
+      candidates = %w[CREATE ALTER DROP RENAME SELECT INSERT UPDATE DELETE WHILE IF ELSE DECLARE SET WITH BEGIN FROM WHERE INNER LEFT JOIN END GO GROUP ORDER CASE PRINT RETURN]
+      excluded = %w[WHERE SET]
+      candidates - excluded
+    end
+    # @return [String] the keyword that opens a block.
+    def self.get_begin_keyword
+      "BEGIN"
+    end
+    # @return [String] the keyword that closes a block.
+    def self.get_end_keyword
+      "END"
+    end
+    # @return [Array<String>] every word that can be part of a join clause.
+    def self.get_join_keywords
+      %w[INNER OUTER LEFT RIGHT FULL CROSS JOIN]
+    end
+    # @return [Array<String>] the words that name a join type.
+    def self.get_join_type_keywords
+      %w[INNER LEFT RIGHT CROSS FULL]
+    end
+    # @return [Array<String>] keywords looked up by Parser.is_newline_required?.
+    def self.get_newline_keywords
+      %w[INSERT UPDATE DELETE SELECT SET DECLARE CREATE FROM INNER FULL OUTER LEFT RIGHT CROSS JOIN IF BEGIN END RETURN WHERE PRINT GROUP ORDER WHILE]
+    end
+    # @return [Array<String>] the reserved T-SQL keywords.
+    def self.get_reserved_keywords
+      %w[ADD ALL ALTER AND ANY AS ASC AUTHORIZATION BACKUP BEGIN BETWEEN BREAK BROWSE BULK BY CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COMMIT COMPUTE CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE CONVERT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE DBCC DEALLOCATE DECLARE DEFAULT DELETE DENY DESC DISK DISTINCT DISTRIBUTED DOUBLE DROP DUMP ELSE END ERRLVL ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL FETCH FILE FILLFACTOR FOR FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION GOTO GRANT GROUP HAVING HOLDLOCK IDENTITY IDENTITY_INSERT IDENTITYCOL IF IN INDEX INNER INSERT INTERSECT INTO IS JOIN KEY KILL LEFT LIKE LINENO LOAD MERGE NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF OF OFF OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION OR ORDER OUTER OVER PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC PROCEDURE PUBLIC RAISERROR READ READTEXT RECONFIGURE REFERENCES REPLICATION RESTORE RESTRICT RETURN REVERT REVOKE RIGHT ROLLBACK ROWCOUNT ROWGUIDCOL RULE SAVE SCHEMA SECURITYAUDIT SELECT SEMANTICKEYPHRASETABLE SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE SESSION_USER SET SETUSER SHUTDOWN SOME STATISTICS SYSTEM_USER TABLE TABLESAMPLE TEXTSIZE THEN TO TOP TRAN TRANSACTION TRIGGER TRUNCATE TRY_CONVERT TSEQUAL UNION UNIQUE UNPIVOT UPDATE UPDATETEXT USE USER VALUES VARYING VIEW WAITFOR WHEN WHERE WHILE WITH WITHIN WRITETEXT]
+    end
+    # @return [Array<String>] the known @@-prefixed special variables.
+    def self.get_special_variables
+      %w[@@ERROR @@FETCH_STATUS @@IDENTITY @@LOCK_TIMEOUT @@NESTLEVEL @@ROWCOUNT @@SERVERNAME @@SPID @@SQLSTATUS @@TRANCOUNT @@VERSION]
+    end
+    # @return [Array<String>] the data type names.
+    def self.get_types
+      %w[BIGINT BINARY BIT CHAR CURSOR DATE DATETIME DATETIME2 DATETIMEOFFSET DECIMAL FLOAT HIERARCHYID IMAGE INT MONEY NCHAR NTEXT NUMERIC NVARCHAR REAL ROWVERSION SMALLDATETIME SMALLINT SMALLMONEY SQL_VARIANT TABLE TEXT TIME TINYINT UNIQUEIDENTIFIER VARBINARY VARCHAR XML]
+    end
+    # @return [Array<String>] math, conversion, string, aggregate and date
+    #   function names, in that order.
+    def self.get_functions
+      [
+        get_math_functions,
+        get_conversion_functions,
+        get_string_functions,
+        get_aggregate_functions,
+        get_date_functions
+      ].flatten
+    end
+    # Function name lists, one per category.
+    def self.get_math_functions
+      %w[ABS ACOS ASIN ATAN ATN2 CEILING COS COT DEGREES EXP FLOOR LOG LOG10 PI POWER RADIANS RAND ROUND SIGN SIN SQRT SQUARE TAN]
+    end
+    def self.get_conversion_functions
+      %w[CAST CONVERT PARSE TRY_CAST TRY_CONVERT TRY_PARSE]
+    end
+    def self.get_string_functions
+      %w[ASCII CHAR CHARINDEX CONCAT CONCAT_WS DIFFERENCE FORMAT LEFT LEN LOWER LTRIM NCHAR PATINDEX QUOTENAME REPLACE REPLICATE REVERSE RIGHT RTRIM SOUNDEX SPACE STR STRING_AGG STRING_ESCAPE STRING_SPLIT STUFF SUBSTRING TRANSLATE TRIM UNICODE UPPER]
+    end
+    def self.get_aggregate_functions
+      %w[APPROX_COUNT_DISTINCT AVG CHECKSUM_AGG COUNT COUNT_BIG GROUPING GROUPING_ID MAX MIN STDEV STDEVP STRING_AGG SUM VAR VARP]
+    end
+    def self.get_date_functions
+      %w[CURRENT_TIMESTAMP CURRENT_TIMEZONE CURRENT_TIMEZONE_ID DATE_BUCKET DATEADD DATEDIFF DATEDIFF_BIG DATEFROMPARTS DATENAME DATEPART DATETIME2FROMPARTS DATETIMEFROMPARTS DATETIMEOFFSETFROMPARTS DATETRUNC DAY EOMONTH FORMAT GETDATE GETUTCDATE ISDATE MONTH SMALLDATETIMEFROMPARTS SWITCHOFFSET SYSDATETIME SYSDATETIMEOFFSET SYSUTCDATETIME TIMEFROMPARTS TODATETIMEOFFSET YEAR]
+    end
+  end
+end

data/lib/parsing/model/flat_sql_container.rb ADDED Viewed

@@ -0,0 +1,77 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/model/flat_sql_container.rb
+# object:
+#   TSqlParser::Parsing::FlatSqlContainer
+module TSqlParser::Parsing
+  class FlatSqlContainer
+    def initialize(token = nil)
+      @token = token
+      @children = []
+      @siblings = []
+      unless token.nil?
+        if token.has_nodes?
+          token.get_nodes.each do |n|
+            @siblings << n
+          end
+        end
+      end
+    end
+    def self.flatten_containers(containers)
+      flat_containers = []
+      containers.each do |c|
+        flat_containers << FlatSqlContainer.new(c)
+      end
+      flat_containers
+    end
+    def set_token(token)
+      @token = token
+    end
+    def add_sibling(token)
+      @siblings << token
+    end
+    def add_child(token)
+      @children << SqlContainer.new(token)
+    end
+    def has_siblings?
+      @siblings.size > 0
+    end
+    def has_children?
+      @children.size > 0
+    end
+    def get_siblings
+      @siblings
+    end
+    def get_children
+      @children
+    end
+    def get_token
+      @token.get_token
+    end
+    def to_s
+      @token.get_token[:value] unless @token.nil?
+    end
+  end
+end

data/lib/parsing/model/sql_container.rb ADDED Viewed

@@ -0,0 +1,48 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/model/sql_container.rb
+# object:
+#   TSqlParser::Parsing::SqlContainer
+module TSqlParser::Parsing
+  class SqlContainer
+    def initialize(token = nil)
+      @token = token
+      @nodes = []
+    end
+    def set_token(token)
+      @token = token
+    end
+    def add(token)
+      @nodes << SqlContainer.new(token)
+    end
+    def has_nodes?
+      @nodes.size > 0
+    end
+    def get_nodes
+      @nodes
+    end
+    def get_token
+      @token
+    end
+    def to_s
+      @token[:value] unless @token.nil?
+    end
+  end
+end

data/lib/parsing/parser.rb ADDED Viewed

@@ -0,0 +1,145 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/parser.rb
+# object:
+#   TSqlParser::Parsing::Parser
+module TSqlParser::Parsing
+  require_relative "keyword"
+  class Parser
+    def self.is_multiline_comment_start?(c, next_c)
+      c == "/" and next_c == "*"
+    end
+    def self.is_multiline_comment_end?(c, next_c)
+      c == "*" and next_c == "/"
+    end
+    def self.is_comment_start?(c, next_c)
+      c == "-" and next_c == "-"
+    end
+    def self.is_operator?(s)
+      ["<>", "!=", "<=", ">=", "!<", "!>", "+=", "-=", "*=", "/=", "%=", "==", "=", "+", "-", "%", "/", "*", "<", ">"].include? s
+    end
+    def self.is_one_char_op?(c)
+      ["=", "+", "-", "%", "/", "*", "<", ">"].include? c
+    end
+    def self.is_two_char_op?(c, next_c)
+      ["<>", "!=", "<=", ">=", "==", "!<", "!>", "+=", "-=", "*=", "/=", "%="].include? "#{c}#{next_c}"
+    end
+    def self.is_numeric?(s)
+      s.match? /\A-?+(?=.??\d)\d*\.?\d*\z/
+    end
+    def self.is_variable?(s)
+      s.start_with? "@"
+    end
+    def self.is_special_variable?(s)
+      s.start_with? "@@"
+    end
+    def self.is_temp_table?(s)
+      s.start_with? "#"
+    end
+    def self.is_label?(s)
+      s.end_with? ":"
+    end
+    def self.is_parenthesis?(s)
+      ["(", ")"].include? s
+    end
+    def self.is_open_parenthesis?(s)
+      s == "("
+    end
+    def self.is_close_parenthesis?(s)
+      s == ")"
+    end
+    def self.is_bracket?(s)
+      ["[", "]"].include? s
+    end
+    def self.is_open_bracket?(s)
+      s == "["
+    end
+    def self.is_close_bracket?(s)
+      s == "]"
+    end
+    def self.is_string_mark?(s)
+      s == "'"
+    end
+    def self.is_comma?(s)
+      s == ","
+    end
+    def self.is_comment?(s)
+      (s.start_with? "/*" and s.end_with? "*/") or s.start_with? "--"
+    end
+    def self.is_keyword?(s)
+      Keyword.get_keywords.include? s.upcase
+    end
+    def self.is_begin?(s)
+      Keyword.get_begin_keyword == s.upcase
+    end
+    def self.is_end?(s)
+      Keyword.get_end_keyword == s.upcase
+    end
+    def self.is_join?(s)
+      Keyword.get_join_keywords.include? s.upcase
+    end
+    def self.is_join_type?(s)
+      Keyword.get_join_type_keywords.include? s.upcase
+    end
+    def self.is_function?(s)
+      Keyword.get_functions.include? s.upcase
+    end
+    def self.is_type?(s)
+      Keyword.get_types.include? s.upcase
+    end
+    def self.is_special_variable?(s)
+      Keyword.get_special_variables.include? s.upcase
+    end
+    def self.is_newline_required?(s)
+      Keyword.get_newline_keywords.include? s.upcase
+    end
+    def self.is_new_node_keyword?(s)
+      Keyword.get_new_node_keywords.include? s.upcase
+    end
+    def self.is_terminator?(s)
+      s == ";"
+    end
+  end
+end

data/lib/parsing/text_formatter.rb ADDED Viewed

@@ -0,0 +1,247 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/text_formatter.rb
+# object:
+#   TSqlParser::Parsing::TextFormatter
+module TSqlParser::Parsing
+  class TextFormatter
+    def self.format_sets(text, tab = "    ")
+      formatted = []
+      lines = text.split("\n")
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if first == "SET" and not line.strip.start_with? "SET @"
+          tab_count = self.get_tab_count(line, tab)
+          set = line.strip[first.size + 1..]
+          new_set = self.format_set(set, tab_count, tab)
+          if new_set.nil?
+            formatted << line
+            next
+          end
+          formatted << line.sub(set, new_set)
+        elsif first != "SET" and line.include? " SET "
+          parts = line.strip.split(" SET ")
+          tab_count = self.get_tab_count(line, tab)
+          formatted << "#{tab * tab_count}#{parts[0]}\n"
+          parts[1..].each {|p| formatted << "#{tab * tab_count}SET #{p}" }
+        else
+          formatted << line
+        end
+      end
+      formatted.join("\n")
+    end
+    def self.format_joins(text, tab = "    ")
+      text = text.gsub(/INNER\s+JOIN/, "INNER JOIN")
+                 .gsub(/LEFT\s+JOIN/, "LEFT JOIN")
+      lines = text.split("\n")
+      new_text = []
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if line.include? " WHERE " and first != "WHERE" and not first.start_with? "--" and not first.start_with? "/*"
+          tab_count = self.get_tab_count(line, tab)
+          where_parts = line.strip.split(" WHERE ")
+          where_text = []
+          where_text << "#{tab * tab_count}#{where_parts[0]}"
+          where_text << "#{tab * tab_count}WHERE #{where_parts[1]}"
+          new_text << where_text.join("\n")
+        else
+          new_text << line
+        end
+      end
+      new_text.join("\n")
+    end
+    def self.format_updates(text, tab = "    ")
+      formatted = []
+      lines = text.split("\n")
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if first != "UPDATE"
+          formatted << line
+          next
+        end
+        tab_count = self.get_tab_count(line, tab)
+        update = line.strip[first.size + 1..]
+        new_update = self.format_update(update, tab_count, tab)
+        if new_update.nil?
+          formatted << line
+          next
+        end
+        formatted << line.sub(update, new_update)
+      end
+      formatted.join("\n")
+    end
+    def self.format_inserts(text, tab = "    ")
+      formatted = []
+      lines = text.split("\n")
+      search = "INSERT INTO"
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if first != "INSERT"
+          formatted << line
+          next
+        end
+        tab_count = self.get_tab_count(line, tab)
+        insert = line.strip[search.size + 1..]
+        new_insert = self.format_insert(insert, tab_count, tab)
+        if new_insert.nil?
+          formatted << line
+          next
+        end
+        formatted << line.sub(insert, new_insert)
+      end
+      formatted.join("\n")
+    end
+    def self.format_selects(text, tab = "    ")
+      formatted = []
+      lines = text.split("\n")
+      lines.each do |line|
+        first = line.strip.split(" ").first
+        if first != "SELECT"
+          formatted << line
+          next
+        end
+        tab_count = self.get_tab_count(line, tab)
+        select_sql = line.strip[first.size + 1..]
+        new_select = self.format_select(select_sql, tab_count, tab)
+        if new_select.nil?
+          formatted << line
+          next
+        end
+        formatted << line.sub(select_sql, new_select)
+      end
+      formatted.join("\n")
+    end
+    def self.format_wheres(text, tab = "   ")
+      formatted = []
+      text.split("\n").each do |line|
+        first = line.strip.split(" ").first
+        if first != "WHERE"
+          formatted << line
+          next
+        end
+        tab_count = self.get_tab_count(line, tab)
+        predicate = line.strip[first.size + 1..]
+        new_predicate = self.format_predicate(predicate, tab_count, tab)
+        if new_predicate.nil?
+          formatted << line
+          next
+        end
+        formatted << line.sub(predicate, new_predicate)
+      end
+      formatted.join("\n")
+    end
+    private
+    def self.format_set(s, tab_count = 0, tab = "    ")
+      parts = []
+      builder = ''
+      parenthesis = 0
+      s.split('').each do |c|
+        parenthesis += 1 if c == '('
+        parenthesis -= 1 if c == ')'
+        if c == ','
+          if parenthesis > 0
+            builder << c
+          else
+            parts << builder
+            builder = ''
+          end
+        else
+          builder << c
+        end
+      end
+      parts << builder unless builder.empty?
+      "\n#{parts.map {|p| "#{tab * (tab_count + 1)}#{p.strip}"}.join(",\n")}"
+    end
+    def self.format_update(s, tab_count = 0, tab = "    ")
+      formatted = []
+      parts = s.split(" SET ")
+      table = parts[0]
+      where_parts = parts[1].split(" WHERE ")
+      formatted << "\n#{tab * (tab_count + 1)}#{table}"
+      formatted << "#{tab * tab_count}SET #{where_parts[0]}"
+      formatted << "#{tab * tab_count}WHERE #{where_parts[1]}" if where_parts.size > 0
+      formatted.join("\n")
+    end
+    def self.format_insert(s, tab_count = 0, tab = "    ")
+      formatted = []
+      if s.include? ") VALUES ("
+        tokens = s.split(") VALUES (")
+        table = tokens[0][..tokens[0].index("(") - 2]
+        columns = tokens[0][tokens[0].index("(") + 1..]
+        values = tokens[1][..-2]
+        formatted << "\n#{tab * (tab_count + 1)}#{table}"
+        formatted << "#{tab * (tab_count + 2)}(#{columns})"
+        formatted << "#{tab * (tab_count + 1)}VALUES"
+        formatted << "#{tab * (tab_count + 2)}(#{values})"
+      end
+      formatted.join("\n") unless formatted.empty?
+    end
+    def self.format_select(s, tab_count = 0, tab = "    ")
+      tokens = s.split(", ")
+      "\n#{tokens.map { |t| "#{tab * (tab_count + 1)}#{t}" }.join(",\n")}"
+    end
+    def self.format_predicate(s, tab_count = 0, tab = "    ")
+      indented = []
+      formatted = []
+      builder = []
+      tokens = s.split(" ")
+      tokens.each do |t|
+        if %w[AND OR].include? t
+          formatted << builder.join(" ") unless builder.empty?
+          builder = [t]
+        else
+          builder << t
+        end
+      end
+      formatted << builder.join(" ")
+      level = tab_count
+      formatted.each_with_index do |f, i|
+        indented << "#{tab * (level + 1)}#{f}"
+        level -= f.count(")")
+        level += f.count("(")
+      end
+      "\n#{indented.join("\n")}"
+    end
+    def self.get_tab_count(line, tab = "    ")
+      tab_count = 0
+      while line.start_with? tab
+        tab_count += 1
+        line = line.sub(tab, "")
+      end
+      tab_count
+    end
+  end
+end

data/lib/parsing/tokenizer.rb ADDED Viewed

@@ -0,0 +1,151 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   parsing/tokenizer.rb
+# object:
+#   TSqlParser::Parsing::Tokenizer
+module TSqlParser::Parsing
+  require_relative "parser"
+  class Tokenizer
+    def self.tokenize(tsql_string)
+      tokens = basic_tokenize(
+        tsql_string,
+        ["(", ",", ")", "=", "+", "-", "%", "/", "*", "<", "!", ">", "'", "[", "]", ";"],
+        [" ", "\n", "\t"]
+      )
+      tokens.map do |t|
+        categorize(t)
+      end
+    end
+    def self.categorize(s)
+      data = {}
+      data[:value] = s
+      data[:keyword] = true if Parser.is_keyword? s
+      data[:operator] = true if Parser.is_operator? s
+      data[:function] = true if Parser.is_function? s
+      data[:type] = true if Parser.is_type? s
+      data[:comment] = true if Parser.is_comment? s
+      data[:numeric] = true if Parser.is_numeric? s
+      data[:special_variable] = true if Parser.is_special_variable? s
+      data[:variable] = true if Parser.is_variable? s
+      data[:temporary_table] = true if Parser.is_temp_table? s
+      data[:label] = true if Parser.is_label? s
+      data[:parenthesis] = true if Parser.is_parenthesis? s
+      data[:open_parenthesis] = true if Parser.is_open_parenthesis? s
+      data[:close_parenthesis] = true if Parser.is_close_parenthesis? s
+      data[:bracket] = true if Parser.is_bracket? s
+      data[:open_bracket] = true if Parser.is_open_bracket? s
+      data[:close_bracket] = true if Parser.is_close_bracket? s
+      data[:string_mark] = true if Parser.is_string_mark? s
+      data[:comma] = true if Parser.is_comma? s
+      data[:join] = true if Parser.is_join? s
+      data[:join_type] = true if Parser.is_join_type? s
+      data[:begin] = true if Parser.is_begin? s
+      data[:end] = true if Parser.is_end? s
+      data[:terminator] = true if Parser.is_terminator? s
+      data[:value] = data[:value].upcase if data[:keyword] or data[:function] or data[:type]
+      data[:needs_newline] = true if data[:keyword] and Parser.is_newline_required? s
+      data
+    end
+    def self.basic_tokenize(tsql_string, char_delimiters, skip_delimiters)
+      specific_tokens = []
+      delimiters = ([] << char_delimiters << skip_delimiters).flatten
+      builder = ""
+      tsql_chars = tsql_string.split("")
+      multiline_comment = false
+      comment = false
+      string = false
+      string_count = 0
+      skip_count = 0
+      tsql_chars.each_with_index do |c, i|
+        if skip_count > 0
+          skip_count -= 1
+          next
+        end
+        next_c = tsql_chars[i + 1] unless i + 1 > tsql_chars.size
+        if Parser.is_multiline_comment_start?(c, next_c)
+          multiline_comment = true
+          specific_tokens << builder unless builder.empty?
+          builder = c
+          next
+        end
+        if Parser.is_multiline_comment_end?(c, next_c)
+          skip_count = 1
+          multiline_comment = false
+          builder << c << next_c
+          specific_tokens << builder unless builder.empty?
+          builder = ""
+          next
+        end
+        if Parser.is_comment_start?(c, next_c)
+          comment = true
+          skip_count = 1
+          specific_tokens << builder unless builder.empty?
+          builder = "--"
+          next
+        end
+        if c == "'" and not multiline_comment and not comment
+          if not string
+            string = true
+            specific_tokens << builder unless builder.empty?
+            builder = c
+            next
+          else
+            string = false
+            builder << c
+            specific_tokens << builder unless builder.empty?
+            builder = ""
+            next
+          end
+        end
+        if Parser.is_two_char_op?(c, next_c)
+          skip_count = 1
+          specific_tokens << builder unless builder.empty?
+          specific_tokens << "#{c}#{next_c}"
+          builder = ""
+          next
+        end
+        if comment and c != "\n"
+          builder << c
+          next
+        elsif comment and c == "\n"
+          specific_tokens << builder unless builder.empty?
+          builder = ""
+          comment = false
+          next
+        end
+        if delimiters.include? c and !multiline_comment and !string
+          specific_tokens << builder unless builder.empty?
+          specific_tokens << c unless skip_delimiters.include? c
+          builder = ""
+          next
+        end
+        builder << c
+      end
+      specific_tokens << builder unless builder.empty?
+      specific_tokens
+    end
+  end
+end

data/lib/tsql_parser.rb ADDED Viewed

@@ -0,0 +1,38 @@
+#   __               .__
+# _/  |_  ___________|  |           ___________ _______  ______ ___________
+# \   __\/  ___/ ____/  |    ______ \____ \__  \\_  __ \/  ___// __ \_  __ \
+#  |  |  \___ < <_|  |  |__ /_____/ |  |_> > __ \|  | \/\___ \\  ___/|  | \/
+#  |__| /____  >__   |____/         |   __(____  /__|  /____  >\___  >__|
+#            \/   |__|              |__|       \/           \/     \/
+#
+# A very light-weight and opinionated T-SQL parser and formatter.
+#
+# github.com/scstauf
+#
+# path:
+#   tsql_parser.rb
+# object:
+#   TSqlParser
+module TSqlParser
+  # Formats a SQL string.
+  #
+  # @param sql [String] the SQL string to format.
+  # @param tab_count [Integer] the number of tabs to start with.
+  # @param tab [String] the tab string.
+  # @return [String] the formatted SQL string.
+  def self.format(sql, tab_count = 0, tab = "    ")
+    require_relative "parsing/formatter"
+    tokens = self.parse(sql)
+    Parsing::Formatter.format(tokens, tab_count, tab)
+  end
+  # Parses SQL string into token hashes.
+  #
+  # @param sql [String] the SQL string to parse.
+  # @return [Array] the token hashes.
+  def self.parse(sql)
+    require_relative "parsing/tokenizer"
+    Parsing::Tokenizer.tokenize(sql)
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,53 @@
+--- !ruby/object:Gem::Specification
+name: tsql_parser
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- Scott Stauffer
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2023-03-27 00:00:00.000000000 Z
+dependencies: []
+description: A very light-weight and opinionated T-SQL parser and formatter.
+email: scott@fuseraft.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/parsing/formatter.rb
+- lib/parsing/iterator.rb
+- lib/parsing/keyword.rb
+- lib/parsing/model/flat_sql_container.rb
+- lib/parsing/model/sql_container.rb
+- lib/parsing/parser.rb
+- lib/parsing/text_formatter.rb
+- lib/parsing/tokenizer.rb
+- lib/tsql_parser.rb
+homepage: https://rubygems.org/gems/tsql_parser
+licenses:
+- MIT
+metadata:
+  source_code_uri: https://github.com/scstauf/tsql_parser
+  documentation_uri: https://www.rubydoc.info/github/scstauf/tsql_parser
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.7.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.3.5
+signing_key:
+specification_version: 4
+summary: A very light-weight and opinionated T-SQL parser and formatter.
+test_files: []