RubyGems - query_packwerk - Versions diffs - 0.1.0 - Mend

query_packwerk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +90 -0
data/CHANGELOG.md +1 -0
data/CODE_OF_CONDUCT.md +131 -0
data/LICENSE.txt +21 -0
data/README.md +108 -0
data/exe/query_packwerk +7 -0
data/lib/query_packwerk/cli.rb +19 -0
data/lib/query_packwerk/console.rb +65 -0
data/lib/query_packwerk/console_helpers.rb +144 -0
data/lib/query_packwerk/file_cache.rb +160 -0
data/lib/query_packwerk/package.rb +129 -0
data/lib/query_packwerk/packages.rb +78 -0
data/lib/query_packwerk/query_interface.rb +268 -0
data/lib/query_packwerk/rule_rewriter/anonymize_arguments_rule.rb +31 -0
data/lib/query_packwerk/rule_rewriter/anonymize_keyword_arguments_rule.rb +30 -0
data/lib/query_packwerk/rule_rewriter/base_rule.rb +30 -0
data/lib/query_packwerk/rule_rewriter/rule_set_rewriter.rb +56 -0
data/lib/query_packwerk/rule_rewriter.rb +22 -0
data/lib/query_packwerk/version.rb +6 -0
data/lib/query_packwerk/violation.rb +295 -0
data/lib/query_packwerk/violations.rb +270 -0
data/lib/query_packwerk.rb +92 -0
data/sig/query_packwerk.rbs +4 -0
metadata +153 -0

data/lib/query_packwerk/rule_rewriter/base_rule.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# typed: strict
+# frozen_string_literal: true
+module QueryPackwerk
+  class RuleRewriter
+    # Abstract base class for source code transformation rules.
+    # Extends the Parser::AST::Processor to provide common functionality
+    # for traversing and modifying Ruby abstract syntax trees during
+    # source rewriting operations.
+    class BaseRule < Parser::AST::Processor
+      extend T::Sig
+      include RuboCop::AST::Traversal
+      ANONYMIZED = '_'
+      sig { params(rewriter: Parser::Source::TreeRewriter).void }
+      def initialize(rewriter)
+        @rewriter = rewriter
+        super()
+      end
+      sig { params(begin_pos: Integer, end_pos: Integer).returns(Parser::Source::Range) }
+      def create_range(begin_pos, end_pos)
+        Parser::Source::Range.new(@rewriter.source_buffer, begin_pos, end_pos)
+      end
+    end
+  end
+end

data/lib/query_packwerk/rule_rewriter/rule_set_rewriter.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# typed: true
+# frozen_string_literal: true
+module QueryPackwerk
+  class RuleRewriter
+    # Coordinates the application of multiple rewriting rules to source code.
+    # Processes Ruby code using RuboCop's source processing capabilities and
+    # applies each configured rule in sequence to transform source code for
+    # analysis purposes.
+    class RuleSetRewriter
+      extend T::Sig
+      sig { returns(RuboCop::ProcessedSource) }
+      attr_reader :source
+      sig { returns(RuboCop::AST::Node) }
+      attr_reader :ast
+      sig { returns(Parser::Source::TreeRewriter) }
+      attr_reader :rewriter
+      RULES = [
+        RuleRewriter::AnonymizeKeywordArgumentsRule,
+        RuleRewriter::AnonymizeArgumentsRule
+      ].freeze
+      def initialize(string, rules: RULES)
+        @source = processed_source(string)
+        @ast = @source.ast
+        @source_buffer = @source.buffer
+        @rewriter = Parser::Source::TreeRewriter.new(@source_buffer)
+        @rules = rules
+      end
+      def process
+        @rules.each do |rule_class|
+          rule = rule_class.new(@rewriter)
+          @ast.each_node { |node| rule.process(node) }
+        end
+        @rewriter
+          .process
+          .delete("\n").squeeze(' ') # ...and multiple spaces, probably indents from above
+          .gsub('( ', '(') # Remove paren spacing after previous
+          .gsub(' )', ')') # Remove paren spacing after previous
+          .gsub('. ', '.') # Remove suffix-dot spacing
+      end
+      private
+      def processed_source(string)
+        RuboCop::ProcessedSource.new(string, RUBY_VERSION.to_f)
+      end
+    end
+  end
+end

data/lib/query_packwerk/rule_rewriter.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# typed: strict
+# frozen_string_literal: true
+module QueryPackwerk
+  # Orchestrates source code rewriting using defined transformation rules.
+  # Provides an entry point for applying rule-based code transformations,
+  # particularly for anonymizing method arguments and source patterns
+  # to facilitate pattern-based violation analysis.
+  class RuleRewriter
+    autoload :BaseRule, 'query_packwerk/rule_rewriter/base_rule'
+    autoload :RuleSetRewriter, 'query_packwerk/rule_rewriter/rule_set_rewriter'
+    autoload :AnonymizeArgumentsRule, 'query_packwerk/rule_rewriter/anonymize_arguments_rule'
+    autoload :AnonymizeKeywordArgumentsRule, 'query_packwerk/rule_rewriter/anonymize_keyword_arguments_rule'
+    extend T::Sig
+    sig { params(source_string: String).returns(String) }
+    def self.rewrite(source_string)
+      RuleSetRewriter.new(source_string).process
+    end
+  end
+end

data/lib/query_packwerk/version.rb ADDED Viewed

@@ -0,0 +1,6 @@
+# typed: false
+# frozen_string_literal: true
+# Top-level namespace of the query_packwerk gem.
+module QueryPackwerk
+  # Version string of this release.
+  VERSION = '0.1.0'
+end

data/lib/query_packwerk/violation.rb ADDED Viewed

@@ -0,0 +1,295 @@
+# typed: strict
+# frozen_string_literal: true
+module QueryPackwerk
+  # Represents a single Packwerk violation with extended inspection capabilities.
+  # Provides methods to analyze violation details including source location, contextual
+  # information, and code patterns. Facilitates both detailed and anonymized views of
+  # dependency violations between packages.
+  class Violation
+    extend T::Sig
+    # This does not play nicely with ERB files which may have violations
+    RUBY_FILE = T.let(/\.(rb|rake)\z/, Regexp)
+    ALL_CAPS = T.let(/\A[A-Z_]+\z/, Regexp)
+    sig { returns(QueryPackwerk::Package) }
+    attr_reader :producing_pack
+    sig { returns(QueryPackwerk::Package) }
+    attr_reader :consuming_pack
+    sig do
+      params(
+        original_violation: ParsePackwerk::Violation,
+        consuming_pack: ParsePackwerk::Package,
+        file_cache: QueryPackwerk::FileCache
+      ).void
+    end
+    def initialize(original_violation:, consuming_pack:, file_cache: QueryPackwerk::FileCache.new)
+      @original_violation = original_violation
+      @producing_pack = T.let(
+        QueryPackwerk::Package.new(original_package: T.must(ParsePackwerk.find(original_violation.to_package_name))),
+        QueryPackwerk::Package
+      )
+      @consuming_pack = T.let(
+        QueryPackwerk::Package.new(original_package: consuming_pack),
+        QueryPackwerk::Package
+      )
+      @file_cache = T.let(file_cache, QueryPackwerk::FileCache)
+      @cache_loaded = T.let(false, T::Boolean)
+    end
+    sig { params(headers: T::Boolean).void }
+    def load_cache!(headers: false)
+      return true if @cache_loaded
+      @file_cache.load!(*T.unsafe(files), headers: headers)
+      @cache_loaded = true
+    end
+    # Replaces the file cache used by this violation and clears the loaded
+    # flag, so the next load_cache! call loads files into the new cache.
+    #
+    # @param cache [QueryPackwerk::FileCache] cache to use from now on
+    sig { params(cache: QueryPackwerk::FileCache).void }
+    def set_cache!(cache)
+      @cache_loaded = false
+      @file_cache = cache
+    end
+    sig { returns(Integer) }
+    def file_count
+      files.size
+    end
+    # Forwarding original properties explicitly.
+
+    # Violation type as reported by the wrapped violation; deconstruct_keys
+    # compares it against 'privacy' and 'dependency'.
+    sig { returns(String) }
+    def type
+      @original_violation.type
+    end
+    # Package name the wrapped violation points to; the constructor resolves
+    # the producing pack from this same name.
+    sig { returns(String) }
+    def to_package_name
+      @original_violation.to_package_name
+    end
+    # Name of the violated constant, forwarded from the wrapped violation.
+    sig { returns(String) }
+    def class_name
+      @original_violation.class_name
+    end
+    # File names recorded on the wrapped violation.
+    sig { returns(T::Array[String]) }
+    def files
+      @original_violation.files
+    end
+    # Addon methods
+    # Whether or not the files containing violations match any provided globs
+    #
+    # See also: https://ruby-doc.org/core-2.7.6/File.html#method-c-fnmatch
+    sig { params(globs: T.any(String, Regexp)).returns(T::Boolean) }
+    def includes_files?(*globs)
+      globs.any? do |glob|
+        files.any? do |file_name|
+          glob.is_a?(Regexp) ? glob.match?(file_name) : File.fnmatch?(glob, file_name)
+        end
+      end
+    end
+    # All sources and their receiver chains across all files this violation covers
+    sig { returns(T::Array[RuboCop::AST::Node]) }
+    def sources
+      load_cache!
+      files.flat_map do |file_name|
+        @file_cache.get_full_sources(file_name: file_name, class_name: class_name)
+      end
+    end
+    # Adds additional file and line number information to each source
+    sig { returns(T::Array[T.any(String, T::Array[RuboCop::AST::Node])]) }
+    def sources_with_locations
+      load_cache!
+      files.flat_map do |file_name|
+        @file_cache
+          .get_full_sources(file_name: file_name, class_name: class_name)
+          .map { |s| ["#{file_name}:#{s.loc.line}", s.source] }
+      end
+    end
+    # Frequency of which each source occurs
+    sig { returns(T::Hash[String, Integer]) }
+    def source_counts
+      load_cache!
+      sources = files.flat_map do |file_name|
+        @file_cache
+          .get_full_sources(file_name: file_name, class_name: class_name)
+          .map(&:source)
+      end
+      sources.tally
+    end
+    # Sources that have had their arguments anonymized
+    sig { returns(T::Array[String]) }
+    def anonymous_sources
+      load_cache!
+      files.flat_map do |file_name|
+        @file_cache
+          .get_full_anonymous_sources(file_name: file_name, class_name: class_name)
+      end
+    end
+    # sig { returns(T::Array[T.any(String, T::Array[String])]) }
+    sig { returns(T.untyped) }
+    def anonymous_sources_with_locations
+      load_cache!
+      file_sources = files.flat_map do |file_name|
+        @file_cache.get_full_sources(file_name: file_name, class_name: class_name).map do |s|
+          ["#{file_name}:#{s.loc.line}", @file_cache.anonymize_arguments(s.source)]
+        end
+      end
+      anonymous_source_groups = Hash.new { |h, source| h[source] = [] }
+      file_sources.each_with_object(anonymous_source_groups) do |(location, source), groups|
+        groups[source] << location
+      end
+    end
+    sig { params(start_offset: Integer, end_offset: Integer).returns(T.untyped) }
+    def sources_with_contexts(start_offset: 3, end_offset: 3)
+      load_cache!
+      file_sources = files.flat_map do |file_name|
+        @file_cache.get_full_sources(file_name: file_name, class_name: class_name).map do |s|
+          line_number = s.loc.line
+          start_pos = line_number - start_offset
+          end_pos = line_number + end_offset
+          location = "#{file_name}:#{s.loc.line} (L#{start_pos}..#{end_pos})"
+          context = @file_cache.get_file(file_name).lines.slice(start_pos..end_pos)
+          full_context = unindent((context || ['']).join)
+          [@file_cache.anonymize_arguments(s.source), "> #{location}\n\n#{full_context}"]
+        end
+      end
+      anonymous_source_groups = Hash.new { |h, source| h[source] = [] }
+      file_sources.each_with_object(anonymous_source_groups) do |(anonymous_source, full_source), groups|
+        groups[anonymous_source] << full_source
+      end
+    end
+    # Like above frequency of sources, except by method "shape" rather than
+    # exact arguments
+    #
+    # @return [Hash{String => Integer}] anonymized source => occurrence count,
+    #   a tally of #anonymous_sources
+    sig { returns(T::Hash[String, Integer]) }
+    def anonymous_source_counts
+      anonymous_sources.tally
+    end
+    # True count of violations, as there can be multiple of the same violation
+    # in a file.
+    sig { returns(Integer) }
+    def count
+      files.sum do |file_name|
+        @file_cache.get_all_const_occurrences(
+          file_name: file_name,
+          class_name: class_name
+        ).size
+      end
+    end
+    sig do
+      params(keys: T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, T.untyped])
+    end
+    def runtime_keys(keys)
+      return {} unless defined?(Rails)
+      runtime_values = {}
+      return { is_active_record: false, is_constant: false } unless Kernel.const_defined?(class_name)
+      if keys.nil? || keys.include?(:is_active_record)
+        constant = Kernel.const_get(class_name) # rubocop:disable Sorbet/ConstantsFromStrings
+        value = @file_cache.set(
+          :is_active_record,
+          key: class_name,
+          value: constant.is_a?(Class) && constant < ApplicationRecord
+        )
+        runtime_values[:is_active_record] = value
+      end
+      if keys.nil? || keys.include?(:is_constant)
+        value = @file_cache.set(
+          :is_constant,
+          key: class_name,
+          value: class_name.split('::').last&.match?(ALL_CAPS)
+        )
+        runtime_values[:is_constant] = value
+      end
+      runtime_values
+    end
+    sig do
+      params(keys: T.nilable(T::Array[Symbol])).returns(T::Hash[Symbol, T.untyped])
+    end
+    def deconstruct_keys(keys)
+      all_values = {
+        constant_name: class_name,
+        pack_name: to_package_name,
+        # Type related properties, including convenience boolean handlers
+        type: type,
+        privacy: type == 'privacy',
+        dependency: type == 'dependency',
+        # Reaching into which pack produced the violated constant, and
+        # which consumes the violated constant.
+        consuming_pack: consuming_pack.name,
+        producing_pack: producing_pack.name,
+        # Same, except for owners
+        producing_owner: producing_pack.owner,
+        consuming_owner: consuming_pack.owner,
+        # So why is this "owner" implying producer? Because the
+        # owner field of the violation is producer-oriented.
+        owner: producing_pack.owner,
+        owned: producing_pack.owner.nil?,
+        **runtime_keys(keys)
+      }
+      # all_values[:is_active_record] = active_record? if !keys || keys.include?(:is_active_record)
+      # all_values[:is_constant] = active_record? if !keys || keys.include?(:is_constant)
+      keys.nil? ? all_values : all_values.slice(*T.unsafe(keys))
+    end
+    sig { returns(String) }
+    def inspect
+      "#<#{self.class.name} #{consuming_pack.name} -> #{class_name} (#{type})>"
+    end
+    private
+    sig { params(string: String).returns(String) }
+    def unindent(string)
+      # Multi-line match, this is intentional
+      min_space = string.scan(/^\s*/).min_by(&:length)
+      string.gsub(/^#{min_space}/, '')
+    end
+  end
+end

data/lib/query_packwerk/violations.rb ADDED Viewed

@@ -0,0 +1,270 @@
+# typed: strict
+# frozen_string_literal: true
+require 'coderay'
+module QueryPackwerk
+  # A collection class for managing and querying sets of Packwerk violations.
+  # Provides aggregation, filtering, and analysis methods for violation data,
+  # including source extraction, contextual reporting, and consumer relationship mapping.
+  # Implements Enumerable and QueryInterface for flexible data manipulation.
+  class Violations
+    extend T::Sig
+    extend T::Generic
+    Elem = type_member { { fixed: QueryPackwerk::Violation } }
+    include Enumerable
+    include QueryInterface
+    sig { override.returns(T::Array[QueryPackwerk::Violation]) }
+    attr_reader :original_collection
+    @all = T.let(nil, T.nilable(QueryPackwerk::Violations))
+    class << self
+      extend T::Sig
+      # Get all violations from ParsePackwerk and wrap them in our own
+      # representations. Unlike ParsePackwerk we also capture the destination
+      # of the violation to give a bi-directional view of consumption.
+      sig { returns(QueryPackwerk::Violations) }
+      def all
+        return @all if @all
+        violations = ParsePackwerk.all.flat_map do |pack|
+          pack.violations.map do |violation|
+            QueryPackwerk::Violation.new(
+              original_violation: violation,
+              consuming_pack: pack
+            )
+          end
+        end
+        @all = QueryPackwerk::Violations.new(violations)
+      end
+      # Wrap the interface `where` with this type
+      #
+      # Delegates to the inherited `where` via super with the same keyword
+      # arguments and block, then wraps the result in a new Violations.
+      sig do
+        params(
+          query_params: T.untyped, # Array, or anything responding to `===`, which can't be typed
+          query_fn: T.nilable(T.proc.params(arg0: T.untyped).returns(T::Boolean))
+        ).returns(QueryPackwerk::Violations)
+      end
+      def where(**query_params, &query_fn)
+        QueryPackwerk::Violations.new(super(**query_params, &query_fn))
+      end
+      # Clears the memoized collection so the next call to `all` rebuilds it.
+      sig { void }
+      def reload!
+        @all = nil
+      end
+    end
+    sig do
+      params(
+        original_collection: T::Array[QueryPackwerk::Violation],
+        file_cache: QueryPackwerk::FileCache
+      ).void
+    end
+    def initialize(original_collection, file_cache: QueryPackwerk::FileCache.new)
+      @original_collection = original_collection
+      @file_cache = T.let(file_cache, QueryPackwerk::FileCache)
+      @original_collection.each do |violation|
+        violation.set_cache!(file_cache)
+      end
+      @cache_loaded = T.let(false, T::Boolean)
+      @sources_loaded = T.let(false, T::Boolean)
+    end
+    sig { void }
+    def load_cache!
+      return true if @cache_loaded
+      warn "Prepopulating AST cache with #{file_count} files: "
+      start_time = Time.now
+      @original_collection.each(&:load_cache!)
+      finish_time = Time.now - start_time
+      warn '', "AST cache loaded in #{finish_time}"
+      @cache_loaded = true
+    end
+    sig { void }
+    def load_sources!
+      return true if @sources_loaded
+      unless @cache_loaded
+        load_cache!
+        warn
+      end
+      warn "Prepopulating sources cache with #{count} violations: "
+      start_time = Time.now
+      total_sources_loaded = @original_collection.sum do |violation|
+        $stderr.print '.'
+        violation.sources.size
+      end
+      finish_time = Time.now - start_time
+      warn "Loaded #{total_sources_loaded} full sources in #{finish_time}"
+      @sources_loaded = true
+    end
+    sig { returns(Integer) }
+    def file_count
+      @original_collection.sum(&:file_count)
+    end
+    # Gets all sources and their receiving chains grouped by the constant they've violated.
+    sig { returns(T.untyped) }
+    def raw_sources
+      load_sources!
+      deep_merge_groups(@original_collection) do |v|
+        [v.class_name, v.sources]
+      end
+    end
+    # Gets all sources and their receiving chains grouped by the constant they've violated.
+    sig { returns(T::Hash[String, T::Array[String]]) }
+    def sources
+      load_sources!
+      deep_merge_groups(@original_collection) { |v| [v.class_name, v.sources.map(&:source)] }.transform_values(&:uniq)
+    end
+    # In addition to the above also provide the file location and line number along with the
+    # source.
+    sig { returns(T::Hash[String, T::Array[String]]) }
+    def sources_with_locations
+      load_sources!
+      deep_merge_groups(@original_collection) { |v| [v.class_name, v.sources_with_locations] }
+    end
+    # Instead of getting all instances of the source, count how often each occurs, with the option to
+    # provide a threshold to remove lower-occuring items.
+    sig { params(threshold: Integer).returns(T::Hash[String, T::Hash[String, Integer]]) }
+    def source_counts(threshold: 0)
+      load_sources!
+      deep_merge_counts(@original_collection, threshold:) { |v| [v.class_name, v.source_counts] }
+    end
+    # "Anonymize" the arguments of sources by replacing all arguments with underscores to get a look
+    # at the "shape" of a function rather than its exact call (i.e. `test(1, 2, 3)` becomes `test(_, _, _)`).
+    #
+    # This also removes extra spacing, line-breaks, cbase constant sigils, and other extra information to
+    # give a clearer view of a call's "shape".
+    sig { returns(T::Hash[String, T::Array[String]]) }
+    def anonymous_sources
+      load_sources!
+      deep_merge_groups(@original_collection) { |v| [v.class_name, v.anonymous_sources] }.transform_values(&:uniq)
+    end
+    sig { returns(T::Hash[String, T::Hash[String, T::Array[String]]]) }
+    def anonymous_sources_with_locations
+      load_sources!
+      deep_merge_hash_groups(@original_collection) { |v| [v.class_name, v.anonymous_sources_with_locations] }
+    end
+    sig do
+      params(start_offset: Integer, end_offset: Integer).returns(T::Hash[String, T::Hash[String, T::Array[String]]])
+    end
+    def sources_with_contexts(start_offset: 3, end_offset: 3)
+      load_sources!
+      deep_merge_hash_groups(@original_collection) { |v| [v.class_name, v.sources_with_contexts] }
+    end
+    sig { params(start_offset: Integer, end_offset: Integer).returns(String) }
+    def sources_with_contexts_report(start_offset: 3, end_offset: 3)
+      contexts = sources_with_contexts(start_offset:, end_offset:)
+      output = +''
+      contexts.each do |violated_constant, anonymized_sources|
+        heavy_underline = '=' * violated_constant.size
+        output << "#{violated_constant}\n#{heavy_underline}\n\n"
+        anonymized_sources.each do |anonymized_source, full_contexts|
+          light_underline = '-' * anonymized_source.size
+          output << "#{anonymized_source}\n#{light_underline}\n\n"
+          full_contexts.each do |context|
+            output << highlight_ruby(context)
+            output << "\n\n"
+          end
+        end
+      end
+      output
+    end
+    # Like the above source counts, but uses anonymized sources to give a clearer look at how often each
+    # "shape" of a method is called across a set of violations.
+    sig { params(threshold: Integer).returns(T::Hash[String, T::Hash[String, Integer]]) }
+    def anonymous_source_counts(threshold: 0)
+      load_sources!
+      deep_merge_counts(@original_collection, threshold:) { |v| [v.class_name, v.anonymous_source_counts] }
+    end
+    # Find which packs consume these violations
+    sig { params(threshold: Integer).returns(T::Hash[String, Integer]) }
+    def consumers(threshold: 0)
+      tallies = @original_collection.map { |v| v.consuming_pack.name }.tally
+      threshold_filter_sort(tallies, threshold:)
+    end
+    # Find which packs produce these violations
+    sig { params(threshold: Integer).returns(T::Hash[String, Integer]) }
+    def producers(threshold: 0)
+      tallies = @original_collection.map { |v| v.producing_pack.name }.tally
+      threshold_filter_sort(tallies, threshold:)
+    end
+    # Filter for violations which include one of the provided file globs
+    sig { params(file_globs: T.any(String, Regexp)).returns(QueryPackwerk::Violations) }
+    def including_files(*file_globs)
+      filtered_violations = @original_collection.select do |violation|
+        T.unsafe(violation).includes_files?(*file_globs) # Sorbet hates splats
+      end
+      QueryPackwerk::Violations.new(filtered_violations)
+    end
+    # Filter for violations which do not include one of the provided file globs
+    sig { params(file_globs: T.any(String, Regexp)).returns(QueryPackwerk::Violations) }
+    def excluding_files(*file_globs)
+      filtered_violations = @original_collection.reject do |violation|
+        T.unsafe(violation).includes_files?(*file_globs) # Sorbet hates splats
+      end
+      QueryPackwerk::Violations.new(filtered_violations)
+    end
+    sig { returns(String) }
+    def inspect
+      [
+        "#<#{self.class.name} [",
+        to_a.map(&:inspect).join("\n"),
+        ']>'
+      ].join("\n")
+    end
+    private
+    # Encodes the text with CodeRay as Ruby using its :terminal encoder.
+    #
+    # @param string [String] Ruby source text
+    # @return [String] the text as encoded by CodeRay
+    sig { params(string: String).returns(String) }
+    def highlight_ruby(string)
+      CodeRay.encode(string, :ruby, :terminal)
+    end
+  end
+end