ecosystems-bibliothecary 14.2.0 → 14.3.0

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c531c54aa377c8bc30d1a2f75e3de0bbad1a0502568976f3d487fe3c4c78bc53
4
- data.tar.gz: 82b7ca70158bc5ce1094af762eed9b7cb20fa6492c807663c965266bc8ce535a
3
+ metadata.gz: c529c2cc8f2f35098f5bbca1294b7a36249580d6ef44a735b00257422dfe568c
4
+ data.tar.gz: 2680b1d61e665b2bef8ef5b987f3b58b29559266549e3cd68ccdaa8a65d2c4b0
5
5
  SHA512:
6
- metadata.gz: a981fd824d3227d00b9a937199ab3eeb007139c7f76c28de5a8681e1a680948cfe453abab0a791ea4b65c56d6f2d22943b7a834b6c83c3df36de746504bb2c1d
7
- data.tar.gz: 35ec260ba3a5a92a84a5db142771681eb609feb1a65288471f1152f29838d118484f7e1ad9f0e6ff021547b322a8dc0a384a53bf5939e55ee6b4430f4224bc24
6
+ metadata.gz: 7518c86a4817297d41c9bbab194901de59a0ca6f07dcad90a9b7d7a78584042d08f9765fc21080475767d399fc6e59f274f8e0132f6673d5e6f7dc2e7bc24908
7
+ data.tar.gz: e916a1d6f2c4fba1f908c8abc0717013ca21d870f7ddea1c0257c579c3de619037e6cf01ab0eb6f57be2c8191482655267b0765020d19cf8d5da7597dbf56ba8
data/CHANGELOG.md CHANGED
@@ -13,6 +13,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
13
13
 
14
14
  ### Removed
15
15
 
16
+ ## [14.3.0]
17
+
18
+ ### Added
19
+
20
+ - Added `bin/benchmark` script for performance testing.
21
+
22
+ ### Changed
23
+
24
+ - Fixed bug where Runner was recreated on every Bibliothecary method call, causing repeated index rebuilding.
25
+ - Memoized package_managers array in Runner.
26
+ - Added filename/extension index for O(1) parser lookup instead of O(n) linear scan through all parsers.
27
+ - Optimized `identify_manifests` to use filename index directly (~139x faster).
28
+ - Optimized `analyse_file` to use filename index for candidate filtering (~16x faster).
29
+ - Added per-file caching of mapping details in FileInfo to avoid repeated lookups.
30
+ - Added `parse_file_info` method to reuse FileInfo objects during parsing.
31
+
16
32
  ## [14.2.0]
17
33
 
18
34
  ### Added
data/Gemfile CHANGED
@@ -7,6 +7,7 @@ gem "strings-ansi", ref: "35d0c9430cf0a8022dc12bdab005bce296cb9f00", github: "pi
7
7
 
8
8
  # Ruby 3.4+ no longer includes these as default gems
9
9
  gem "base64"
10
+ gem "benchmark"
10
11
  gem "bigdecimal"
11
12
  gem "csv"
12
13
  gem "logger"
data/README.md CHANGED
@@ -13,7 +13,7 @@ Requires Ruby 3.4 or above.
13
13
  Add this line to your application's Gemfile:
14
14
 
15
15
  ```ruby
16
- gem "bibliothecary", git: "https://github.com/ecosyste-ms/bibliothecary.git"
16
+ gem "ecosystems-bibliothecary", git: "https://github.com/ecosyste-ms/bibliothecary.git", require: "bibliothecary"
17
17
  ```
18
18
 
19
19
  And then execute:
data/bin/benchmark ADDED
@@ -0,0 +1,386 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "bibliothecary"
6
+ require "benchmark"
7
+ require "optparse"
8
+
9
+ class InfrastructureBenchmark
10
+ FIXTURES_DIR = File.expand_path("../spec/fixtures", __dir__)
11
+
12
+ def initialize(iterations: 100)
13
+ @iterations = iterations
14
+ end
15
+
16
+ def run
17
+ puts "Infrastructure Benchmark"
18
+ puts "=" * 60
19
+ puts "Iterations: #{@iterations}"
20
+ puts
21
+
22
+ # Prepare test data - mix of different file types
23
+ test_files = prepare_test_files
24
+ puts "Test files: #{test_files.length}"
25
+ puts
26
+
27
+ benchmark_package_managers
28
+ benchmark_matching_overhead
29
+ benchmark_load_file_info_list_from_contents(test_files)
30
+ benchmark_analyse_file(test_files)
31
+ end
32
+
33
+ def prepare_test_files
34
+ files = []
35
+ # Get a representative sample of fixtures
36
+ %w[package.json Gemfile.lock Cargo.toml pom.xml requirements.txt go.mod].each do |name|
37
+ path = Dir.glob("#{FIXTURES_DIR}/**/#{name}").first
38
+ next unless path
39
+
40
+ files << { file_path: path.sub("#{FIXTURES_DIR}/", ""), contents: File.read(path) }
41
+ end
42
+ files
43
+ end
44
+
45
+ def benchmark_package_managers
46
+ puts "package_managers method:"
47
+ puts "-" * 40
48
+
49
+ runner = Bibliothecary.runner
50
+
51
+ # Warm up
52
+ 5.times { runner.package_managers }
53
+
54
+ time = Benchmark.measure do
55
+ @iterations.times { runner.package_managers }
56
+ end
57
+
58
+ printf " %d calls: %.3f ms total, %.4f ms/call\n",
59
+ @iterations, time.real * 1000, (time.real / @iterations) * 1000
60
+ puts
61
+ end
62
+
63
+ def benchmark_load_file_info_list_from_contents(test_files)
64
+ puts "load_file_info_list_from_contents:"
65
+ puts "-" * 40
66
+
67
+ runner = Bibliothecary.runner
68
+
69
+ # Warm up
70
+ 5.times { runner.load_file_info_list_from_contents(test_files) }
71
+
72
+ time = Benchmark.measure do
73
+ @iterations.times { runner.load_file_info_list_from_contents(test_files) }
74
+ end
75
+
76
+ total_files = test_files.length * @iterations
77
+ printf " %d calls (%d files each): %.3f ms total\n",
78
+ @iterations, test_files.length, time.real * 1000
79
+ printf " %.4f ms/call, %.4f ms/file\n",
80
+ (time.real / @iterations) * 1000,
81
+ (time.real / total_files) * 1000
82
+ puts
83
+ end
84
+
85
+ def benchmark_analyse_file(test_files)
86
+ puts "analyse_file (full pipeline):"
87
+ puts "-" * 40
88
+
89
+ runner = Bibliothecary.runner
90
+
91
+ test_files.each do |file|
92
+ # Warm up
93
+ 3.times { runner.analyse_file(file[:file_path], file[:contents]) }
94
+
95
+ time = Benchmark.measure do
96
+ @iterations.times { runner.analyse_file(file[:file_path], file[:contents]) }
97
+ end
98
+
99
+ printf " %-30s %.4f ms/call\n",
100
+ File.basename(file[:file_path]),
101
+ (time.real / @iterations) * 1000
102
+ end
103
+ puts
104
+ end
105
+
106
+ def benchmark_matching_overhead
107
+ puts "Matching overhead breakdown:"
108
+ puts "-" * 40
109
+
110
+ runner = Bibliothecary.runner
111
+ pms = runner.package_managers
112
+
113
+ # Test with a simple package.json
114
+ test_file = { file_path: "package.json", contents: '{"dependencies":{}}' }
115
+ info = Bibliothecary::FileInfo.new(nil, test_file[:file_path], test_file[:contents])
116
+
117
+ # Benchmark match_info? across all parsers
118
+ time = Benchmark.measure do
119
+ @iterations.times do
120
+ pms.each { |pm| pm.match_info?(info) }
121
+ end
122
+ end
123
+ printf " match_info? x %d parsers: %.4f ms/file\n",
124
+ pms.length, (time.real / @iterations) * 1000
125
+
126
+ # Benchmark just the npm parser's match_info?
127
+ npm = pms.find { |pm| pm.platform_name == "npm" }
128
+ time = Benchmark.measure do
129
+ @iterations.times { npm.match_info?(info) }
130
+ end
131
+ printf " npm.match_info? alone: %.4f ms/call\n",
132
+ (time.real / @iterations) * 1000
133
+
134
+ # Benchmark first_matching_mapping_details (called multiple times per file)
135
+ time = Benchmark.measure do
136
+ @iterations.times do
137
+ npm.send(:first_matching_mapping_details, info)
138
+ end
139
+ end
140
+ printf " first_matching_mapping_details: %.4f ms/call\n",
141
+ (time.real / @iterations) * 1000
142
+
143
+ puts
144
+ end
145
+ end
146
+
147
+ class ParserBenchmark
148
+ FIXTURES_DIR = File.expand_path("../spec/fixtures", __dir__)
149
+
150
+ # Parser methods that require remote services
151
+ REMOTE_PARSERS = {
152
+ "swiftpm" => [:parse_package_swift],
153
+ "hackage" => [:parse_cabal],
154
+ "hex" => [:parse_mix, :parse_mix_lock],
155
+ "carthage" => [:parse_cartfile, :parse_cartfile_private, :parse_cartfile_resolved],
156
+ "clojars" => [:parse_manifest],
157
+ }.freeze
158
+
159
+ # Multi-parser methods shared across many package managers
160
+ MULTI_PARSER_METHODS = %i[
161
+ parse_cyclonedx_json
162
+ parse_cyclonedx_xml
163
+ parse_spdx_json
164
+ parse_spdx_tag_value
165
+ parse_dependencies_csv
166
+ ].freeze
167
+
168
+ def initialize(options = {})
169
+ @iterations = options.fetch(:iterations, 100)
170
+ @parser_filter = options[:parser]
171
+ @verbose = options[:verbose]
172
+ @native_only = options[:native_only]
173
+ @results = {}
174
+ end
175
+
176
+ def run
177
+ puts "Bibliothecary Parser Benchmark"
178
+ puts "=" * 60
179
+ puts "Iterations per file: #{@iterations}"
180
+ puts "Fixtures directory: #{FIXTURES_DIR}"
181
+ puts
182
+
183
+ parsers = filtered_parsers
184
+ puts "Running benchmarks for #{parsers.length} parser(s)..."
185
+ puts
186
+
187
+ parsers.each do |parser|
188
+ benchmark_parser(parser)
189
+ end
190
+
191
+ print_summary
192
+ end
193
+
194
+ def filtered_parsers
195
+ all_parsers = Bibliothecary.package_managers
196
+ return all_parsers unless @parser_filter
197
+
198
+ matching = all_parsers.select do |pm|
199
+ pm.platform_name.downcase.include?(@parser_filter.downcase)
200
+ end
201
+
202
+ if matching.empty?
203
+ puts "No parser matching '#{@parser_filter}' found."
204
+ puts "Available parsers: #{all_parsers.map(&:platform_name).join(', ')}"
205
+ exit 1
206
+ end
207
+
208
+ matching
209
+ end
210
+
211
+ def benchmark_parser(parser)
212
+ platform = parser.platform_name
213
+ mapping = parser.mapping
214
+ fixtures = find_fixtures_for_parser(mapping, platform)
215
+
216
+ if fixtures.empty?
217
+ puts "#{platform}: no matching fixtures found"
218
+ puts if @verbose
219
+ return
220
+ end
221
+
222
+ puts "#{platform} (#{fixtures.length} files)"
223
+ puts "-" * 40
224
+
225
+ parser_total = 0
226
+ file_results = []
227
+
228
+ fixtures.each do |fixture_path, mapping_entry|
229
+ contents = File.read(fixture_path)
230
+ filename = File.basename(fixture_path)
231
+ relative = fixture_path.sub("#{FIXTURES_DIR}/", "")
232
+
233
+ begin
234
+ time = Benchmark.measure do
235
+ @iterations.times do
236
+ parser.send(mapping_entry[:parser], contents, options: { filename: filename })
237
+ end
238
+ end
239
+
240
+ avg_ms = (time.real / @iterations) * 1000
241
+ parser_total += time.real
242
+
243
+ file_results << {
244
+ file: relative,
245
+ total: time.real,
246
+ avg_ms: avg_ms,
247
+ kind: mapping_entry[:kind],
248
+ }
249
+
250
+ if @verbose
251
+ printf " %-40s %8.3f ms/call (%s)\n", relative, avg_ms, mapping_entry[:kind]
252
+ end
253
+ rescue Bibliothecary::RemoteParsingError => e
254
+ puts " #{relative}: skipped (remote parser unavailable)"
255
+ rescue => e
256
+ puts " #{relative}: error - #{e.class}: #{e.message}"
257
+ end
258
+ end
259
+
260
+ unless @verbose
261
+ file_results.sort_by { |r| -r[:avg_ms] }.first(3).each do |r|
262
+ printf " %-40s %8.3f ms/call\n", r[:file], r[:avg_ms]
263
+ end
264
+ puts " ..." if file_results.length > 3
265
+ end
266
+
267
+ avg_total = (parser_total / fixtures.length / @iterations) * 1000
268
+ printf " Total: %.3f ms avg per file\n", avg_total
269
+ puts
270
+
271
+ @results[platform] = {
272
+ files: fixtures.length,
273
+ total_time: parser_total,
274
+ avg_per_file: avg_total,
275
+ file_results: file_results,
276
+ }
277
+ end
278
+
279
+ def find_fixtures_for_parser(mapping, platform_name)
280
+ fixtures = []
281
+ remote_methods = REMOTE_PARSERS[platform_name] || []
282
+
283
+ all_fixtures.each do |fixture_path|
284
+ filename = File.basename(fixture_path)
285
+ relative_path = fixture_path.sub("#{FIXTURES_DIR}/", "")
286
+
287
+ mapping.each do |matcher, entry|
288
+ next unless entry[:parser]
289
+ next if remote_methods.include?(entry[:parser])
290
+ next if @native_only && MULTI_PARSER_METHODS.include?(entry[:parser])
291
+
292
+ if matcher_matches?(matcher, filename, fixture_path)
293
+ fixtures << [fixture_path, entry]
294
+ break
295
+ end
296
+ end
297
+ end
298
+
299
+ fixtures
300
+ end
301
+
302
+ def matcher_matches?(matcher, filename, full_path)
303
+ relative_path = full_path.sub("#{FIXTURES_DIR}/", "")
304
+
305
+ case matcher
306
+ when Regexp
307
+ filename.match?(matcher)
308
+ when String
309
+ filename == matcher
310
+ when Proc
311
+ matcher.call(relative_path)
312
+ else
313
+ false
314
+ end
315
+ end
316
+
317
+ def all_fixtures
318
+ @all_fixtures ||= Dir.glob("#{FIXTURES_DIR}/**/*")
319
+ .select { |f| File.file?(f) }
320
+ .reject { |f| f.include?("/broken/") }
321
+ end
322
+
323
+ def print_summary
324
+ return if @results.empty?
325
+
326
+ puts "=" * 60
327
+ puts "Summary (sorted by avg time per file)"
328
+ puts "=" * 60
329
+
330
+ sorted = @results.sort_by { |_, v| -v[:avg_per_file] }
331
+
332
+ printf "%-20s %10s %12s\n", "Parser", "Files", "Avg ms/file"
333
+ printf "%-20s %10s %12s\n", "-" * 20, "-" * 10, "-" * 12
334
+
335
+ sorted.each do |platform, data|
336
+ printf "%-20s %10d %12.3f\n", platform, data[:files], data[:avg_per_file]
337
+ end
338
+
339
+ puts
340
+ total_files = @results.values.sum { |v| v[:files] }
341
+ total_time = @results.values.sum { |v| v[:total_time] }
342
+ puts "Total: #{total_files} files, #{(total_time * 1000).round(1)} ms total time"
343
+ end
344
+ end
345
+
346
+ options = {
347
+ iterations: 100,
348
+ verbose: false,
349
+ mode: :parsers,
350
+ }
351
+
352
+ OptionParser.new do |opts|
353
+ opts.banner = "Usage: bin/benchmark [options]"
354
+
355
+ opts.on("-p", "--parser NAME", "Only benchmark parsers matching NAME") do |p|
356
+ options[:parser] = p
357
+ end
358
+
359
+ opts.on("-n", "--iterations N", Integer, "Number of iterations per file (default: 100)") do |n|
360
+ options[:iterations] = n
361
+ end
362
+
363
+ opts.on("-v", "--verbose", "Show all files, not just slowest") do
364
+ options[:verbose] = true
365
+ end
366
+
367
+ opts.on("--native-only", "Exclude shared multi-parsers (CycloneDX, SPDX, CSV)") do
368
+ options[:native_only] = true
369
+ end
370
+
371
+ opts.on("--infra", "Benchmark infrastructure (load_file_info_list, etc)") do
372
+ options[:mode] = :infra
373
+ end
374
+
375
+ opts.on("-h", "--help", "Show this help") do
376
+ puts opts
377
+ exit
378
+ end
379
+ end.parse!
380
+
381
+ case options[:mode]
382
+ when :infra
383
+ InfrastructureBenchmark.new(iterations: options[:iterations]).run
384
+ else
385
+ ParserBenchmark.new(options).run
386
+ end
@@ -40,7 +40,7 @@ module Bibliothecary
40
40
  # If your Parser needs to return multiple responses for one file, please override this method
41
41
  # For example see conda.rb
42
42
  kind = determine_kind_from_info(info)
43
- parser_result = parse_file(info.relative_path, info.contents, options: options)
43
+ parser_result = parse_file_info(info, options: options)
44
44
  parser_result = ParserResult.new(dependencies: []) if parser_result.nil? # work around any legacy parsers that return nil
45
45
 
46
46
  Bibliothecary::Analyser.create_analysis(platform_name, info.relative_path, kind, parser_result)
@@ -52,26 +52,31 @@ module Bibliothecary
52
52
  # Call the matching parse class method for this file with
53
53
  # these contents
54
54
  def parse_file(filename, contents, options: {})
55
- details = first_matching_mapping_details(FileInfo.new(nil, filename, contents))
55
+ parse_file_info(FileInfo.new(nil, filename, contents), options: options)
56
+ end
57
+
58
+ # Parse a file using its FileInfo object, reusing cached mapping details.
59
+ def parse_file_info(info, options: {})
60
+ details = first_matching_mapping_details(info)
56
61
 
57
62
  # this can be raised if we don't check match?/match_info?,
58
63
  # OR don't have the file contents when we check them, so
59
64
  # it turns out for example that a .xml file isn't a
60
65
  # manifest after all.
61
- raise Bibliothecary::FileParsingError.new("No parser for this file type", filename) unless details[:parser]
66
+ raise Bibliothecary::FileParsingError.new("No parser for this file type", info.relative_path) unless details[:parser]
62
67
 
63
68
  # The `parser` method should raise an exception if the file is malformed,
64
69
  # should return empty [] if the file is fine but simply doesn't contain
65
70
  # any dependencies, and should never return nil. At the time of writing
66
71
  # this comment, some of the parsers return [] or nil to mean an error
67
72
  # which is confusing to users.
68
- send(details[:parser], contents, options: options.merge(filename: filename))
73
+ send(details[:parser], info.contents, options: options.merge(filename: info.relative_path))
69
74
  rescue Exception => e # default is StandardError but C bindings throw Exceptions # rubocop:disable Lint/RescueException
70
75
  # the C xml parser also puts a newline at the end of the message
71
76
  location = e.backtrace_locations[0]
72
77
  .to_s
73
78
  .then { |l| l =~ /bibliothecary\// ? l.split("bibliothecary/").last : l.split("gems/").last }
74
- raise Bibliothecary::FileParsingError.new(e.message.strip, filename, location)
79
+ raise Bibliothecary::FileParsingError.new(e.message.strip, info.relative_path, location)
75
80
  end
76
81
 
77
82
  private
@@ -52,12 +52,14 @@ module Bibliothecary
52
52
  first_matching_mapping_details(info).any?
53
53
  end
54
54
 
55
- private
56
-
55
+ # Get mapping details for this file, using cache if available.
56
+ # The cache is stored on the FileInfo object to avoid repeated lookups.
57
57
  def first_matching_mapping_details(info)
58
- mapping
59
- .find { |matcher, details| mapping_entry_match?(matcher, details, info) }
60
- &.last || {}
58
+ info.cached_mapping_details(self) do
59
+ mapping
60
+ .find { |matcher, details| mapping_entry_match?(matcher, details, info) }
61
+ &.last || {}
62
+ end
61
63
  end
62
64
  end
63
65
  end
@@ -78,6 +78,7 @@ module Bibliothecary
78
78
 
79
79
  original_mapping = mapping
80
80
 
81
+ singleton_class.remove_method(:mapping)
81
82
  define_singleton_method(:mapping) do
82
83
  original_mapping.merge(klass.mapping)
83
84
  end
@@ -46,10 +46,17 @@ module Bibliothecary
46
46
  @contents = contents
47
47
 
48
48
  @package_manager = nil
49
+ @mapping_cache = {}
49
50
  end
50
51
 
51
52
  def groupable?
52
53
  @package_manager&.groupable?(self)
53
54
  end
55
+
56
+ # Cache and retrieve mapping details for a given package manager class.
57
+ # This avoids repeatedly calling first_matching_mapping_details.
58
+ def cached_mapping_details(package_manager_class)
59
+ @mapping_cache[package_manager_class] ||= yield
60
+ end
54
61
  end
55
62
  end
@@ -201,7 +201,7 @@ module Bibliothecary
201
201
 
202
202
  def self.parse_paket_lock(file_contents, options: {})
203
203
  lines = file_contents.split("\n")
204
- package_version_re = /\s+(?<name>\S+)\s\((?<version>\d+\.\d+[.\d+[.\d+]*]*)\)/
204
+ package_version_re = /\s+(?<name>\S+)\s\((?<version>\d+(?:\.\d+)+)\)/
205
205
  packages = lines.select { |line| package_version_re.match(line) }.map { |line| package_version_re.match(line) }.map do |match|
206
206
  Dependency.new(
207
207
  name: match[:name].strip,
@@ -40,12 +40,160 @@ module Bibliothecary
40
40
  end
41
41
 
42
42
  def applicable_package_managers(info)
43
- managers = package_managers.select { |pm| pm.match_info?(info) }
43
+ candidates = candidate_package_managers(info.relative_path)
44
+ managers = candidates.select { |pm| pm.match_info?(info) }
44
45
  managers.empty? ? [nil] : managers
45
46
  end
46
47
 
47
48
  def package_managers
48
- Bibliothecary::Parsers.constants.map { |c| Bibliothecary::Parsers.const_get(c) }.sort_by { |c| c.to_s.downcase }
49
+ @package_managers ||= Bibliothecary::Parsers.constants
50
+ .map { |c| Bibliothecary::Parsers.const_get(c) }
51
+ .sort_by { |c| c.to_s.downcase }
52
+ .freeze
53
+ end
54
+
55
+ # Get candidate package managers for a file path using filename/extension index.
56
+ # Falls back to all package managers for unindexed patterns.
57
+ def candidate_package_managers(path)
58
+ filename = File.basename(path)
59
+ filename_lower = filename.downcase
60
+
61
+ # Check exact filename match first (use fetch to avoid default block on frozen hash)
62
+ candidates = filename_index.fetch(filename_lower, nil)
63
+ return candidates if candidates
64
+
65
+ # Check extension matches
66
+ extension_index.each do |ext, ext_candidates|
67
+ return ext_candidates if filename_lower.end_with?(ext)
68
+ end
69
+
70
+ # Fall back to all package managers for unindexed patterns
71
+ package_managers
72
+ end
73
+
74
+ # Build an index mapping lowercase filenames to candidate parsers
75
+ def filename_index
76
+ @filename_index ||= build_filename_index
77
+ end
78
+
79
+ # Build an index mapping lowercase extensions to candidate parsers
80
+ def extension_index
81
+ @extension_index ||= build_extension_index
82
+ end
83
+
84
+ def build_filename_index
85
+ index = {}
86
+
87
+ package_managers.each do |pm|
88
+ pm.mapping.each_key do |matcher|
89
+ next unless matcher.is_a?(Proc)
90
+
91
+ # Extract filenames from the matcher by testing common patterns
92
+ extract_filenames_from_matcher(matcher).each do |filename|
93
+ key = filename.downcase
94
+ index[key] ||= []
95
+ index[key] << pm
96
+ end
97
+ end
98
+ end
99
+
100
+ # Deduplicate and freeze
101
+ index.transform_values! { |v| v.uniq.freeze }
102
+ index.freeze
103
+ end
104
+
105
+ def build_extension_index
106
+ index = {}
107
+
108
+ package_managers.each do |pm|
109
+ pm.mapping.each_key do |matcher|
110
+ next unless matcher.is_a?(Proc)
111
+
112
+ # Extract extensions from the matcher
113
+ extract_extensions_from_matcher(matcher).each do |ext|
114
+ key = ext.downcase
115
+ index[key] ||= []
116
+ index[key] << pm
117
+ end
118
+ end
119
+ end
120
+
121
+ # Deduplicate and freeze
122
+ index.transform_values! { |v| v.uniq.freeze }
123
+ index.freeze
124
+ end
125
+
126
+ # Try to extract filename patterns from a matcher proc
127
+ def extract_filenames_from_matcher(matcher)
128
+ filenames = []
129
+
130
+ # Test common manifest filenames to see which ones match
131
+ common_filenames.each do |filename|
132
+ filenames << filename if matcher.call(filename)
133
+ end
134
+
135
+ filenames
136
+ end
137
+
138
+ # Try to extract extension patterns from a matcher proc
139
+ def extract_extensions_from_matcher(matcher)
140
+ extensions = []
141
+
142
+ # Test common extensions
143
+ common_extensions.each do |ext|
144
+ test_file = "test#{ext}"
145
+ extensions << ext if matcher.call(test_file)
146
+ end
147
+
148
+ extensions
149
+ end
150
+
151
+ def common_filenames
152
+ @common_filenames ||= %w[
153
+ package.json package-lock.json yarn.lock pnpm-lock.yaml npm-shrinkwrap.json npm-ls.json bun.lock
154
+ Gemfile Gemfile.lock gems.rb gems.locked
155
+ Cargo.toml Cargo.lock
156
+ go.mod go.sum Gopkg.toml Gopkg.lock glide.yaml glide.lock Godeps
157
+ requirements.txt Pipfile Pipfile.lock pyproject.toml poetry.lock setup.py
158
+ pom.xml build.gradle build.gradle.kts ivy.xml
159
+ composer.json composer.lock
160
+ Podfile Podfile.lock
161
+ pubspec.yaml pubspec.lock
162
+ Package.swift Package.resolved
163
+ Cartfile Cartfile.resolved Cartfile.private
164
+ mix.exs mix.lock
165
+ project.clj
166
+ shard.yml shard.lock
167
+ environment.yml environment.yaml
168
+ bower.json
169
+ elm-package.json elm.json
170
+ vcpkg.json
171
+ dub.json dub.sdl
172
+ haxelib.json
173
+ action.yml action.yaml
174
+ Brewfile Brewfile.lock.json
175
+ REQUIRE Project.toml Manifest.toml
176
+ paket.lock packages.config Project.json Project.lock.json packages.lock.json project.assets.json
177
+ DESCRIPTION
178
+ META.json META.yml cpanfile
179
+ cabal.config
180
+ cyclonedx.json cyclonedx.xml
181
+ dependencies.csv
182
+ docker-compose.yml docker-compose.yaml Dockerfile
183
+ MLmodel
184
+ Modelfile
185
+ dvc.yaml
186
+ cog.yaml
187
+ bentofile.yaml
188
+ uv.lock pylock.toml
189
+ ].freeze
190
+ end
191
+
192
+ def common_extensions
193
+ @common_extensions ||= %w[
194
+ .gemspec .nuspec .csproj .cabal .podspec .podspec.json
195
+ .spdx .cdx.json .cdx.xml
196
+ ].freeze
49
197
  end
50
198
 
51
199
  # Parses an array of format [{file_path: "", contents: ""},] to match
@@ -120,7 +268,9 @@ module Bibliothecary
120
268
  def analyse_file(file_path, contents)
121
269
  contents = Bibliothecary.utf8_string(contents)
122
270
 
123
- package_managers.select { |pm| pm.match?(file_path, contents) }.map do |pm|
271
+ # Use filename index to quickly find candidate parsers
272
+ candidates = candidate_package_managers(file_path)
273
+ candidates.select { |pm| pm.match?(file_path, contents) }.map do |pm|
124
274
  pm.analyse_contents(file_path, contents, options: @options)
125
275
  end.flatten.uniq.compact
126
276
  end
@@ -137,14 +287,24 @@ module Bibliothecary
137
287
  ignored_dirs.include?(f) || f.start_with?(*ignored_dirs_with_slash)
138
288
  end
139
289
  allowed_file_list = allowed_file_list.reject { |f| ignored_files.include?(f) }
140
- package_managers.map do |pm|
141
- # (skip rubocop false positive, since match? is a custom method)
142
- allowed_file_list.select do |file_path| # rubocop:disable Style/SelectByRegexp
143
- # this is a call to match? without file contents, which will skip
144
- # ambiguous filenames that are only possibly a manifest
145
- pm.match?(file_path)
290
+
291
+ # Fast path: use filename index directly for known manifest filenames
292
+ # This avoids creating FileInfo objects and calling match? for each file
293
+ manifests = []
294
+ allowed_file_list.each do |file_path|
295
+ filename_lower = File.basename(file_path).downcase
296
+
297
+ # Check if this filename is in our index (known manifest)
298
+ if filename_index.key?(filename_lower)
299
+ manifests << file_path
300
+ next
146
301
  end
147
- end.flatten.uniq.compact
302
+
303
+ # Check extension index
304
+ matched = extension_index.keys.any? { |ext| filename_lower.end_with?(ext) }
305
+ manifests << file_path if matched
306
+ end
307
+ manifests.sort
148
308
  end
149
309
 
150
310
  def ignored_dirs
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bibliothecary
4
- VERSION = "14.2.0"
4
+ VERSION = "14.3.0"
5
5
  end
data/lib/bibliothecary.rb CHANGED
@@ -100,19 +100,16 @@ module Bibliothecary
100
100
  end
101
101
 
102
102
  def self.runner
103
- configuration
104
- @runner
103
+ @runner ||= Runner.new(configuration)
105
104
  end
106
105
 
107
106
  def self.configuration
108
107
  @configuration ||= Configuration.new
109
- @runner = Runner.new(@configuration)
110
- @configuration
111
108
  end
112
109
 
113
110
  def self.reset
114
- @configuration = Configuration.new
115
- @runner = Runner.new(@configuration)
111
+ @configuration = nil
112
+ @runner = nil
116
113
  end
117
114
 
118
115
  def self.configure
@@ -4,7 +4,7 @@ class DockerfileParser
4
4
  end
5
5
 
6
6
  def parse
7
- fromlines = @file_contents.split("\n").select { |line| line.strip =~ /^\FROM/i }
7
+ fromlines = @file_contents.split("\n").select { |line| line.strip =~ /^FROM/i }
8
8
 
9
9
  fromlines.map do |line|
10
10
  line = line.strip.split(' ')
@@ -4,10 +4,10 @@ class ModelfileParser
4
4
  end
5
5
 
6
6
  def parse
7
- fromlines = @file_contents.split("\n").select { |line| line.strip =~ /^\FROM/i }
7
+ fromlines = @file_contents.split("\n").select { |line| line.strip =~ /^FROM/i }
8
8
 
9
9
  fromlines.map do |line|
10
- line = line.strip.split(' ')
10
+ line = line.strip.split
11
11
 
12
12
  # Remove the FROM keyword
13
13
  line.shift
@@ -23,19 +23,19 @@ class ModelfileParser
23
23
  model_ref = line[0]
24
24
 
25
25
  # Check if it's a file path (local GGUF or directory)
26
- if model_ref =~ /\.(gguf|safetensors)$/i || model_ref.start_with?('./', '/')
26
+ if model_ref =~ /\.(gguf|safetensors)$/i || model_ref.start_with?("./", "/")
27
27
  {
28
28
  name: File.basename(model_ref),
29
- requirement: 'local',
30
- type: 'runtime'
29
+ requirement: "local",
30
+ type: "runtime",
31
31
  }
32
32
  else
33
33
  # It's a registry model (e.g., llama3.2 or llama3.2:latest)
34
- parts = model_ref.split(':')
34
+ parts = model_ref.split(":")
35
35
  {
36
36
  name: parts[0],
37
- requirement: parts[1] || 'latest',
38
- type: 'runtime'
37
+ requirement: parts[1] || "latest",
38
+ type: "runtime",
39
39
  }
40
40
  end
41
41
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecosystems-bibliothecary
3
3
  version: !ruby/object:Gem::Version
4
- version: 14.2.0
4
+ version: 14.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt
@@ -180,6 +180,7 @@ dependencies:
180
180
  email:
181
181
  - andrewnez@gmail.com
182
182
  executables:
183
+ - benchmark
183
184
  - bibliothecary
184
185
  - console
185
186
  - setup
@@ -201,6 +202,7 @@ files:
201
202
  - README.md
202
203
  - Rakefile
203
204
  - bibliothecary.gemspec
205
+ - bin/benchmark
204
206
  - bin/bibliothecary
205
207
  - bin/console
206
208
  - bin/setup