RubyGems - easy_cols - Versions diffs - 0.1.0 - Mend

easy_cols 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +7 -0
data/.cursor/rules/ai.mdc +38 -0
data/.cursor/rules/polly_state.mdc +45 -0
data/.github/workflows/ci.yml +116 -0
data/.gitignore +33 -0
data/.rspec +6 -0
data/Gemfile +16 -0
data/Gemfile.lock +96 -0
data/LICENSE +21 -0
data/README.md +296 -0
data/Rakefile +112 -0
data/TODO.md +14 -0
data/USAGE.md +78 -0
data/bin/easy_cols +7 -0
data/bin/ec +1 -0
data/cols.gemspec +45 -0
data/lib/easy_cols/cli.rb +252 -0
data/lib/easy_cols/column_selector.rb +61 -0
data/lib/easy_cols/formatter.rb +164 -0
data/lib/easy_cols/parser.rb +146 -0
data/lib/easy_cols/version.rb +5 -0
data/lib/easy_cols.rb +15 -0
data/test_data/sample.csv +5 -0
data/test_data/sample_table.txt +6 -0
metadata +190 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,112 @@
+# frozen_string_literal: true
+require 'bundler/gem_tasks'
+require 'rspec/core/rake_task'
+require_relative 'lib/easy_cols/version'
+RSpec::Core::RakeTask.new(:spec)
+task default: :spec
+namespace :version do
+  desc 'Show current version'
+  task :current do
+    puts "Current version: #{EasyCols::VERSION}"
+  end
+  def bump_version(type)
+    version_file = File.join(__dir__, 'lib', 'easy_cols', 'version.rb')
+    content = File.read(version_file)
+    current_version = EasyCols::VERSION
+    major, minor, patch = current_version.split('.').map(&:to_i)
+    case type
+    when :major
+      major += 1
+      minor = 0
+      patch = 0
+    when :minor
+      minor += 1
+      patch = 0
+    when :patch
+      patch += 1
+    else
+      raise "Unknown version type: #{type}"
+    end
+    new_version = "#{major}.#{minor}.#{patch}"
+    # Update version file
+    new_content = content.sub(/VERSION = ['"]#{current_version}['"]/, "VERSION = '#{new_version}'")
+    File.write(version_file, new_content)
+    puts "Version bumped from #{current_version} to #{new_version}"
+    puts "Updated #{version_file}"
+    puts "\nDon't forget to:"
+    puts "  git add #{version_file}"
+    puts "  git commit -m 'Bump version to #{new_version}'"
+    new_version
+  end
+  desc 'Bump major version (x.0.0)'
+  task :major do
+    bump_version(:major)
+  end
+  desc 'Bump minor version (0.x.0)'
+  task :minor do
+    bump_version(:minor)
+  end
+  desc 'Bump patch version (0.0.x)'
+  task :patch do
+    bump_version(:patch)
+  end
+end
+desc 'Create and push release tag for current version'
+task :release do
+  require 'open3'
+  version = EasyCols::VERSION
+  tag_name = "v#{version}"
+  # Check we're on main branch
+  current_branch, _ = Open3.capture2('git', 'rev-parse', '--abbrev-ref', 'HEAD')
+  current_branch = current_branch.strip
+  unless current_branch == 'main'
+    raise "Must be on main branch to release. Current branch: #{current_branch}"
+  end
+  # Check for uncommitted changes
+  status, _ = Open3.capture2('git', 'status', '--porcelain')
+  unless status.strip.empty?
+    raise "Working directory has uncommitted changes. Commit or stash them first."
+  end
+  # Check if tag already exists
+  tag_check, _ = Open3.capture2('git', 'tag', '-l', tag_name)
+  unless tag_check.strip.empty?
+    raise "Tag #{tag_name} already exists. Did you forget to bump the version?"
+  end
+  # Check if we're up to date with remote
+  Open3.capture2('git', 'fetch', 'origin')
+  local_commit, _ = Open3.capture2('git', 'rev-parse', 'HEAD')
+  remote_commit, _ = Open3.capture2('git', 'rev-parse', 'origin/main')
+  if local_commit.strip != remote_commit.strip
+    raise "Local main is not up to date with origin/main. Please pull or push first."
+  end
+  # Create and push tag
+  puts "Creating release tag #{tag_name}..."
+  system('git', 'tag', '-a', tag_name, '-m', "Release #{tag_name}") or raise "Failed to create tag"
+  puts "Pushing tag to origin..."
+  system('git', 'push', 'origin', tag_name) or raise "Failed to push tag"
+  puts "\n✓ Release tag #{tag_name} created and pushed!"
+  puts "CI will automatically create the GitHub release and publish to RubyGems."
+end

data/TODO.md ADDED Viewed

@@ -0,0 +1,14 @@
+# TODO
+## Future Enhancements
+### Streaming Pipeline Support
+Implement a pure streaming pipeline to handle arbitrarily large files without loading the entire input into memory. This would enable processing files of any size by:
+- Reading input line-by-line instead of loading everything into memory
+- Detecting headers and separator lines incrementally (especially for table format)
+- Outputting rows incrementally as they're processed
+- Handling column selection with header-based selectors in a streaming context
+This would trade some parsing flexibility (look-ahead/backtrack) for memory efficiency, which could be valuable for very large inputs.

data/USAGE.md ADDED Viewed

@@ -0,0 +1,78 @@
+ec [FIELD | FIELD1..FIELD2 | FIELD1-FIELD2] ...
+Ranges of fields can be specified with STARTFIELD..STOPFIELD or STARTFIELD-STOPFIELD.
+FIELD can be a field index (an integer), or a field name.
+The basic idea is to make it *easy* to fetch specific columns of STDIN (or a file).
+There are several supported *formats* for parsing:
+  csv           Comma-separated columns; fields can be quoted; there can be one header line
+  tsv           Tab-separated columns; fields can be quoted; there can be one header line
+  tbl           An ASCII table format, with a header, a separator line (using -+-), and data rows (using | separator)
+Options:
+  --in=FORMAT                              # input format (default: auto)
+  --out=FORMAT                             # output format (default: same)
+  --delim=CHARS       | -d CHARS           # split fields by any of CHARS (see CHARS examples below)
+  --pattern=PATTERN   | -p PATTERN         # split fields by PATTERN
+  --format FORMAT     | -f FORMAT          # assume input is in FORMAT (deprecated, use --in)
+  --quotes            | -q                 # parse matching quotes before splitting
+  --headers[=NUM]     | -h NUM             # ignore header line(s), NUM=1 by default
+  --lines             | -l                 # ignore horizontal lines ("---*", "___*")
+  --blanklines        | -b                 # ignore blank lines
+  --comments=PATTERN  | -c PATTERN         # strip any comment prefix
+  --cprefix=PATTERN   | -c PATTERN
+  --cblock=PATTERN    | -
+  --start=[NUM | PAT] | -s NUM_PAT         # ignore lines before NUM or PAT
+  --stop=[NUM | PAT]  | -S NUM_PAT         # stop at line NUM, or first line matching PAT
+  --no-quotes         | -Q                 # do not parse out matched quotes
+  --no-headers[=NUM]  | -H NUM             # do not ignore header lines (NUM=1 by default)
+  --no-lines          | -L                 # do not ignore horizontal lines
+  --no-blanklines     | -B                 # do not ignore blank lines
+  --csv               # parse input as CSV format (sets --in=csv)
+  --tsv               # parse input as TSV format (sets --in=tsv)
+  --tbl               # parse input as table format (sets --in=table)
+  --table             # output as table format (sets --out=table with pipe separator)
+  --plain             # parse input as plain format (sets --in=plain)
+  --LANGUAGE                                # set comments pattern according to the LANGUAGE
+CHARS="\s"              # default is whitespace, same as " \t\n\r"
+CHARS=":"               # separate fields with ':'
+CHARS=","               # separate fields with ','
+Input Formats (--in=FORMAT):
+    --in=csv    # Comma-separated values, fields can be quoted
+    --in=tsv    # Tab-separated values
+    --in=table  # ASCII table format with header, separator line, and data
+    --in=tbl    # Alias for table
+    --in=plain  # Whitespace-separated values
+    --in=auto   # Auto-detect format (default)
+Output Formats (--out=FORMAT):
+    --out=csv    # Output as CSV (comma-separated, properly quoted)
+    --out=tsv    # Output as TSV (tab-separated)
+    --out=table  # Output as ASCII table (with column widths and separator lines)
+    --out=tbl    # Alias for table
+    --out=plain  # Output as whitespace-separated (aligned columns)
+    --out=same   # Use same format as input (default)
+Format Conversion Examples:
+    ec --in=csv --out=table data.csv   # Convert CSV to table format
+    ec --in=table --out=csv data.txt   # Convert table to CSV format
+    ec --out=table data.csv            # Auto-detect input, convert to table
+    ec --in=tsv --out=csv data.tsv     # Convert TSV to CSV
+The comments pattern can be given explicitly, or can be inferred by the input
+file type (if given), or by --LANGUAGE option, where LANGUAGE is one of: ruby,
+c, go, elixir, python, java, scala, etc.  Each language has a different style
+for doing block comments, and this tool can be used to extract columns from
+blocks of comment text anywhere in a file, given the --start and --stop patterns,
+along with the comments pattern (--comments=PATTERN or --LANGUAGE).
+Examples:
+  --ruby    # => --comments="^\\s*# "
+  --js      # => --comments="^\\s*//"
+  --python  # => --comments="^\\s*# "

data/bin/easy_cols ADDED Viewed

@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+require_relative '../lib/easy_cols'
+EasyCols::CLI.new.run(ARGV)

data/bin/ec ADDED Viewed

	@@ -0,0 +1 @@
1	+ easy_cols

data/cols.gemspec ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+require_relative 'lib/easy_cols/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'easy_cols'
+  spec.version       = EasyCols::VERSION
+  spec.authors       = ['Alan K. Stebbens']
+  spec.email         = ['aks@stebbens.org']
+  spec.summary       = 'A powerful command-line tool for extracting and processing columns from structured text data'
+  spec.description   = <<~DESC
+    EasyCols is a flexible command-line utility for extracting specific columns from
+    structured text data in various formats (CSV, TSV, table, plain text). It supports
+    sophisticated parsing options including quote handling, comment stripping, header
+    processing, and language-specific comment patterns.  It can be used on both files and STDIN.
+  DESC
+  spec.homepage      = 'https://github.com/aks/easy_cols'
+  spec.license       = 'MIT'
+  spec.files         = Dir.chdir(File.expand_path(__dir__)) do
+    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
+  end
+  spec.bindir        = 'bin'
+  spec.executables   = ['easy_cols', 'ec']
+  spec.require_paths = ['lib']
+  spec.required_ruby_version = '>= 3.2.0'
+  spec.add_dependency 'csv', '~> 3.0'
+  spec.add_dependency 'optparse', '~> 0.1'
+  spec.add_development_dependency 'fuubar', '~> 2.5'
+  spec.add_development_dependency 'rspec', '~> 3.12'
+  spec.add_development_dependency 'rubocop', '~> 1.50'
+  spec.add_development_dependency 'rubocop-rspec', '~> 2.20'
+  spec.add_development_dependency 'simplecov', '~> 0.22'
+  spec.add_development_dependency 'rake', '~> 13.0'
+  spec.metadata['rubygems_mfa_required'] = 'true'
+  spec.metadata['homepage_uri']          = spec.homepage
+  spec.metadata['source_code_uri']       = 'https://github.com/aks/easy_cols'
+  spec.metadata['changelog_uri']         = 'https://github.com/aks/easy_cols/blob/main/CHANGELOG.md'
+  spec.metadata['bug_tracker_uri']       = 'https://github.com/aks/easy_cols/issues'
+end

data/lib/easy_cols/cli.rb ADDED Viewed

@@ -0,0 +1,252 @@
+# frozen_string_literal: true
+require 'optparse'
+require 'stringio'
+module EasyCols
+  $PROG = File.basename($0)
+  class CLI
+    def initialize
+      @options = {}
+      @column_selectors = []
+    end
+    def run(argv)
+      parse_options(argv)
+      process_input
+    rescue Error => e
+      warn "Error: #{e.message}"
+      exit 1
+    rescue StandardError => e
+      warn "Unexpected error: #{e.message}"
+      exit 1
+    end
+    private
+    def parse_options(argv)
+      OptionParser.new do |opts|
+        opts.banner = "Usage: #{$PROG} [options] <file> [column_selectors...]"
+        opts.separator <<~HELP
+          Extract and display specific columns from structured text data.
+          Column selectors can be:
+            - Column index: 0, 1, 2, etc. (0-based)
+            - Column range: 0-5, 2-10, etc.
+            - Comma-separated indices: 0,2,5
+            - Header name: 'Name', 'Email', etc.
+          Examples:
+            #{$PROG} data.csv 0 1 2                  # Show columns 0, 1, 2
+            #{$PROG} data.csv 'Name' 'Email'         # Show Name and Email columns
+            #{$PROG} data.csv 0-5                    # Show columns 0 through 5
+            #{$PROG} --format tsv data.tsv 0 1       # Parse as TSV
+            #{$PROG} --table data.txt 0 1 2          # Parse as table format
+            #{$PROG} - < data.csv                    # Read from STDIN
+          Options:
+        HELP
+        opts.on('--in=FORMAT', Parser::SUPPORTED_FORMATS,
+                "Input format (default: auto, formats: #{Parser::SUPPORTED_FORMATS.join(', ')})") do |format|
+          @options[:input_format] = format
+        end
+        opts.on('--out=FORMAT', Formatter::SUPPORTED_OUTPUT_FORMATS,
+                "Output format (default: same, formats: #{Formatter::SUPPORTED_OUTPUT_FORMATS.join(', ')})") do |format|
+          @options[:output_format] = format
+        end
+        # Legacy options for backward compatibility
+        opts.on('-f', '--format=FORMAT', Parser::SUPPORTED_FORMATS,
+                "Input format (deprecated, use --in)") do |format|
+          @options[:input_format] = format
+        end
+        opts.on('-d', '--delimiter=CHARS', 'Field delimiter') do |delim|
+          @options[:delimiter] = delim
+        end
+        opts.on('-D', '--output-delimiter=STR', 'Output separator (default: " , ")') do |str|
+          @options[:output_separator] = str
+        end
+        opts.on('-H', '--no-header', 'Do not output header row') do
+          @options[:no_header] = true
+        end
+        opts.on('--table', 'Use table format for output (sets output format to table)') do
+          # Only set output format, not input format
+          # Input format will be auto-detected unless explicitly set
+          @options[:output_format] = 'table'
+          @options[:output_separator] = ' | '
+          @options[:table_mode] = true
+        end
+        opts.on('--pipe', 'Use pipe separator (" | ")') do
+          @options[:output_separator] = ' | '
+        end
+        opts.on('--tab', 'Use tab separator') do
+          @options[:output_separator] = "\t"
+        end
+        opts.on('--comma', 'Use comma separator (",")') do
+          @options[:output_separator] = ','
+        end
+        # Convenience format options for input
+        opts.on('--csv', 'Parse input as CSV format') do
+          @options[:input_format] = 'csv'
+        end
+        opts.on('--tsv', 'Parse input as TSV format') do
+          @options[:input_format] = 'tsv'
+        end
+        opts.on('--tbl', 'Parse input as table format') do
+          @options[:input_format] = 'table'
+        end
+        opts.on('--plain', 'Parse input as plain (whitespace-separated) format') do
+          @options[:input_format] = 'plain'
+        end
+        opts.on('-v', '--verbose', 'Verbose output') do
+          @options[:verbose] = true
+        end
+        opts.on('-q', '--quiet', 'Quiet output') do
+          @options[:quiet] = true
+        end
+        opts.on('-c', '--count', 'Count columns instead of selecting') do
+          @options[:count_mode] = true
+        end
+        opts.on('-h', '--help', 'Show this help') do
+          puts opts
+          exit 0
+        end
+      end.parse!(argv)
+      @file_path = argv[0]
+      @column_selectors = parse_column_selectors(argv[1..]) if argv.length > 1
+    end
+    def parse_column_selectors(selectors)
+      selectors.map do |selector|
+        case selector
+        when /^\d+$/            # Single integer
+          selector.to_i
+        when /^\d+-\d+$/        # Range
+          start_idx, end_idx = selector.split('-').map(&:to_i)
+          (start_idx..end_idx).to_a
+        when /,/                # Comma-separated
+          selector.split(',').map(&:strip).map(&:to_i)
+        else                    # Header name
+          selector
+        end
+      end
+    end
+    def process_input
+      input_data = read_input
+      if @options[:count_mode]
+        count_columns(input_data)
+      else
+        select_columns(input_data)
+      end
+    end
+    def read_input
+      if @file_path == '-' || @file_path.nil?
+        $stdin.read
+      else
+        File.read(@file_path)
+      end
+    end
+    def select_columns(input_data)
+      # Determine input format
+      input_format = @options[:input_format] || 'auto'
+      # Parser only needs input/parsing options
+      parser_options = {
+        format: input_format,
+        delimiter: @options[:delimiter]
+      }.compact
+      parser = Parser.new(**parser_options)
+      data = parser.parse(input_data)
+      return if data.empty?
+      headers = data.first
+      selector = ColumnSelector.new(headers)
+      # If no selectors provided, default to all columns
+      selectors = if @column_selectors.empty?
+                    (0...headers.length).to_a
+                  else
+                    @column_selectors
+                  end
+      selected_indices = selector.select(selectors)
+      # Determine output format
+      # If 'same', use the detected/parsed input format
+      # However, if output_separator is explicitly set (not default), keep as 'same'
+      # to use format_default which respects the separator
+      actual_input_format = parser.detected_format || 'csv'
+      output_format = @options[:output_format] || 'same'
+      # If separator is explicitly set, use default format (not format-specific)
+      # Otherwise, convert 'same' to the actual input format
+      if output_format == 'same' && !@options[:output_separator]
+        output_format = actual_input_format
+      end
+      # Formatter needs output format and options
+      formatter_options = {
+        format: output_format,
+        separator: @options[:output_separator] || ' , ',
+        show_header: !@options[:no_header],
+        table_mode: @options[:table_mode] || (output_format == 'table' || output_format == 'tbl')
+      }
+      formatter = Formatter.new(formatter_options)
+      output = formatter.format(data, selected_indices)
+      puts output
+    end
+    def count_columns(input_data)
+      # Determine input format
+      input_format = @options[:input_format] || 'auto'
+      # Parser only needs input/parsing options
+      parser_options = {
+        format: input_format,
+        delimiter: @options[:delimiter]
+      }.compact
+      parser = Parser.new(**parser_options)
+      data = parser.parse(input_data)
+      return if data.empty?
+      headers = data.first
+      puts "Headers: #{headers.join(', ')}" unless @options[:quiet]
+      puts "Total columns: #{headers.length}"
+      data[1..].each_with_index do |row, index|
+        puts "Row #{index + 1}: #{row.length} columns" unless @options[:quiet]
+      end
+    end
+  end
+end

data/lib/easy_cols/column_selector.rb ADDED Viewed

@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+module EasyCols
+  class ColumnSelector
+    def initialize(headers)
+      @headers = headers
+    end
+    def select(selectors)
+      indices = []
+      selectors.each do |selector|
+        result = case selector
+                 when Integer then select_by_index(selector)
+                 when Range   then select_by_range(selector)
+                 when Array   then select_by_array(selector)
+                 when String  then select_by_name(selector)
+                 else
+                   raise SelectionError, "Invalid selector type: #{selector.class}"
+                 end
+        indices.concat(result)
+      end
+      indices.uniq.sort
+    end
+    private
+    def select_by_index(index)
+      if index >= 0 && index < @headers.length
+        [index]
+      else
+        raise SelectionError, "Column index #{index} is out of range (0-#{@headers.length - 1})"
+      end
+    end
+    def select_by_range(range)
+      range.to_a.select { |idx| in_range?(idx) }
+    end
+    def select_by_array(array)
+      array.select { |idx| in_range?(idx) }
+    end
+    def in_range?(index)
+      in_range = index >= 0 && index < @headers.length
+      warn "Warning: Column index #{index} is out of range (0-#{@headers.length - 1})" unless in_range
+      in_range
+    end
+    def select_by_name(name)
+      header_idx = @headers.find_index(name)
+      if header_idx
+        [header_idx]
+      else
+        raise SelectionError, "Column '#{name}' not found. Available: #{@headers.join(', ')}"
+      end
+    end
+  end
+end