easy_cols 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
module EasyCols
  # Renders tabular data as text, keeping only the requested columns.
  #
  # +data+ is an array of rows (arrays of cells) whose first row is the
  # header. Cells are normally strings; +nil+ cells are rendered as empty.
  class Formatter
    SUPPORTED_OUTPUT_FORMATS = %w[csv tsv table tbl plain same].freeze

    # @param options [Hash] formatting options, merged over the defaults:
    #   :format      - one of SUPPORTED_OUTPUT_FORMATS (default 'same')
    #   :separator   - cell separator for the default ('same') format
    #   :show_header - whether row 0 is emitted (default true)
    #   :table_mode  - adds a rule line under the header in the default format
    def initialize(options = {})
      @options = {
        format: 'same',
        separator: ' , ',
        show_header: true,
        table_mode: false
      }.merge(options)

      # Table-style output switches to a pipe separator, but only when the
      # caller did not pick a separator explicitly.
      return if options.key?(:separator)
      return unless @options[:table_mode] || %w[table tbl].include?(@options[:format])

      @options[:separator] = ' | '
    end

    # Formats the selected columns of +data+ according to the :format option.
    #
    # @param data [Array<Array>] rows; row 0 is treated as the header
    # @param selected_indices [Array<Integer>] column indices to emit, in order
    # @return [String] formatted text ('' when there is nothing to emit)
    # @raise [FormatError] when the configured format is not supported
    def format(data, selected_indices)
      return '' if data.empty? || selected_indices.empty?

      output_format = @options[:format]

      case output_format
      when 'csv' then format_csv(data, selected_indices)
      when 'tsv' then format_tsv(data, selected_indices)
      when 'table', 'tbl' then format_table(data, selected_indices)
      when 'plain' then format_plain(data, selected_indices)
      when 'same', nil then format_default(data, selected_indices)
      else
        raise FormatError, "Unsupported output format: #{output_format}"
      end
    end

    private

    # Cells joined with the configured separator; in table mode a rule line
    # follows the header.
    def format_default(data, selected_indices)
      separator = @options[:separator]
      output = rows_to_emit(data).map { |row| select_cells(row, selected_indices).join(separator) }

      if @options[:table_mode] && show_header?(data)
        output.insert(1, separator_line(calculate_column_widths(data, selected_indices)))
      end

      output.join("\n")
    end

    # CSV output; every line (including the last) ends with a newline because
    # CSV.generate_line appends the row separator.
    def format_csv(data, selected_indices)
      rows_to_emit(data).map { |row| CSV.generate_line(select_cells(row, selected_indices)) }.join
    end

    # Tab-separated output (cells are not escaped).
    def format_tsv(data, selected_indices)
      rows_to_emit(data).map { |row| select_cells(row, selected_indices).join("\t") }.join("\n")
    end

    # Padded columns joined by " | " with a "-+-" rule under the header.
    def format_table(data, selected_indices)
      format_aligned(data, selected_indices, ' | ', rule: true)
    end

    # Padded columns joined by a single space, no rule line.
    def format_plain(data, selected_indices)
      format_aligned(data, selected_indices, ' ', rule: false)
    end

    # Shared implementation of the padded (table / plain) layouts.
    def format_aligned(data, selected_indices, joiner, rule:)
      widths = calculate_column_widths(data, selected_indices)
      output = []

      if show_header?(data)
        output << pad_cells(data.first, selected_indices, widths).join(joiner)
        output << separator_line(widths) if rule
      end

      data[1..].each do |row|
        # Skip empty rows (e.g. from trailing newlines in CSV input).
        next if blank_row?(row)

        output << pad_cells(row, selected_indices, widths).join(joiner)
      end

      output.join("\n")
    end

    # True when the header row should be printed.
    def show_header?(data)
      @options[:show_header] && data.first
    end

    # Rows to print for the unpadded formats: everything when the header is
    # shown, otherwise everything after row 0.
    def rows_to_emit(data)
      show_header?(data) ? data : data[1..]
    end

    # Picks the selected columns out of one row.
    def select_cells(row, selected_indices)
      selected_indices.map { |col_idx| row[col_idx] }
    end

    # Picks the selected columns out of one row and pads each to its width.
    def pad_cells(row, selected_indices, widths)
      selected_indices.zip(widths).map { |col_idx, width| format_cell(row[col_idx], width) }
    end

    # A row is blank when it is nil or every cell is nil or empty. Each cell
    # is checked individually so rows mixing nil and strings (e.g. the CSV
    # line ",25") do not raise NoMethodError on nil.
    def blank_row?(row)
      row.nil? || row.all? { |cell| cell.nil? || cell.to_s.empty? }
    end

    # Widest cell (header included) for each selected column.
    def calculate_column_widths(data, selected_indices)
      rows = data.compact
      selected_indices.map do |col_idx|
        rows.map { |row| row[col_idx].to_s.length }.max || 0
      end
    end

    # Left-justifies a cell to +width+; nil becomes an empty string.
    def format_cell(value, width)
      value.to_s.ljust(width)
    end

    # Rule line: dashes per column, "-+-" at the column intersections.
    def separator_line(widths)
      widths.map { |width| '-' * width }.join('-+-')
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'stringio'
5
+
6
module EasyCols
  # Parses structured text (CSV, TSV, pipe-delimited table or
  # whitespace-separated plain text) into an array of rows, each row being an
  # array of cell strings. For CSV, TSV and table input the first row holds
  # the headers.
  class Parser
    SUPPORTED_FORMATS = %w[csv tsv table tbl plain auto].freeze

    # A table rule line such as "-----+-----" or "-----|-----".
    SEPARATOR_LINE = /\A[-_|+]+\z/

    # @return [String, nil] the format used by the most recent #parse call
    attr_reader :detected_format

    # @param options [Hash] parsing options merged over the defaults. The
    #   parsing code in this class consults :format, :delimiter, :blanklines
    #   and :lines; the remaining keys are only stored here.
    def initialize(**options)
      @options = {
        format: 'auto',
        delimiter: nil,
        pattern: nil,
        quotes: true,
        headers: 1,
        lines: true,
        blanklines: true,
        comments: nil,
        start: nil,
        stop: nil,
      }.merge(options)
    end

    # Parses +input+ using the configured format, auto-detecting it when the
    # format is 'auto' or nil.
    #
    # @param input [String] raw text
    # @return [Array<Array<String, nil>>] parsed rows
    # @raise [FormatError] when the format is not supported
    def parse(input)
      requested = @options[:format]
      auto = requested.nil? || requested == 'auto'

      # Remember the format actually used so callers can query it afterwards.
      @detected_format = auto ? detect_format(input) : requested

      case @detected_format
      when 'csv' then parse_csv(input)
      when 'tsv' then parse_tsv(input)
      when 'table', 'tbl' then parse_table(input)
      when 'plain' then parse_plain(input)
      else
        raise FormatError, "Unsupported format: #{@detected_format}"
      end
    end

    # Guesses the input format from the first non-blank line.
    #
    # @param input [String] raw text
    # @return [String] 'table', 'tsv', 'csv' or 'plain' ('csv' for blank input)
    def detect_format(input)
      # chomp first so rule lines are recognised in CRLF input as well.
      lines = input.lines.map(&:chomp)

      # Leading blank lines must not decide the format.
      first_line = lines.map(&:strip).find { |line| !line.empty? }
      return 'csv' unless first_line

      # Table: pipes in the header plus a rule line somewhere in the input.
      return 'table' if first_line.include?('|') && lines.any? { |line| separator_line?(line) }
      return 'tsv' if first_line.include?("\t")
      return 'csv' if first_line.include?(',')

      # No clear indicators.
      'plain'
    end

    private

    def parse_csv(input)
      parse_delimited(input, ',')
    end

    def parse_tsv(input)
      parse_delimited(input, "\t")
    end

    # Shared CSV/TSV parsing; an explicit :delimiter overrides the default.
    # Returns the header row followed by the data rows.
    def parse_delimited(input, default_separator)
      table = CSV.parse(input, headers: true, col_sep: @options[:delimiter] || default_separator)
      [table.headers] + table.map(&:fields)
    end

    # Table format: header line, optional rule line, then data lines.
    # Returns [] when the input contains no header line at all.
    def parse_table(input)
      lines = input.lines.map(&:chomp)

      header_index = lines.index { |line| !line.strip.empty? }
      return [] unless header_index

      # A rule line belongs to the header only when it is the first line
      # after it (ignoring blank lines when :blanklines is set). Looking any
      # further would discard the data rows that precede a later rule line.
      data_start = header_index + 1
      rule_index = (data_start...lines.size).find { |i| !skippable_blank?(lines[i]) }
      data_start = rule_index + 1 if rule_index && separator_line?(lines[rule_index])

      data_rows = lines[data_start..].filter_map do |line|
        next if skippable_blank?(line)
        next if @options[:lines] && separator_line?(line)

        parse_table_line(line)
      end

      [parse_table_line(lines[header_index])] + data_rows
    end

    # Plain text: one row per line, split on the :delimiter or on whitespace.
    def parse_plain(input)
      delimiter = @options[:delimiter]

      input.lines.filter_map do |raw|
        line = raw.chomp
        next if skippable_blank?(line)

        # Without an explicit delimiter use whitespace splitting that ignores
        # leading indentation; splitting on /\s+/ would yield a bogus empty
        # first column for indented or right-aligned lines.
        delimiter ? line.split(delimiter) : line.split
      end
    end

    # Splits a table line on "|" and trims the surrounding whitespace.
    def parse_table_line(line)
      line.split(/\s*\|\s*/).map(&:strip)
    end

    # True for blank lines when the :blanklines option asks to skip them.
    def skippable_blank?(line)
      @options[:blanklines] && line.strip.empty?
    end

    def separator_line?(line)
      line.match?(SEPARATOR_LINE)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module EasyCols
  # Version string of the easy_cols gem.
  VERSION = '0.1.0'
end
data/lib/easy_cols.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'easy_cols/version'
4
+ require_relative 'easy_cols/parser'
5
+ require_relative 'easy_cols/formatter'
6
+ require_relative 'easy_cols/column_selector'
7
+ require_relative 'easy_cols/cli'
8
+
9
module EasyCols
  # Base class of the gem's exceptions; the classes below all inherit from it.
  Error = Class.new(StandardError)

  # Specific error categories, each a direct subclass of Error.
  ParseError = Class.new(Error)
  FormatError = Class.new(Error)
  SelectionError = Class.new(Error)
end
15
+
@@ -0,0 +1,5 @@
1
+ Name,Age,City,Country
2
+ John,25,NYC,USA
3
+ Jane,30,LA,USA
4
+ Bob,35,London,UK
5
+
@@ -0,0 +1,6 @@
1
+ Name | Age | City | Country
2
+ -----|-----|------|--------
3
+ John | 25 | NYC | USA
4
+ Jane | 30 | LA | USA
5
+ Bob | 35 | London| UK
6
+
metadata ADDED
@@ -0,0 +1,190 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: easy_cols
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alan K. Stebbens
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-11-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: optparse
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: fuubar
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.12'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.12'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.50'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.50'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop-rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.20'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.20'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.22'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.22'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rake
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '13.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '13.0'
125
+ description: |
126
+ EasyCols is a flexible command-line utility for extracting specific columns from
127
+ structured text data in various formats (CSV, TSV, table, plain text). It supports
128
+ sophisticated parsing options including quote handling, comment stripping, header
129
+ processing, and language-specific comment patterns. It can be used on both files and STDIN.
130
+ email:
131
+ - aks@stebbens.org
132
+ executables:
133
+ - easy_cols
134
+ - ec
135
+ extensions: []
136
+ extra_rdoc_files: []
137
+ files:
138
+ - ".cursor/rules/ai.mdc"
139
+ - ".cursor/rules/polly_state.mdc"
140
+ - ".github/workflows/ci.yml"
141
+ - ".gitignore"
142
+ - ".rspec"
143
+ - Gemfile
144
+ - Gemfile.lock
145
+ - LICENSE
146
+ - README.md
147
+ - Rakefile
148
+ - TODO.md
149
+ - USAGE.md
150
+ - bin/easy_cols
151
+ - bin/ec
152
+ - cols.gemspec
153
+ - lib/easy_cols.rb
154
+ - lib/easy_cols/cli.rb
155
+ - lib/easy_cols/column_selector.rb
156
+ - lib/easy_cols/formatter.rb
157
+ - lib/easy_cols/parser.rb
158
+ - lib/easy_cols/version.rb
159
+ - test_data/sample.csv
160
+ - test_data/sample_table.txt
161
+ homepage: https://github.com/aks/easy_cols
162
+ licenses:
163
+ - MIT
164
+ metadata:
165
+ rubygems_mfa_required: 'true'
166
+ homepage_uri: https://github.com/aks/easy_cols
167
+ source_code_uri: https://github.com/aks/easy_cols
168
+ changelog_uri: https://github.com/aks/easy_cols/blob/main/CHANGELOG.md
169
+ bug_tracker_uri: https://github.com/aks/easy_cols/issues
170
+ post_install_message:
171
+ rdoc_options: []
172
+ require_paths:
173
+ - lib
174
+ required_ruby_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: 3.2.0
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ requirements: []
185
+ rubygems_version: 3.5.22
186
+ signing_key:
187
+ specification_version: 4
188
+ summary: A powerful command-line tool for extracting and processing columns from structured
189
+ text data
190
+ test_files: []