marktable 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/marktable/row.rb CHANGED
@@ -2,97 +2,109 @@
2
2
 
3
3
  module Marktable
4
4
  class Row
5
- attr_reader :data, :headers
5
+ attr_reader :values, :headers
6
6
 
7
- def initialize(data = {}, headers: nil)
7
+ def initialize(data, headers: nil)
8
8
  @headers = headers
9
-
10
- if data.is_a?(Hash)
11
- # Ensure all hash values are strings
12
- @data = data.transform_values(&:to_s)
13
- elsif data.is_a?(Array)
14
- # Ensure all array elements are strings
15
- data_strings = data.map(&:to_s)
16
-
17
- @data = if headers && !headers.empty?
18
- # Convert array to hash using headers
19
- headers.each_with_index.each_with_object({}) do |(header, i), hash|
20
- hash[header] = i < data_strings.length ? data_strings[i] : ''
21
- end
9
+ @values = extract_values(data)
10
+ end
11
+
12
+ # Format the row as a markdown table row
13
+ def to_markdown(column_widths)
14
+ vals = @values
15
+
16
+ # Limit values to either headers count or column_widths size, whichever is appropriate
17
+ max_cols = @headers ? @headers.size : column_widths.size
18
+
19
+ formatted_values = column_widths.take(max_cols).map.with_index do |width, i|
20
+ if i < vals.size
21
+ vals[i].to_s.ljust(width)
22
22
  else
23
- # Keep as array when no headers
24
- data_strings
23
+ ''.ljust(width)
25
24
  end
26
- else
27
- @data = headers ? {} : []
28
25
  end
26
+
27
+ "| #{formatted_values.join(' | ')} |"
29
28
  end
30
29
 
30
+ # Access a value by index or header
31
31
  def [](key)
32
- if @data.is_a?(Hash)
33
- @data[key]
34
- elsif key.is_a?(Integer) && key < @data.length
35
- @data[key]
36
- else
37
- nil
32
+ if key.is_a?(Integer)
33
+ @values[key]
34
+ elsif @headers
35
+ idx = @headers.index(key)
36
+ idx ? @values[idx] : nil
38
37
  end
39
38
  end
40
39
 
40
+ # Set a value by index or header
41
41
  def []=(key, value)
42
- if @data.is_a?(Hash)
43
- @data[key] = value.to_s
44
- elsif key.is_a?(Integer)
45
- @data[key] = value.to_s
42
+ if key.is_a?(Integer)
43
+ @values[key] = value if key >= 0 && key < @values.size
44
+ elsif @headers
45
+ idx = @headers.index(key)
46
+ @values[idx] = value if idx && idx < @values.size
46
47
  end
47
48
  end
48
49
 
49
- def values
50
- @data.is_a?(Hash) ? @data.values : @data
51
- end
52
-
53
- def keys
54
- @data.is_a?(Hash) ? @data.keys : (0...@data.size).to_a
55
- end
56
-
57
- def to_h
58
- return @data if @data.is_a?(Hash)
59
- return {} if @data.empty? || @headers.nil? || @headers.empty?
60
-
61
- @headers.each_with_index.each_with_object({}) do |(header, i), hash|
62
- hash[header] = i < @data.length ? @data[i] : ''
63
- end
50
+ # Check if this row uses headers
51
+ def headers?
52
+ !@headers.nil?
64
53
  end
65
54
 
66
- def to_a
67
- @data.is_a?(Array) ? @data : @data.values
68
- end
55
+ # Convert row data to a hash using headers as keys
56
+ def to_hash
57
+ return {} unless @headers
69
58
 
70
- # Convert a row to markdown format with specified column widths
71
- def to_markdown(column_widths)
72
- vals = values
73
- formatted_values = vals.each_with_index.map do |val, i|
74
- val.to_s.ljust(column_widths[i] || val.to_s.length)
59
+ result = {}
60
+ @values.each_with_index do |value, i|
61
+ # Only include values that have a corresponding header
62
+ if i < @headers.size
63
+ header = @headers[i]
64
+ result[header] = value if header
65
+ end
75
66
  end
76
- "| #{formatted_values.join(' | ')} |"
67
+ result
77
68
  end
78
69
 
79
70
  # Parse a markdown row string into an array of values
80
71
  def self.parse(row_string)
81
- row_string.strip.sub(/^\|/, '').sub(/\|$/, '').split('|').map(&:strip)
72
+ # Skip if nil or empty
73
+ return [] if row_string.nil? || row_string.strip.empty?
74
+
75
+ # Remove leading/trailing pipes and split by pipe
76
+ cells = row_string.strip.sub(/^\|/, '').sub(/\|$/, '').split('|')
77
+
78
+ # Trim whitespace from each cell
79
+ cells.map(&:strip)
82
80
  end
83
81
 
84
- # Check if a row string represents a separator row
82
+ # Check if a row is a separator row
85
83
  def self.separator?(row_string)
86
- row_string.strip.gsub(/[\|\-\s]/, '').empty?
84
+ # Skip if nil or empty
85
+ return false if row_string.nil? || row_string.strip.empty?
86
+
87
+ # Remove pipes and strip whitespace
88
+ content = row_string.gsub('|', '').strip
89
+
90
+ # Check if it contains only dashes and colons (separator characters)
91
+ content.match?(/^[\s:,-]+$/) && content.include?('-')
87
92
  end
88
93
 
89
- # Generate a separator row for markdown table with specified widths
90
- def self.separator_row(column_widths)
91
- separators = column_widths.map do |width|
92
- '-' * [3, width].max
94
+ private
95
+
96
+ def extract_values(data)
97
+ case data
98
+ when Hash
99
+ if @headers
100
+ @headers.map { |h| data[h] || '' }
101
+ else
102
+ data.values
103
+ end
104
+ else
105
+ # Array or other enumerable
106
+ Array(data)
93
107
  end
94
-
95
- ["| #{separators.join(' | ')} |", column_widths]
96
108
  end
97
109
  end
98
110
  end
@@ -1,154 +1,84 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'formatters/base'
4
+
3
5
  module Marktable
4
6
  class Table
5
7
  include Enumerable
6
-
8
+
7
9
  attr_reader :headers
8
10
 
9
- def initialize(markdown_table = '', headers: true)
10
- @headers = headers
11
- @rows = []
12
- @header_row = nil
13
- parse_content(markdown_table) unless markdown_table.empty?
11
+ def initialize(source, type: :markdown, headers: nil)
12
+ parser = Tables::Base.for(type).new(source, headers)
13
+ result = parser.parse
14
+ @rows = result.rows
15
+ @headers = result.headers
16
+ # Validate headers if present
17
+ validate_headers if @headers
18
+ # Fix: Initialize @has_headers based on whether @headers is present
19
+ @has_headers = !@headers.nil?
14
20
  end
15
21
 
16
- def each
17
- if block_given?
18
- @rows.each { |row| yield(row) }
19
- else
20
- @rows.each
21
- end
22
- end
22
+ # Iteration support
23
+ def each(&block)
24
+ return enum_for(:each) unless block_given?
23
25
 
24
- def to_a
25
- @rows.map { |row| row.data }
26
- end
27
-
28
- def to_s
29
- generate
26
+ @rows.each(&block)
30
27
  end
31
28
 
32
- def generate
33
- return "" if @rows.empty?
34
-
35
- # Extract header keys or use first row data for header
36
- keys = header_keys
37
-
38
- # Calculate column widths considering both headers and all row values
39
- all_values = [keys] + @rows.map { |row| row.values }
40
- column_widths = calculate_column_widths(all_values)
41
-
42
- # Build the markdown table
43
- build_markdown_table(keys, column_widths)
44
- end
45
-
46
- private
47
-
48
- def header_keys
49
- if @headers && @header_row
50
- @header_row.keys
51
- elsif @rows.first&.data.is_a?(Hash)
52
- @rows.first.data.keys
29
+ # Returns the table as an Array of Hashes if headers are present
30
+ # or Array of Arrays if no headers
31
+ def to_a
32
+ if @has_headers
33
+ # Convert rows to hashes, which will automatically exclude values without headers
34
+ @rows.map(&:to_hash)
53
35
  else
54
- @rows.first&.values || []
36
+ # When no headers, return array of arrays with consistent length
37
+ max_length = @rows.map { |row| row.values.length }.max || 0
38
+ @rows.map do |row|
39
+ values = row.values
40
+ values + Array.new(max_length - values.length, '')
41
+ end
55
42
  end
56
43
  end
57
44
 
58
- def build_markdown_table(keys, column_widths)
59
- result = []
60
-
61
- # Add header row
62
- result << row_to_markdown(keys, column_widths)
63
-
64
- # Add separator row
65
- separator, _ = Row.separator_row(column_widths)
66
- result << separator
67
-
68
- # Add data rows
69
- rows_to_render.each do |row|
70
- result << row.to_markdown(column_widths)
71
- end
72
-
73
- result.join("\n")
45
+ def to_html
46
+ Formatters::Base.for(:html).format(@rows, @headers)
74
47
  end
75
48
 
76
- def rows_to_render
77
- if !@headers && !@rows.first&.data.is_a?(Hash) && @rows.size > 1
78
- @rows[1..-1]
79
- else
80
- @rows
81
- end
49
+ # Generate markdown representation
50
+ def to_md
51
+ Formatters::Base.for(:markdown).format(@rows, @headers)
82
52
  end
53
+ alias generate to_md
83
54
 
84
- def parse_content(markdown_table)
85
- # Split content into rows
86
- rows = markdown_table.split("\n").map(&:strip).reject(&:empty?)
87
- return if rows.empty?
88
-
89
- if @headers
90
- parse_with_headers(rows)
91
- else
92
- parse_without_headers(rows)
93
- end
55
+ # Generate CSV representation
56
+ def to_csv
57
+ Formatters::Base.for(:csv).format(@rows, @headers)
94
58
  end
95
59
 
96
- def parse_with_headers(rows)
97
- # Extract headers from first row
98
- header_values = Row.parse(rows.first)
99
- @header_row = header_values.each_with_object({}) { |val, hash| hash[val] = val }
100
-
101
- # Process each data row
102
- rows.each_with_index do |row, index|
103
- # Skip header row and separator rows
104
- next if index == 0 || Row.separator?(row)
105
-
106
- # Parse the row into values
107
- values = Row.parse(row)
108
-
109
- # Create a hash mapping headers to values
110
- row_hash = {}
111
- header_values.each_with_index do |header, i|
112
- row_hash[header] = i < values.length ? values[i] : ''
113
- end
114
-
115
- @rows << Row.new(row_hash, headers: header_values)
116
- end
60
+ # Support for accessing by index like table[0]
61
+ def [](index)
62
+ @rows[index]
117
63
  end
118
64
 
119
- def parse_without_headers(rows)
120
- # When headers: false, store array of arrays
121
- rows.each do |row|
122
- # Skip separator rows
123
- next if Row.separator?(row)
124
-
125
- # Parse the row into values
126
- values = Row.parse(row)
127
- @rows << Row.new(values, headers: nil)
128
- end
65
+ # Returns the number of rows
66
+ def size
67
+ @rows.size
129
68
  end
69
+ alias length size
130
70
 
131
- # Calculate the maximum width of each column
132
- def calculate_column_widths(arrays_of_values)
133
- max_column_count = arrays_of_values.map { |row| row.size }.max || 0
134
- column_widths = Array.new(max_column_count, 0)
135
-
136
- arrays_of_values.each do |row|
137
- row.each_with_index do |cell, i|
138
- cell_width = cell.to_s.length
139
- column_widths[i] = [column_widths[i], cell_width].max
140
- end
141
- end
142
-
143
- column_widths
71
+ def empty?
72
+ @rows.empty?
144
73
  end
145
74
 
146
- # Generate markdown row from array of values with proper spacing
147
- def row_to_markdown(values, column_widths)
148
- formatted_values = values.each_with_index.map do |val, i|
149
- val.to_s.ljust(column_widths[i])
150
- end
151
- "| #{formatted_values.join(' | ')} |"
75
+ private
76
+
77
+ def validate_headers
78
+ duplicates = @headers.group_by { |h| h }.select { |_, v| v.size > 1 }.keys
79
+ return unless duplicates.any?
80
+
81
+ raise ArgumentError, "Duplicate headers are not allowed: #{duplicates.join(', ')}"
152
82
  end
153
83
  end
154
84
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
module Marktable
  module Tables
    # Parser for in-memory tables given as an array of hashes or an array of
    # arrays.
    class Array
      # @param array [::Array, nil] the source rows
      # @param headers [Object] truthy when the first inner array is a header
      #   row; not consulted for arrays of hashes, whose keys become the headers
      def initialize(array, headers)
        @array = array
        @headers_flag = headers
      end

      # @return [Tables::Base::Result] parsed rows and headers; the blank
      #   result when the source is nil or empty
      def parse
        # A nil source is treated like an empty one instead of raising
        # NoMethodError on nil.empty?.
        return Tables::Base.blank if @array.nil? || @array.empty?

        # The first element decides whether this is hashes or arrays.
        @array.first.is_a?(Hash) ? parse_array_of_hashes : parse_array_of_arrays
      end

      private

      def parse_array_of_hashes
        # Union of keys across every hash, so mismatched keys still get a column.
        headers = @array.flat_map(&:keys).uniq
        rows = @array.map { |hash| Row.new(hash, headers: headers) }

        Tables::Base::Result.new(rows:, headers:)
      end

      def parse_array_of_arrays
        # With the header flag set, the first inner array names the columns.
        headers, data_rows = @headers_flag ? [@array.first, @array[1..]] : [nil, @array]
        rows = data_rows.map { |values| Row.new(values, headers: headers) }

        Tables::Base::Result.new(rows:, headers:)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'markdown'
4
+ require_relative 'array'
5
+ require_relative 'csv'
6
+ require_relative 'html'
7
+
8
module Marktable
  module Tables
    # Entry point for the source parsers: maps a type name to its parser class
    # and defines the value object every parser returns.
    class Base
      # Parsed output: the Row objects plus the header list (nil when absent).
      Result = Struct.new(:rows, :headers, keyword_init: true)

      # Look up the parser class for a source type.
      #
      # @param type [Symbol, String] one of markdown, array, csv, html
      # @return [Class] the matching parser class
      # @raise [ArgumentError] for an unknown type, including values such as
      #   nil that cannot be converted to a Symbol
      def self.for(type)
        # Guard the conversion so unsupported inputs reach the ArgumentError
        # below rather than failing with NoMethodError on #to_sym.
        key = type.respond_to?(:to_sym) ? type.to_sym : type

        # Constants are resolved at call time, not at load time.
        case key
        when :markdown then Markdown
        when :array then Array
        when :csv then CSV
        when :html then HTML
        else
          raise ArgumentError, "Unknown table type: #{type}"
        end
      end

      # @return [Result] an empty result with no rows and no headers
      def self.blank
        Result.new(rows: [], headers: nil)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require_relative 'base'
5
+
6
module Marktable
  module Tables
    # Parser for CSV input given either as a String or as an already parsed
    # ::CSV::Table.
    class CSV
      # @param csv_data [String, ::CSV::Table] the CSV source
      # @param headers [Object] passed to ::CSV.parse as its +headers+ option
      #   when the source is a String
      def initialize(csv_data, headers)
        @csv_data = csv_data
        @headers_flag = headers
      end

      # @return [Tables::Base::Result]
      # @raise [ArgumentError] if the source is neither a String nor a ::CSV::Table
      def parse
        csv_table = parse_csv

        with_headers? ? parse_with_headers(csv_table) : parse_without_headers(csv_table)
      end

      private

      # Returns the ::CSV::Table passed in, or whatever ::CSV.parse yields for
      # a String source.
      def parse_csv
        case @csv_data
        when ::CSV::Table
          @csv_data
        when String
          ::CSV.parse(@csv_data, headers: @headers_flag)
        else
          raise ArgumentError, "Cannot parse CSV from #{@csv_data.class}"
        end
      end

      # Headers are used when requested, or when a supplied ::CSV::Table
      # already carries some.
      def with_headers?
        @headers_flag || (@csv_data.is_a?(::CSV::Table) && @csv_data.headers.any?)
      end

      def parse_with_headers(csv_table)
        headers = csv_table.headers
        # Each CSV::Row is converted to a Hash and then to our Row.
        rows = csv_table.map { |csv_row| Row.new(csv_row.to_h, headers: headers) }

        Tables::Base::Result.new(rows:, headers:)
      end

      def parse_without_headers(csv_table)
        # Table records expose #fields; plain parsed records are used directly.
        rows = csv_table.map do |record|
          Row.new(record.respond_to?(:fields) ? record.fields : record)
        end

        Tables::Base::Result.new(rows:, headers: nil)
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative 'base'
5
+
6
module Marktable
  module Tables
    # Parser for HTML input: uses the first <table> found in the given markup
    # or Nokogiri node(s).
    class HTML
      private attr_reader :table

      # @param html_data [String, Nokogiri::XML::Element, Nokogiri::XML::NodeSet, #to_s]
      # @param headers [Object, nil] truthy to treat the first row as headers
      #   even when it contains no <th> cells
      def initialize(html_data, headers = nil)
        @table = extract_table_node(html_data)
        @headers_flag = headers
      end

      # @return [Tables::Base::Result] the blank result when no table or no
      #   rows are found
      def parse
        # The original called a bare `blank`, which is not defined on this
        # class; the shared empty result lives on Tables::Base.
        return Tables::Base.blank if table.nil? || rows.empty?

        if has_headers?
          headers = extract_row_cells(first_row)
          data_rows = rows[1..]
        else
          headers = nil
          data_rows = rows
        end

        parsed_rows = data_rows.map { |row| Row.new(extract_row_cells(row), headers:) }

        Tables::Base::Result.new(rows: parsed_rows, headers:)
      end

      private

      # Locate the table node for the supported input kinds.
      def extract_table_node(html_data)
        case html_data
        when String
          return nil if html_data.strip.empty?

          Nokogiri::HTML(html_data).at_css('table')
        when Nokogiri::XML::NodeSet
          # A NodeSet has no #name, so it cannot share the Element branch.
          # NOTE(review): relies on NodeSet CSS searches also matching the
          # set's own members — confirm against the Nokogiri version in use.
          html_data.at_css('table')
        when Nokogiri::XML::Element
          html_data.name == 'table' ? html_data : html_data.at_css('table')
        else
          Nokogiri::HTML(html_data.to_s).at_css('table')
        end
      end

      # Text of every <th>/<td> cell in a row, in document order.
      def extract_row_cells(row)
        row.css('th, td').map(&:text)
      end

      def first_row
        @first_row ||= rows.first
      end

      # The first row is a header row when it has <th> cells or when the
      # caller asked for headers.
      def has_headers?
        @has_headers ||= first_row.css('th').any? || @headers_flag
      end

      def rows
        @rows ||= table.css('tr')
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
module Marktable
  module Tables
    # Parser for markdown (pipe-delimited) tables.
    class Markdown
      # @param table [String, nil] the markdown table string
      # @param headers [Boolean, nil] whether the table has headers; when nil
      #   it is inferred from the table content
      def initialize(table, headers)
        @headers_flag = headers
        @markdown_rows = extract_rows(table)
      end

      # @return [Tables::Base::Result]
      def parse
        with_headers? ? parse_with_headers : parse_without_headers
      end

      private

      # Split the source into stripped, non-empty lines.
      def extract_rows(table)
        return [] if table.nil? || table.empty?

        table.split("\n").map(&:strip).reject(&:empty?)
      end

      # Headers are inferred when there are at least 2 rows and the second is
      # a separator.
      def infer_headers
        @markdown_rows.size >= 2 && Row.separator?(@markdown_rows[1])
      end

      def extract_header_values
        Row.parse(@markdown_rows.first)
      end

      # An explicit flag wins; otherwise infer from the table structure.
      def with_headers?
        return @headers_flag unless @headers_flag.nil?

        infer_headers
      end

      def parse_with_headers
        # Must be a Result like every other path: the caller reads
        # .rows/.headers, which the bare [[], []] pair returned here
        # previously does not provide.
        return Tables::Base.blank if @markdown_rows.empty?

        header_values = extract_header_values
        # The first line is the header row; separators are skipped below.
        rows = build_rows(@markdown_rows.drop(1), header_values)

        Tables::Base::Result.new(rows:, headers: header_values)
      end

      def parse_without_headers
        Tables::Base::Result.new(rows: build_rows(@markdown_rows, nil), headers: nil)
      end

      # Turn every non-separator line into a Row carrying the given headers.
      def build_rows(row_strings, headers)
        row_strings
          .reject { |row_string| Row.separator?(row_string) }
          .map { |row_string| Row.new(Row.parse(row_string), headers: headers) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Marktable
  # Version string of this marktable release.
  VERSION = '0.1.0'
end