marktable 0.0.5 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
module Marktable
  module Tables
    # Builds a table result from an in-memory Ruby array.
    #
    # Two input shapes are handled: an array of hashes (the union of all keys
    # becomes the header list) and an array of arrays (optionally led by a
    # header row).
    class Array
      # @param array [::Array<Hash>, ::Array<::Array>, nil] the raw rows
      # @param headers [Boolean, nil] for an array of arrays, whether the first
      #   inner array is a header row; not consulted for an array of hashes
      def initialize(array, headers)
        @array = array
        @headers_flag = headers
      end

      # Parses the array given to the constructor.
      #
      # @return [Tables::Base::Result] rows and headers; the blank result when
      #   the input is nil or empty
      def parse
        # Treat nil like an empty table instead of raising NoMethodError.
        return Tables::Base.blank if @array.nil? || @array.empty?

        # The shape is decided by the first element only.
        @array.first.is_a?(Hash) ? parse_array_of_hashes : parse_array_of_arrays
      end

      private

      # Headers are the unique keys across every hash, in first-seen order,
      # so hashes with mismatched keys still share one header list.
      def parse_array_of_hashes
        headers = @array.flat_map(&:keys).uniq
        rows = @array.map { |hash| Row.new(hash, headers:) }

        Tables::Base::Result.new(rows:, headers:)
      end

      # With a truthy headers flag the first inner array is the header row and
      # the remainder is data; otherwise every inner array is data.
      def parse_array_of_arrays
        headers, data_rows = @headers_flag ? [@array.first, @array[1..]] : [nil, @array]
        rows = data_rows.map { |values| Row.new(values, headers:) }

        Tables::Base::Result.new(rows:, headers:)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'markdown'
4
+ require_relative 'array'
5
+ require_relative 'csv'
6
+ require_relative 'html'
7
+
8
module Marktable
  module Tables
    # Lookup point for the table parser classes, plus the value object they
    # return.
    class Base
      # Parser output: `rows` holds the parsed rows, `headers` the header list
      # (nil when the table has none).
      Result = Struct.new(:rows, :headers, keyword_init: true)

      # Resolves a table type to its parser class.
      #
      # The parser constants are looked up when the method runs rather than
      # when this class body is evaluated, so load order between this file and
      # the parser files does not matter.
      #
      # @param type [Symbol, String] one of :markdown, :array, :csv, :html
      # @return [Class] the matching parser class
      # @raise [ArgumentError] when the type is unknown or cannot be converted
      #   to a symbol (for example nil)
      def self.for(type)
        # nil and other non-symbolizable values used to surface as
        # NoMethodError from `to_sym`; report them as the same ArgumentError.
        raise ArgumentError, "Unknown table type: #{type.inspect}" unless type.respond_to?(:to_sym)

        case type.to_sym
        when :markdown then Markdown
        when :array then Array
        when :csv then CSV
        when :html then HTML
        else
          raise ArgumentError, "Unknown table type: #{type}"
        end
      end

      # @return [Result] an empty result: no rows and no headers
      def self.blank
        Result.new(rows: [], headers: nil)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require_relative 'base'
5
+
6
module Marktable
  module Tables
    # Turns CSV input (a String or an already-parsed ::CSV::Table) into rows
    # and optional headers.
    class CSV
      # @param csv_data [String, ::CSV::Table] the CSV source
      # @param headers [Object] forwarded to ::CSV.parse as its `headers:`
      #   option when csv_data is a String; any truthy value selects the
      #   with-headers path
      def initialize(csv_data, headers)
        @csv_data = csv_data
        @headers_flag = headers
      end

      # @return [Tables::Base::Result] parsed rows plus headers (nil when the
      #   data is treated as header-less)
      # @raise [ArgumentError] when csv_data is neither a String nor a
      #   ::CSV::Table
      def parse
        csv_table = parse_csv

        with_headers? ? parse_with_headers(csv_table) : parse_without_headers(csv_table)
      end

      private

      # A ::CSV::Table is used as-is; a String is parsed by the stdlib parser.
      def parse_csv
        return @csv_data if @csv_data.is_a?(::CSV::Table)
        return ::CSV.parse(@csv_data, headers: @headers_flag) if @csv_data.is_a?(String)

        raise ArgumentError, "Cannot parse CSV from #{@csv_data.class}"
      end

      # Headers apply when requested, or when a supplied ::CSV::Table already
      # carries some.
      def with_headers?
        @headers_flag || (@csv_data.is_a?(::CSV::Table) && @csv_data.headers.any?)
      end

      # Each CSV row is converted to a hash before being wrapped in a Row.
      def parse_with_headers(csv_table)
        headers = csv_table.headers
        rows = csv_table.map { |csv_row| Row.new(csv_row.to_h, headers: headers) }

        Tables::Base::Result.new(rows:, headers:)
      end

      # A ::CSV::Table yields row objects (their fields are used); a plain
      # parse result yields field arrays directly.
      def parse_without_headers(csv_table)
        from_table = csv_table.is_a?(::CSV::Table)
        rows = csv_table.map { |record| Row.new(from_table ? record.fields : record) }

        Tables::Base::Result.new(rows:, headers: nil)
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative 'base'
5
+
6
module Marktable
  module Tables
    # Extracts rows and optional headers from the first <table> found in HTML
    # input (a String, a Nokogiri element, or a Nokogiri node set).
    class HTML
      private attr_reader :table

      # @param html_data [String, Nokogiri::XML::Element, Nokogiri::XML::NodeSet, #to_s]
      #   the HTML source; anything else is converted with `to_s` and parsed
      # @param headers [Boolean, nil] treat the first row as headers even when
      #   it contains no <th> cells
      def initialize(html_data, headers = nil)
        @table = extract_table_node(html_data)
        @headers_flag = headers
      end

      # @return [Tables::Base::Result] parsed rows and headers; the blank
      #   result when no table or no <tr> rows were found
      def parse
        # `blank` lives on Tables::Base; calling it unqualified here raised
        # NameError for empty or table-less input.
        return Tables::Base.blank if table.nil? || rows.empty?

        if has_headers?
          headers = extract_row_cells(first_row)
          data_rows = rows[1..]
        else
          headers = nil
          data_rows = rows
        end

        parsed_rows = data_rows.map { |row| Row.new(extract_row_cells(row), headers:) }

        Tables::Base::Result.new(rows: parsed_rows, headers:)
      end

      private

      # Locates the <table> node for the supported input types; returns nil
      # when the input is a blank String or contains no table.
      def extract_table_node(html_data)
        case html_data
        when String
          html_data.strip.empty? ? nil : Nokogiri::HTML(html_data).at_css('table')
        when Nokogiri::XML::NodeSet
          # A node set has no single tag name, so look for a table among its
          # members first and only then search beneath them.
          html_data.find { |node| node.name == 'table' } || html_data.at_css('table')
        when Nokogiri::XML::Element
          html_data.name == 'table' ? html_data : html_data.at_css('table')
        else
          Nokogiri::HTML(html_data.to_s).at_css('table')
        end
      end

      # Text of every <th>/<td> cell in the row, in document order.
      def extract_row_cells(row)
        row.css('th, td').map(&:text)
      end

      def first_row
        @first_row ||= rows.first
      end

      # The first row is a header row when it contains a <th> cell or when
      # the caller passed a truthy headers flag.
      def has_headers?
        @has_headers ||= first_row.css('th').any? || @headers_flag
      end

      def rows
        @rows ||= table.css('tr')
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
module Marktable
  module Tables
    # Parses a markdown table string into rows and optional headers.
    class Markdown
      # @param table [String, nil] the markdown table source
      # @param headers [Boolean, nil] whether the first row holds headers;
      #   when nil this is inferred from the table content
      def initialize(table, headers)
        @headers_flag = headers
        @markdown_rows = extract_rows(table)
      end

      # @return [Tables::Base::Result] parsed rows plus headers (nil when the
      #   table is treated as header-less or is empty)
      def parse
        with_headers? ? parse_with_headers : parse_without_headers
      end

      private

      # Splits the source into stripped, non-blank lines; nil or empty input
      # yields an empty list.
      def extract_rows(table)
        return [] if table.nil? || table.empty?

        table.split("\n").map(&:strip).reject(&:empty?)
      end

      # Headers are inferred when there are at least two lines and the second
      # one is a separator row.
      def infer_headers
        @markdown_rows.size >= 2 && Row.separator?(@markdown_rows[1])
      end

      def extract_header_values
        Row.parse(@markdown_rows.first)
      end

      # An explicit flag (true or false) wins; only nil falls back to
      # inference.
      def with_headers?
        return @headers_flag unless @headers_flag.nil?

        infer_headers
      end

      def parse_with_headers
        # Return a Result here too: the previous `[[], []]` pair did not
        # respond to `rows`/`headers` like every other return value of #parse.
        return Tables::Base.blank if @markdown_rows.empty?

        header_values = extract_header_values

        # The first line is the header row; separator rows are skipped
        # wherever they appear.
        rows = @markdown_rows.drop(1).filter_map do |line|
          Row.new(Row.parse(line), headers: header_values) unless Row.separator?(line)
        end

        Tables::Base::Result.new(rows:, headers: header_values)
      end

      def parse_without_headers
        # Every non-separator line is a data row.
        rows = @markdown_rows.filter_map do |line|
          Row.new(Row.parse(line)) unless Row.separator?(line)
        end

        Tables::Base::Result.new(rows:, headers: nil)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Marktable
  # Version string of this gem release.
  VERSION = '0.1.1'
end
data/lib/marktable.rb CHANGED
@@ -1,140 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'marktable/version'
3
4
  require_relative 'marktable/row'
5
+ require_relative 'marktable/tables/base'
4
6
  require_relative 'marktable/table'
5
7
 
6
- if defined?(RSpec)
7
- require_relative '../spec/support/matchers/markdown_matchers'
8
- end
9
-
10
8
  module Marktable
11
- # Parse a markdown table string into an array of rows
12
- def self.parse(markdown_table, headers: true)
13
- Table.new(markdown_table, headers: headers).to_a
14
- end
15
-
16
- # Parse a single markdown row into an array of cell values
17
- def self.parse_line(markdown_row)
18
- Row.parse(markdown_row)
19
- end
20
-
21
- # Iterate through each row of a markdown table
22
- def self.foreach(markdown_table, headers: true)
23
- table = Table.new(markdown_table, headers: headers)
24
- return Enumerator.new do |yielder|
25
- table.each do |row|
26
- yielder << row.data
27
- end
28
- end unless block_given?
29
-
30
- table.each do |row|
31
- yield row.data
32
- end
33
- end
34
-
35
- # Generate a markdown table from provided data
36
- def self.generate(headers: nil)
37
- result = []
38
- markdown_table = ''
39
-
40
- if block_given?
41
- table_data = []
42
- yield table_data
43
-
44
- unless table_data.empty?
45
- # Ensure all data is stringified
46
- string_data = table_data.map do |row|
47
- if row.is_a?(Hash)
48
- row.transform_values(&:to_s)
49
- else
50
- row.map(&:to_s)
51
- end
52
- end
53
-
54
- # Create a Table object
55
- table = table(string_data, headers: headers.nil? ? true : headers)
56
-
57
- markdown_table = table.generate
58
- end
59
- end
60
-
61
- markdown_table
62
- end
63
-
64
- # Read a markdown table from a file
65
- def self.read(path, headers: true)
66
- content = File.read(path)
67
- Table.new(content, headers: headers)
68
- end
69
-
70
- # Write a markdown table to a file
71
- def self.write(path, table_or_data)
72
- content = if table_or_data.is_a?(Table)
73
- table_or_data.to_s
74
- else
75
- table(table_or_data).to_s
76
- end
77
-
78
- File.write(path, content)
9
# Builds a Table from markdown input by delegating to Table.new with
# type: :markdown. `headers` is forwarded unchanged (nil by default).
+ def self.from_markdown(table, headers: nil)
10
+ Table.new(table, type: :markdown, headers:)
79
11
  end
80
12
 
81
- # Convert an array to a Marktable::Table
82
- def self.table(array, headers: true)
83
- table = Table.new([], headers: headers)
84
-
85
- # Ensure all data values are strings
86
- string_array = array.map do |row|
87
- # Handle Row instances by extracting their data
88
- if row.is_a?(Row)
89
- row.data
90
- elsif row.is_a?(Hash)
91
- row.transform_values(&:to_s)
92
- else
93
- row.map(&:to_s)
94
- end
95
- end
96
-
97
- if headers && string_array.first.is_a?(Hash)
98
- header_keys = string_array.first.keys
99
- table.instance_variable_set(:@header_row, header_keys.each_with_object({}) { |k, h| h[k] = k })
100
- table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: header_keys) })
101
- else
102
- table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: nil) })
103
- end
104
-
105
- table
13
# Builds a Table from CSV input by delegating to Table.new with type: :csv.
# `headers` is forwarded unchanged (nil by default).
+ def self.from_csv(table, headers: nil)
14
+ Table.new(table, type: :csv, headers:)
106
15
  end
107
16
 
108
- # Filter rows matching a pattern
109
- def self.filter(markdown_table, pattern, headers: true)
110
- table = Table.new(markdown_table, headers: headers)
111
- filtered_rows = table.to_a.select do |row|
112
- if row.is_a?(Hash)
113
- row.values.any? { |v| v.to_s.match?(pattern) }
114
- else
115
- row.any? { |v| v.to_s.match?(pattern) }
116
- end
117
- end
118
-
119
- table(filtered_rows, headers: headers)
17
# Builds a Table from array input by delegating to Table.new with
# type: :array. `headers` is forwarded unchanged (nil by default).
+ def self.from_array(table, headers: nil)
18
+ Table.new(table, type: :array, headers:)
120
19
  end
121
20
 
122
- # Map over rows (all values will be converted to strings)
123
- def self.map(markdown_table, headers: true)
124
- table = Table.new(markdown_table, headers: headers)
125
- mapped_rows = []
126
-
127
- table.each do |row|
128
- result = yield(row)
129
- # Ensure result is string-compatible
130
- if result.is_a?(Hash)
131
- result = result.transform_values(&:to_s)
132
- elsif result.is_a?(Array)
133
- result = result.map(&:to_s)
134
- end
135
- mapped_rows << result
136
- end
137
-
138
- table(mapped_rows, headers: headers)
21
# Builds a Table from HTML input by delegating to Table.new with type: :html.
# NOTE(review): unlike the other from_* builders this one takes no `headers:`
# keyword, so Table.new's own default applies — confirm that is intended.
+ def self.from_html(table)
22
+ Table.new(table, type: :html)
139
23
  end
140
24
  end
@@ -4,152 +4,54 @@ require 'capybara'
4
4
  require 'nokogiri'
5
5
 
6
6
  RSpec::Matchers.define :match_markdown do |expected_markdown|
7
- match do |actual|
8
- @actual_data = parse_input(actual)
9
- @expected_data = parse_input(expected_markdown)
10
-
11
- normalize(@actual_data) == normalize(@expected_data)
7
# Chained modifier that stores an explicit input format in @format.
+ chain :with_format do |format|
8
+ @format = format
12
9
  end
13
10
 
14
- failure_message do |actual|
15
- @actual_data = parse_input(actual)
16
- @expected_data = parse_input(expected_markdown)
11
# Parses the expected value as markdown and the actual value with @format
# (inferred from the actual value only when @format is not already set),
# then reports a match when both sides have equal `to_a` output.
+ match do |actual|
12
+ @expected_data = parse_input(expected_markdown, :markdown)
13
+ @format ||= infer_format(actual)
14
+ @actual_data = parse_input(actual, @format)
15
+
16
+ # Compare data using to_a for consistent comparison
17
+ @actual_data.to_a == @expected_data.to_a
18
+ end
17
19
 
20
+ failure_message do
18
21
  format_failure_message(@expected_data, @actual_data)
19
22
  end
20
23
 
21
- failure_message_when_negated do |actual|
22
- @actual_data = parse_input(actual)
23
-
24
+ failure_message_when_negated do
24
25
  "Expected markdown tables to differ, but they match:\n\n" \
25
- "#{format_as_markdown(@actual_data)}"
26
+ "#{@actual_data.to_md}"
26
27
  end
27
-
28
+
28
29
  private
29
-
30
- # Parse different types of inputs into a common data structure
31
- def parse_input(input)
32
- case input
33
- when String
34
- if looks_like_html?(input)
35
- parse_html_table(input)
36
- else
37
- Marktable.parse(input)
38
- end
30
+
31
# Returns `input` unchanged when it is already a Marktable::Table; otherwise
# wraps it in a new Marktable::Table, passing `format` as the `type:` option.
# NOTE(review): `format` defaults to nil, which is passed straight through as
# `type: nil` — confirm Table.new accepts that.
+ def parse_input(input, format = nil)
32
+ return input if input.is_a?(Marktable::Table)
33
+
34
+ Marktable::Table.new(input, type: format)
35
+ end
36
+
37
+ def infer_format(data)
38
+ case data
39
+ when Array
40
+ :array
41
+ when CSV::Table
42
+ :csv
39
43
  when Marktable::Table
40
- input.to_a
41
- when Capybara::Node::Element
42
- parse_capybara_element(input)
44
+ :markdown
43
45
  else
44
- input
45
- end
46
- end
47
-
48
- def looks_like_html?(text)
49
- text.include?('<table') || text.include?('<tr') || text.include?('<td')
50
- end
51
-
52
- # Normalize data by trimming whitespace in cell values
53
- def normalize(data)
54
- data.map do |row|
55
- if row.is_a?(Hash)
56
- row.transform_values { |v| v.to_s.strip }
57
- else
58
- row.map { |v| v.to_s.strip }
59
- end
46
+ :markdown
60
47
  end
61
48
  end
62
-
49
+
63
50
  def format_failure_message(expected_data, actual_data)
64
- expected_formatted = format_as_markdown(expected_data)
65
- actual_formatted = format_as_markdown(actual_data)
66
-
67
51
  "Expected markdown table to match:\n\n" \
68
- "Expected:\n#{expected_formatted}\n\n" \
69
- "Actual:\n#{actual_formatted}\n\n" \
70
- "Parsed expected data: #{expected_data.inspect}\n" \
71
- "Parsed actual data: #{actual_data.inspect}"
72
- end
73
-
74
- def format_as_markdown(data)
75
- Marktable.table(data).to_s
76
- end
77
-
78
- # Parse HTML table into rows of data using Nokogiri
79
- def parse_html_table(html)
80
- doc = Nokogiri::HTML(html)
81
-
82
- # Extract headers
83
- headers = extract_headers_with_nokogiri(doc)
84
-
85
- # Extract body rows
86
- body_rows = extract_body_rows_with_nokogiri(doc)
87
-
88
- # Convert rows to hashes using the headers
89
- body_rows.map do |row|
90
- row_to_hash(row, headers)
91
- end
92
- end
93
-
94
- def extract_headers_with_nokogiri(doc)
95
- headers = doc.css('thead th, thead td').map(&:text)
96
- if headers.empty? && doc.css('tr').any?
97
- headers = doc.css('tr:first-child th, tr:first-child td').map(&:text)
98
- end
99
- headers
100
- end
101
-
102
- def extract_body_rows_with_nokogiri(doc)
103
- tbody_rows = doc.css('tbody tr').map { |tr| tr.css('th, td').map(&:text) }
104
-
105
- # If no tbody, use all rows after the first (assuming first is header)
106
- if tbody_rows.empty?
107
- tbody_rows = doc.css('tr')[1..-1].to_a.map { |tr| tr.css('th, td').map(&:text) }
108
- end
109
-
110
- tbody_rows
111
- end
112
-
113
- def row_to_hash(cells, headers)
114
- row_hash = {}
115
- headers.each_with_index do |header, i|
116
- row_hash[header] = i < cells.length ? cells[i] : ''
117
- end
118
- row_hash
119
- end
120
-
121
- def parse_capybara_element(element)
122
- # Extract headers
123
- headers = extract_headers_from_capybara(element)
124
-
125
- # Extract body rows
126
- body_rows = extract_body_rows_from_capybara(element)
127
-
128
- # Convert rows to hashes using the headers
129
- body_rows.map do |cells|
130
- row_to_hash(cells, headers)
131
- end
132
- end
133
-
134
- def extract_headers_from_capybara(element)
135
- thead = element.first('thead') rescue nil
136
- if thead
137
- thead.all('th, td').map(&:text)
138
- else
139
- first_row = element.first('tr')
140
- first_row ? first_row.all('th, td').map(&:text) : []
141
- end
142
- end
143
-
144
- def extract_body_rows_from_capybara(element)
145
- body_rows = element.all('tbody tr')
146
-
147
- # If no tbody, assume first row is header and skip it
148
- if body_rows.empty?
149
- all_rows = element.all('tr')
150
- body_rows = all_rows[1..]
151
- end
152
-
153
- body_rows.map { |tr| tr.all('th, td').map(&:text) }
52
+ "Expected:\n#{expected_data.to_md}\n\n" \
53
+ "Actual:\n#{actual_data.to_md}\n\n" \
54
+ "Parsed expected data: #{expected_data.to_a.inspect}\n" \
55
+ "Parsed actual data: #{actual_data.to_a.inspect}"
154
56
  end
155
57
  end