marktable 0.0.4s → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/marktable.rb CHANGED
@@ -1,140 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'marktable/version'
3
4
  require_relative 'marktable/row'
5
+ require_relative 'marktable/tables/base'
4
6
  require_relative 'marktable/table'
5
7
 
6
- if defined?(RSpec)
7
- require_relative '../spec/support/matchers/markdown_matchers'
8
- end
9
-
10
8
  module Marktable
11
- # Parse a markdown table string into an array of rows
12
- def self.parse(markdown_table, headers: true)
13
- Table.new(markdown_table, headers: headers).to_a
14
- end
15
-
16
- # Parse a single markdown row into an array of cell values
17
- def self.parse_line(markdown_row)
18
- Row.parse(markdown_row)
19
- end
20
-
21
- # Iterate through each row of a markdown table
22
- def self.foreach(markdown_table, headers: true)
23
- table = Table.new(markdown_table, headers: headers)
24
- return Enumerator.new do |yielder|
25
- table.each do |row|
26
- yielder << row.data
27
- end
28
- end unless block_given?
29
-
30
- table.each do |row|
31
- yield row.data
32
- end
33
- end
34
-
35
- # Generate a markdown table from provided data
36
- def self.generate(headers: nil)
37
- result = []
38
- markdown_table = ''
39
-
40
- if block_given?
41
- table_data = []
42
- yield table_data
43
-
44
- unless table_data.empty?
45
- # Ensure all data is stringified
46
- string_data = table_data.map do |row|
47
- if row.is_a?(Hash)
48
- row.transform_values(&:to_s)
49
- else
50
- row.map(&:to_s)
51
- end
52
- end
53
-
54
- # Create a Table object
55
- table = table(string_data, headers: headers.nil? ? true : headers)
56
-
57
- markdown_table = table.generate
58
- end
59
- end
60
-
61
- markdown_table
62
- end
63
-
64
- # Read a markdown table from a file
65
- def self.read(path, headers: true)
66
- content = File.read(path)
67
- Table.new(content, headers: headers)
68
- end
69
-
70
- # Write a markdown table to a file
71
- def self.write(path, table_or_data)
72
- content = if table_or_data.is_a?(Table)
73
- table_or_data.to_s
74
- else
75
- table(table_or_data).to_s
76
- end
77
-
78
- File.write(path, content)
9
+ def self.from_markdown(table, headers: nil)
10
+ Table.new(table, type: :markdown, headers:)
79
11
  end
80
12
 
81
- # Convert an array to a Marktable::Table
82
- def self.table(array, headers: true)
83
- table = Table.new([], headers: headers)
84
-
85
- # Ensure all data values are strings
86
- string_array = array.map do |row|
87
- # Handle Row instances by extracting their data
88
- if row.is_a?(Row)
89
- row.data
90
- elsif row.is_a?(Hash)
91
- row.transform_values(&:to_s)
92
- else
93
- row.map(&:to_s)
94
- end
95
- end
96
-
97
- if headers && string_array.first.is_a?(Hash)
98
- header_keys = string_array.first.keys
99
- table.instance_variable_set(:@header_row, header_keys.each_with_object({}) { |k, h| h[k] = k })
100
- table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: header_keys) })
101
- else
102
- table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: nil) })
103
- end
104
-
105
- table
13
+ def self.from_csv(table, headers: nil)
14
+ Table.new(table, type: :csv, headers:)
106
15
  end
107
16
 
108
- # Filter rows matching a pattern
109
- def self.filter(markdown_table, pattern, headers: true)
110
- table = Table.new(markdown_table, headers: headers)
111
- filtered_rows = table.to_a.select do |row|
112
- if row.is_a?(Hash)
113
- row.values.any? { |v| v.to_s.match?(pattern) }
114
- else
115
- row.any? { |v| v.to_s.match?(pattern) }
116
- end
117
- end
118
-
119
- table(filtered_rows, headers: headers)
17
+ def self.from_array(table, headers: nil)
18
+ Table.new(table, type: :array, headers:)
120
19
  end
121
20
 
122
- # Map over rows (all values will be converted to strings)
123
- def self.map(markdown_table, headers: true)
124
- table = Table.new(markdown_table, headers: headers)
125
- mapped_rows = []
126
-
127
- table.each do |row|
128
- result = yield(row)
129
- # Ensure result is string-compatible
130
- if result.is_a?(Hash)
131
- result = result.transform_values(&:to_s)
132
- elsif result.is_a?(Array)
133
- result = result.map(&:to_s)
134
- end
135
- mapped_rows << result
136
- end
137
-
138
- table(mapped_rows, headers: headers)
21
+ def self.from_html(table)
22
+ Table.new(table, type: :html)
139
23
  end
140
24
  end
@@ -1,228 +1,57 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'capybara'
4
+ require 'nokogiri'
4
5
 
5
6
  RSpec::Matchers.define :match_markdown do |expected_markdown|
6
- match do |actual|
7
- @actual_data = parse_input(actual)
8
- @expected_data = parse_input(expected_markdown)
9
-
10
- normalize(@actual_data) == normalize(@expected_data)
7
+ chain :with_format do |format|
8
+ @format = format
11
9
  end
12
10
 
13
- failure_message do |actual|
14
- @actual_data = parse_input(actual)
15
- @expected_data = parse_input(expected_markdown)
11
+ match do |actual|
12
+ @expected_data = parse_input(expected_markdown, :markdown)
13
+ @format ||= infer_format(actual)
14
+ @actual_data = parse_input(actual, @format)
16
15
 
16
+ # Compare data using to_a for consistent comparison
17
+ @actual_data.to_a == @expected_data.to_a
18
+ end
19
+
20
+ failure_message do
17
21
  format_failure_message(@expected_data, @actual_data)
18
22
  end
19
23
 
20
- failure_message_when_negated do |actual|
21
- @actual_data = parse_input(actual)
22
-
24
+ failure_message_when_negated do
23
25
  "Expected markdown tables to differ, but they match:\n\n" \
24
- "#{format_as_markdown(@actual_data)}"
26
+ "#{@actual_data.to_md}"
25
27
  end
26
-
28
+
27
29
  private
28
-
29
- # Parse different types of inputs into a common data structure
30
- def parse_input(input)
31
- case input
32
- when String
33
- if looks_like_html?(input)
34
- parse_html_table(input)
35
- else
36
- Marktable.parse(input)
37
- end
30
+
31
+ def parse_input(input, format = nil)
32
+ return input if input.is_a?(Marktable::Table)
33
+
34
+ Marktable::Table.new(input, type: format)
35
+ end
36
+
37
+ def infer_format(data)
38
+ case data
39
+ when Array
40
+ :array
41
+ when CSV::Table
42
+ :csv
38
43
  when Marktable::Table
39
- input.to_a
40
- when Capybara::Node::Element
41
- parse_capybara_element(input)
44
+ :markdown
42
45
  else
43
- input
44
- end
45
- end
46
-
47
- def looks_like_html?(text)
48
- text.include?('<table') || text.include?('<tr') || text.include?('<td')
49
- end
50
-
51
- # Normalize data by trimming whitespace in cell values
52
- def normalize(data)
53
- data.map do |row|
54
- if row.is_a?(Hash)
55
- row.transform_values { |v| v.to_s.strip }
56
- else
57
- row.map { |v| v.to_s.strip }
58
- end
46
+ :markdown
59
47
  end
60
48
  end
61
-
49
+
62
50
  def format_failure_message(expected_data, actual_data)
63
- expected_formatted = format_as_markdown(expected_data)
64
- actual_formatted = format_as_markdown(actual_data)
65
-
66
51
  "Expected markdown table to match:\n\n" \
67
- "Expected:\n#{expected_formatted}\n\n" \
68
- "Actual:\n#{actual_formatted}\n\n" \
69
- "Parsed expected data: #{expected_data.inspect}\n" \
70
- "Parsed actual data: #{actual_data.inspect}"
71
- end
72
-
73
- def format_as_markdown(data)
74
- Marktable.table(data).to_s
75
- end
76
-
77
- # Parse HTML table into rows of data
78
- def parse_html_table(html)
79
- if defined?(Nokogiri)
80
- parse_html_with_nokogiri(html)
81
- else
82
- begin
83
- require('nokogiri')
84
- parse_html_with_nokogiri(html)
85
- rescue LoadError
86
- parse_html_without_nokogiri(html)
87
- end
88
- end
89
- end
90
-
91
- def parse_html_with_nokogiri(html)
92
- doc = Nokogiri::HTML(html)
93
-
94
- # Extract headers
95
- headers = extract_headers_with_nokogiri(doc)
96
-
97
- # Extract body rows
98
- body_rows = extract_body_rows_with_nokogiri(doc)
99
-
100
- # Convert rows to hashes using the headers
101
- body_rows.map do |row|
102
- row_to_hash(row, headers)
103
- end
104
- end
105
-
106
- def extract_headers_with_nokogiri(doc)
107
- headers = doc.css('thead th, thead td').map(&:text)
108
- if headers.empty? && doc.css('tr').any?
109
- headers = doc.css('tr:first-child th, tr:first-child td').map(&:text)
110
- end
111
- headers
112
- end
113
-
114
- def extract_body_rows_with_nokogiri(doc)
115
- tbody_rows = doc.css('tbody tr').map { |tr| tr.css('th, td').map(&:text) }
116
-
117
- # If no tbody, use all rows after the first (assuming first is header)
118
- if tbody_rows.empty?
119
- tbody_rows = doc.css('tr')[1..-1].to_a.map { |tr| tr.css('th, td').map(&:text) }
120
- end
121
-
122
- tbody_rows
123
- end
124
-
125
- def parse_html_without_nokogiri(html)
126
- # Extract headers
127
- headers = extract_headers_without_nokogiri(html)
128
-
129
- # Extract body rows
130
- body_rows = extract_body_rows_without_nokogiri(html, headers)
131
-
132
- body_rows
133
- end
134
-
135
- def extract_headers_without_nokogiri(html)
136
- headers = []
137
-
138
- if html.include?('<thead')
139
- # Extract headers from thead
140
- thead_html = html[html.index('<thead')...(html.index('</thead>') + 8)]
141
- headers = thead_html.scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
142
- else
143
- # No thead, get headers from first tr
144
- first_tr = html.match(/<tr.*?>(.*?)<\/tr>/im)
145
- if first_tr
146
- headers = first_tr[1].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
147
- end
148
- end
149
-
150
- headers
151
- end
152
-
153
- def extract_body_rows_without_nokogiri(html, headers)
154
- rows = []
155
- has_thead = html.include?('<thead')
156
- has_tbody = html.include?('<tbody')
157
- in_tbody = false
158
-
159
- html.scan(/<tr.*?>(.*?)<\/tr>/im).each_with_index do |tr_content, index|
160
- # Skip header rows
161
- next if should_skip_header_row?(html, tr_content[0], index, has_thead, has_tbody)
162
-
163
- # For tables with thead/tbody, only include tbody rows
164
- if has_thead && has_tbody
165
- in_tbody = html[0..html.index(tr_content[0])].include?('<tbody') unless in_tbody
166
- in_tbody = false if html[0..html.index(tr_content[0])].include?('</tbody')
167
- next unless in_tbody
168
- end
169
-
170
- cells = tr_content[0].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell_content| cell_content[0].strip }
171
-
172
- if cells.any? && headers.any?
173
- rows << row_to_hash(cells, headers)
174
- end
175
- end
176
-
177
- rows
178
- end
179
-
180
- def should_skip_header_row?(html, tr_content, index, has_thead, has_tbody)
181
- (has_thead && html[0..html.index(tr_content)].include?('<thead') &&
182
- !html[0..html.index(tr_content)].include?('</thead')) ||
183
- (!has_thead && !has_tbody && index == 0)
184
- end
185
-
186
- def row_to_hash(cells, headers)
187
- row_hash = {}
188
- headers.each_with_index do |header, i|
189
- row_hash[header] = i < cells.length ? cells[i] : ''
190
- end
191
- row_hash
192
- end
193
-
194
- def parse_capybara_element(element)
195
- # Extract headers
196
- headers = extract_headers_from_capybara(element)
197
-
198
- # Extract body rows
199
- body_rows = extract_body_rows_from_capybara(element)
200
-
201
- # Convert rows to hashes using the headers
202
- body_rows.map do |cells|
203
- row_to_hash(cells, headers)
204
- end
205
- end
206
-
207
- def extract_headers_from_capybara(element)
208
- thead = element.first('thead') rescue nil
209
- if thead
210
- thead.all('th, td').map(&:text)
211
- else
212
- first_row = element.first('tr')
213
- first_row ? first_row.all('th, td').map(&:text) : []
214
- end
215
- end
216
-
217
- def extract_body_rows_from_capybara(element)
218
- body_rows = element.all('tbody tr')
219
-
220
- # If no tbody, assume first row is header and skip it
221
- if body_rows.empty?
222
- all_rows = element.all('tr')
223
- body_rows = all_rows[1..]
224
- end
225
-
226
- body_rows.map { |tr| tr.all('th, td').map(&:text) }
52
+ "Expected:\n#{expected_data.to_md}\n\n" \
53
+ "Actual:\n#{actual_data.to_md}\n\n" \
54
+ "Parsed expected data: #{expected_data.to_a.inspect}\n" \
55
+ "Parsed actual data: #{actual_data.to_a.inspect}"
227
56
  end
228
57
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marktable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4s
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Gaspard
@@ -9,6 +9,34 @@ bindir: bin
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: csv
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '3.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '3.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.14'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.14'
12
40
  - !ruby/object:Gem::Dependency
13
41
  name: rake
14
42
  requirement: !ruby/object:Gem::Requirement
@@ -34,8 +62,18 @@ files:
34
62
  - LICENSE
35
63
  - README.md
36
64
  - lib/marktable.rb
65
+ - lib/marktable/formatters/base.rb
66
+ - lib/marktable/formatters/csv.rb
67
+ - lib/marktable/formatters/html.rb
68
+ - lib/marktable/formatters/markdown.rb
37
69
  - lib/marktable/row.rb
38
70
  - lib/marktable/table.rb
71
+ - lib/marktable/tables/array.rb
72
+ - lib/marktable/tables/base.rb
73
+ - lib/marktable/tables/csv.rb
74
+ - lib/marktable/tables/html.rb
75
+ - lib/marktable/tables/markdown.rb
76
+ - lib/marktable/version.rb
39
77
  - spec/support/matchers/markdown_matchers.rb
40
78
  homepage: https://github.com/Francois-gaspard/marktable
41
79
  licenses: