marktable 0.0.4s → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +345 -6
- data/lib/marktable/formatters/base.rb +27 -0
- data/lib/marktable/formatters/csv.rb +24 -0
- data/lib/marktable/formatters/html.rb +50 -0
- data/lib/marktable/formatters/markdown.rb +66 -0
- data/lib/marktable/row.rb +74 -62
- data/lib/marktable/table.rb +53 -123
- data/lib/marktable/tables/array.rb +57 -0
- data/lib/marktable/tables/base.rb +33 -0
- data/lib/marktable/tables/csv.rb +71 -0
- data/lib/marktable/tables/html.rb +67 -0
- data/lib/marktable/tables/markdown.rb +82 -0
- data/lib/marktable/version.rb +5 -0
- data/lib/marktable.rb +10 -126
- data/spec/support/matchers/markdown_matchers.rb +35 -206
- metadata +39 -1
data/lib/marktable.rb
CHANGED
@@ -1,140 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative 'marktable/version'
|
3
4
|
require_relative 'marktable/row'
|
5
|
+
require_relative 'marktable/tables/base'
|
4
6
|
require_relative 'marktable/table'
|
5
7
|
|
6
|
-
if defined?(RSpec)
|
7
|
-
require_relative '../spec/support/matchers/markdown_matchers'
|
8
|
-
end
|
9
|
-
|
10
8
|
module Marktable
|
11
|
-
|
12
|
-
|
13
|
-
Table.new(markdown_table, headers: headers).to_a
|
14
|
-
end
|
15
|
-
|
16
|
-
# Parse a single markdown row into an array of cell values
|
17
|
-
def self.parse_line(markdown_row)
|
18
|
-
Row.parse(markdown_row)
|
19
|
-
end
|
20
|
-
|
21
|
-
# Iterate through each row of a markdown table
|
22
|
-
def self.foreach(markdown_table, headers: true)
|
23
|
-
table = Table.new(markdown_table, headers: headers)
|
24
|
-
return Enumerator.new do |yielder|
|
25
|
-
table.each do |row|
|
26
|
-
yielder << row.data
|
27
|
-
end
|
28
|
-
end unless block_given?
|
29
|
-
|
30
|
-
table.each do |row|
|
31
|
-
yield row.data
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
# Generate a markdown table from provided data
|
36
|
-
def self.generate(headers: nil)
|
37
|
-
result = []
|
38
|
-
markdown_table = ''
|
39
|
-
|
40
|
-
if block_given?
|
41
|
-
table_data = []
|
42
|
-
yield table_data
|
43
|
-
|
44
|
-
unless table_data.empty?
|
45
|
-
# Ensure all data is stringified
|
46
|
-
string_data = table_data.map do |row|
|
47
|
-
if row.is_a?(Hash)
|
48
|
-
row.transform_values(&:to_s)
|
49
|
-
else
|
50
|
-
row.map(&:to_s)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Create a Table object
|
55
|
-
table = table(string_data, headers: headers.nil? ? true : headers)
|
56
|
-
|
57
|
-
markdown_table = table.generate
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
markdown_table
|
62
|
-
end
|
63
|
-
|
64
|
-
# Read a markdown table from a file
|
65
|
-
def self.read(path, headers: true)
|
66
|
-
content = File.read(path)
|
67
|
-
Table.new(content, headers: headers)
|
68
|
-
end
|
69
|
-
|
70
|
-
# Write a markdown table to a file
|
71
|
-
def self.write(path, table_or_data)
|
72
|
-
content = if table_or_data.is_a?(Table)
|
73
|
-
table_or_data.to_s
|
74
|
-
else
|
75
|
-
table(table_or_data).to_s
|
76
|
-
end
|
77
|
-
|
78
|
-
File.write(path, content)
|
9
|
+
def self.from_markdown(table, headers: nil)
|
10
|
+
Table.new(table, type: :markdown, headers:)
|
79
11
|
end
|
80
12
|
|
81
|
-
|
82
|
-
|
83
|
-
table = Table.new([], headers: headers)
|
84
|
-
|
85
|
-
# Ensure all data values are strings
|
86
|
-
string_array = array.map do |row|
|
87
|
-
# Handle Row instances by extracting their data
|
88
|
-
if row.is_a?(Row)
|
89
|
-
row.data
|
90
|
-
elsif row.is_a?(Hash)
|
91
|
-
row.transform_values(&:to_s)
|
92
|
-
else
|
93
|
-
row.map(&:to_s)
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
if headers && string_array.first.is_a?(Hash)
|
98
|
-
header_keys = string_array.first.keys
|
99
|
-
table.instance_variable_set(:@header_row, header_keys.each_with_object({}) { |k, h| h[k] = k })
|
100
|
-
table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: header_keys) })
|
101
|
-
else
|
102
|
-
table.instance_variable_set(:@rows, string_array.map { |row_data| Row.new(row_data, headers: nil) })
|
103
|
-
end
|
104
|
-
|
105
|
-
table
|
13
|
+
def self.from_csv(table, headers: nil)
|
14
|
+
Table.new(table, type: :csv, headers:)
|
106
15
|
end
|
107
16
|
|
108
|
-
|
109
|
-
|
110
|
-
table = Table.new(markdown_table, headers: headers)
|
111
|
-
filtered_rows = table.to_a.select do |row|
|
112
|
-
if row.is_a?(Hash)
|
113
|
-
row.values.any? { |v| v.to_s.match?(pattern) }
|
114
|
-
else
|
115
|
-
row.any? { |v| v.to_s.match?(pattern) }
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
table(filtered_rows, headers: headers)
|
17
|
+
def self.from_array(table, headers: nil)
|
18
|
+
Table.new(table, type: :array, headers:)
|
120
19
|
end
|
121
20
|
|
122
|
-
|
123
|
-
|
124
|
-
table = Table.new(markdown_table, headers: headers)
|
125
|
-
mapped_rows = []
|
126
|
-
|
127
|
-
table.each do |row|
|
128
|
-
result = yield(row)
|
129
|
-
# Ensure result is string-compatible
|
130
|
-
if result.is_a?(Hash)
|
131
|
-
result = result.transform_values(&:to_s)
|
132
|
-
elsif result.is_a?(Array)
|
133
|
-
result = result.map(&:to_s)
|
134
|
-
end
|
135
|
-
mapped_rows << result
|
136
|
-
end
|
137
|
-
|
138
|
-
table(mapped_rows, headers: headers)
|
21
|
+
def self.from_html(table)
|
22
|
+
Table.new(table, type: :html)
|
139
23
|
end
|
140
24
|
end
|
data/spec/support/matchers/markdown_matchers.rb
CHANGED
@@ -1,228 +1,57 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'capybara'
|
4
|
+
require 'nokogiri'
|
4
5
|
|
5
6
|
RSpec::Matchers.define :match_markdown do |expected_markdown|
|
6
|
-
|
7
|
-
@
|
8
|
-
@expected_data = parse_input(expected_markdown)
|
9
|
-
|
10
|
-
normalize(@actual_data) == normalize(@expected_data)
|
7
|
+
chain :with_format do |format|
|
8
|
+
@format = format
|
11
9
|
end
|
12
10
|
|
13
|
-
|
14
|
-
@
|
15
|
-
@
|
11
|
+
match do |actual|
|
12
|
+
@expected_data = parse_input(expected_markdown, :markdown)
|
13
|
+
@format ||= infer_format(actual)
|
14
|
+
@actual_data = parse_input(actual, @format)
|
16
15
|
|
16
|
+
# Compare data using to_a for consistent comparison
|
17
|
+
@actual_data.to_a == @expected_data.to_a
|
18
|
+
end
|
19
|
+
|
20
|
+
failure_message do
|
17
21
|
format_failure_message(@expected_data, @actual_data)
|
18
22
|
end
|
19
23
|
|
20
|
-
failure_message_when_negated do
|
21
|
-
@actual_data = parse_input(actual)
|
22
|
-
|
24
|
+
failure_message_when_negated do
|
23
25
|
"Expected markdown tables to differ, but they match:\n\n" \
|
24
|
-
"#{
|
26
|
+
"#{@actual_data.to_md}"
|
25
27
|
end
|
26
|
-
|
28
|
+
|
27
29
|
private
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
30
|
+
|
31
|
+
def parse_input(input, format = nil)
|
32
|
+
return input if input.is_a?(Marktable::Table)
|
33
|
+
|
34
|
+
Marktable::Table.new(input, type: format)
|
35
|
+
end
|
36
|
+
|
37
|
+
def infer_format(data)
|
38
|
+
case data
|
39
|
+
when Array
|
40
|
+
:array
|
41
|
+
when CSV::Table
|
42
|
+
:csv
|
38
43
|
when Marktable::Table
|
39
|
-
|
40
|
-
when Capybara::Node::Element
|
41
|
-
parse_capybara_element(input)
|
44
|
+
:markdown
|
42
45
|
else
|
43
|
-
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def looks_like_html?(text)
|
48
|
-
text.include?('<table') || text.include?('<tr') || text.include?('<td')
|
49
|
-
end
|
50
|
-
|
51
|
-
# Normalize data by trimming whitespace in cell values
|
52
|
-
def normalize(data)
|
53
|
-
data.map do |row|
|
54
|
-
if row.is_a?(Hash)
|
55
|
-
row.transform_values { |v| v.to_s.strip }
|
56
|
-
else
|
57
|
-
row.map { |v| v.to_s.strip }
|
58
|
-
end
|
46
|
+
:markdown
|
59
47
|
end
|
60
48
|
end
|
61
|
-
|
49
|
+
|
62
50
|
def format_failure_message(expected_data, actual_data)
|
63
|
-
expected_formatted = format_as_markdown(expected_data)
|
64
|
-
actual_formatted = format_as_markdown(actual_data)
|
65
|
-
|
66
51
|
"Expected markdown table to match:\n\n" \
|
67
|
-
"Expected:\n#{
|
68
|
-
"Actual:\n#{
|
69
|
-
"Parsed expected data: #{expected_data.inspect}\n" \
|
70
|
-
"Parsed actual data: #{actual_data.inspect}"
|
71
|
-
end
|
72
|
-
|
73
|
-
def format_as_markdown(data)
|
74
|
-
Marktable.table(data).to_s
|
75
|
-
end
|
76
|
-
|
77
|
-
# Parse HTML table into rows of data
|
78
|
-
def parse_html_table(html)
|
79
|
-
if defined?(Nokogiri)
|
80
|
-
parse_html_with_nokogiri(html)
|
81
|
-
else
|
82
|
-
begin
|
83
|
-
require('nokogiri')
|
84
|
-
parse_html_with_nokogiri(html)
|
85
|
-
rescue LoadError
|
86
|
-
parse_html_without_nokogiri(html)
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
def parse_html_with_nokogiri(html)
|
92
|
-
doc = Nokogiri::HTML(html)
|
93
|
-
|
94
|
-
# Extract headers
|
95
|
-
headers = extract_headers_with_nokogiri(doc)
|
96
|
-
|
97
|
-
# Extract body rows
|
98
|
-
body_rows = extract_body_rows_with_nokogiri(doc)
|
99
|
-
|
100
|
-
# Convert rows to hashes using the headers
|
101
|
-
body_rows.map do |row|
|
102
|
-
row_to_hash(row, headers)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
def extract_headers_with_nokogiri(doc)
|
107
|
-
headers = doc.css('thead th, thead td').map(&:text)
|
108
|
-
if headers.empty? && doc.css('tr').any?
|
109
|
-
headers = doc.css('tr:first-child th, tr:first-child td').map(&:text)
|
110
|
-
end
|
111
|
-
headers
|
112
|
-
end
|
113
|
-
|
114
|
-
def extract_body_rows_with_nokogiri(doc)
|
115
|
-
tbody_rows = doc.css('tbody tr').map { |tr| tr.css('th, td').map(&:text) }
|
116
|
-
|
117
|
-
# If no tbody, use all rows after the first (assuming first is header)
|
118
|
-
if tbody_rows.empty?
|
119
|
-
tbody_rows = doc.css('tr')[1..-1].to_a.map { |tr| tr.css('th, td').map(&:text) }
|
120
|
-
end
|
121
|
-
|
122
|
-
tbody_rows
|
123
|
-
end
|
124
|
-
|
125
|
-
def parse_html_without_nokogiri(html)
|
126
|
-
# Extract headers
|
127
|
-
headers = extract_headers_without_nokogiri(html)
|
128
|
-
|
129
|
-
# Extract body rows
|
130
|
-
body_rows = extract_body_rows_without_nokogiri(html, headers)
|
131
|
-
|
132
|
-
body_rows
|
133
|
-
end
|
134
|
-
|
135
|
-
def extract_headers_without_nokogiri(html)
|
136
|
-
headers = []
|
137
|
-
|
138
|
-
if html.include?('<thead')
|
139
|
-
# Extract headers from thead
|
140
|
-
thead_html = html[html.index('<thead')...(html.index('</thead>') + 8)]
|
141
|
-
headers = thead_html.scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
|
142
|
-
else
|
143
|
-
# No thead, get headers from first tr
|
144
|
-
first_tr = html.match(/<tr.*?>(.*?)<\/tr>/im)
|
145
|
-
if first_tr
|
146
|
-
headers = first_tr[1].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
headers
|
151
|
-
end
|
152
|
-
|
153
|
-
def extract_body_rows_without_nokogiri(html, headers)
|
154
|
-
rows = []
|
155
|
-
has_thead = html.include?('<thead')
|
156
|
-
has_tbody = html.include?('<tbody')
|
157
|
-
in_tbody = false
|
158
|
-
|
159
|
-
html.scan(/<tr.*?>(.*?)<\/tr>/im).each_with_index do |tr_content, index|
|
160
|
-
# Skip header rows
|
161
|
-
next if should_skip_header_row?(html, tr_content[0], index, has_thead, has_tbody)
|
162
|
-
|
163
|
-
# For tables with thead/tbody, only include tbody rows
|
164
|
-
if has_thead && has_tbody
|
165
|
-
in_tbody = html[0..html.index(tr_content[0])].include?('<tbody') unless in_tbody
|
166
|
-
in_tbody = false if html[0..html.index(tr_content[0])].include?('</tbody')
|
167
|
-
next unless in_tbody
|
168
|
-
end
|
169
|
-
|
170
|
-
cells = tr_content[0].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell_content| cell_content[0].strip }
|
171
|
-
|
172
|
-
if cells.any? && headers.any?
|
173
|
-
rows << row_to_hash(cells, headers)
|
174
|
-
end
|
175
|
-
end
|
176
|
-
|
177
|
-
rows
|
178
|
-
end
|
179
|
-
|
180
|
-
def should_skip_header_row?(html, tr_content, index, has_thead, has_tbody)
|
181
|
-
(has_thead && html[0..html.index(tr_content)].include?('<thead') &&
|
182
|
-
!html[0..html.index(tr_content)].include?('</thead')) ||
|
183
|
-
(!has_thead && !has_tbody && index == 0)
|
184
|
-
end
|
185
|
-
|
186
|
-
def row_to_hash(cells, headers)
|
187
|
-
row_hash = {}
|
188
|
-
headers.each_with_index do |header, i|
|
189
|
-
row_hash[header] = i < cells.length ? cells[i] : ''
|
190
|
-
end
|
191
|
-
row_hash
|
192
|
-
end
|
193
|
-
|
194
|
-
def parse_capybara_element(element)
|
195
|
-
# Extract headers
|
196
|
-
headers = extract_headers_from_capybara(element)
|
197
|
-
|
198
|
-
# Extract body rows
|
199
|
-
body_rows = extract_body_rows_from_capybara(element)
|
200
|
-
|
201
|
-
# Convert rows to hashes using the headers
|
202
|
-
body_rows.map do |cells|
|
203
|
-
row_to_hash(cells, headers)
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
def extract_headers_from_capybara(element)
|
208
|
-
thead = element.first('thead') rescue nil
|
209
|
-
if thead
|
210
|
-
thead.all('th, td').map(&:text)
|
211
|
-
else
|
212
|
-
first_row = element.first('tr')
|
213
|
-
first_row ? first_row.all('th, td').map(&:text) : []
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
def extract_body_rows_from_capybara(element)
|
218
|
-
body_rows = element.all('tbody tr')
|
219
|
-
|
220
|
-
# If no tbody, assume first row is header and skip it
|
221
|
-
if body_rows.empty?
|
222
|
-
all_rows = element.all('tr')
|
223
|
-
body_rows = all_rows[1..]
|
224
|
-
end
|
225
|
-
|
226
|
-
body_rows.map { |tr| tr.all('th, td').map(&:text) }
|
52
|
+
"Expected:\n#{expected_data.to_md}\n\n" \
|
53
|
+
"Actual:\n#{actual_data.to_md}\n\n" \
|
54
|
+
"Parsed expected data: #{expected_data.to_a.inspect}\n" \
|
55
|
+
"Parsed actual data: #{actual_data.to_a.inspect}"
|
227
56
|
end
|
228
57
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marktable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francois Gaspard
|
@@ -9,6 +9,34 @@ bindir: bin
|
|
9
9
|
cert_chain: []
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: csv
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '3.0'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '3.0'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: nokogiri
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '1.14'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.14'
|
12
40
|
- !ruby/object:Gem::Dependency
|
13
41
|
name: rake
|
14
42
|
requirement: !ruby/object:Gem::Requirement
|
@@ -34,8 +62,18 @@ files:
|
|
34
62
|
- LICENSE
|
35
63
|
- README.md
|
36
64
|
- lib/marktable.rb
|
65
|
+
- lib/marktable/formatters/base.rb
|
66
|
+
- lib/marktable/formatters/csv.rb
|
67
|
+
- lib/marktable/formatters/html.rb
|
68
|
+
- lib/marktable/formatters/markdown.rb
|
37
69
|
- lib/marktable/row.rb
|
38
70
|
- lib/marktable/table.rb
|
71
|
+
- lib/marktable/tables/array.rb
|
72
|
+
- lib/marktable/tables/base.rb
|
73
|
+
- lib/marktable/tables/csv.rb
|
74
|
+
- lib/marktable/tables/html.rb
|
75
|
+
- lib/marktable/tables/markdown.rb
|
76
|
+
- lib/marktable/version.rb
|
39
77
|
- spec/support/matchers/markdown_matchers.rb
|
40
78
|
homepage: https://github.com/Francois-gaspard/marktable
|
41
79
|
licenses:
|