marktable 0.0.2 → 0.0.4s

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 662b8c0026118b8a9a8cbfcefc4e22ca2d245020672b04e9dc17e923356b60cd
4
- data.tar.gz: 1307c073aeea515d89ee1e40c5d3a6753f6de815c7ae2f72f1ed042c9a733436
3
+ metadata.gz: '09cd0e8dca57fcd46c94f64c80ccba04609368bac7e7ddd6daf7c6d3ffd67320'
4
+ data.tar.gz: 90b6106afd8f92d4810fd786b58c925a21447738568afc3bcc344def6f6bcff6
5
5
  SHA512:
6
- metadata.gz: 8fe53f45c22f224efece661fcdbcd4fbd61426211f331d592e93dfa33953344b64e73b1d8558d5640f01fd13d67f71f80fc63c42e55b7212f6326da7497911b4
7
- data.tar.gz: e50d5d8cc11159dd809adb47b4f44a71116dab221a70b1c7a73495f7c4377601a6f7a59dc1afef9b67e7943758a86adaa1006460f02f4fe0f55fe9bd5387237d
6
+ metadata.gz: 8fa4867f6bfd9b9d9dabf2087203d501f2149c381c91479f6d79f45f6431e2cfc3096078a8f1f06cdcff9bb0e4a397af64af6adbd16a9a6fd515cbbd9f2cec36
7
+ data.tar.gz: 1bc26eb0ca767b832c90f636b8001a3c3967bd0369e4745a302301063ff8536251a9bbc879c1c88707afd6a19185b6442fd2370514ad8aaa371740e219811b4f
data/lib/marktable.rb CHANGED
@@ -3,6 +3,10 @@
3
3
  require_relative 'marktable/row'
4
4
  require_relative 'marktable/table'
5
5
 
6
+ if defined?(RSpec)
7
+ require_relative '../spec/support/matchers/markdown_matchers'
8
+ end
9
+
6
10
  module Marktable
7
11
  # Parse a markdown table string into an array of rows
8
12
  def self.parse(markdown_table, headers: true)
@@ -0,0 +1,228 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'capybara'
4
+
5
+ RSpec::Matchers.define :match_markdown do |expected_markdown|
6
+ match do |actual|
7
+ @actual_data = parse_input(actual)
8
+ @expected_data = parse_input(expected_markdown)
9
+
10
+ normalize(@actual_data) == normalize(@expected_data)
11
+ end
12
+
13
+ failure_message do |actual|
14
+ @actual_data = parse_input(actual)
15
+ @expected_data = parse_input(expected_markdown)
16
+
17
+ format_failure_message(@expected_data, @actual_data)
18
+ end
19
+
20
+ failure_message_when_negated do |actual|
21
+ @actual_data = parse_input(actual)
22
+
23
+ "Expected markdown tables to differ, but they match:\n\n" \
24
+ "#{format_as_markdown(@actual_data)}"
25
+ end
26
+
27
+ private
28
+
29
+ # Parse different types of inputs into a common data structure
30
+ def parse_input(input)
31
+ case input
32
+ when String
33
+ if looks_like_html?(input)
34
+ parse_html_table(input)
35
+ else
36
+ Marktable.parse(input)
37
+ end
38
+ when Marktable::Table
39
+ input.to_a
40
+ when Capybara::Node::Element
41
+ parse_capybara_element(input)
42
+ else
43
+ input
44
+ end
45
+ end
46
+
47
+ def looks_like_html?(text)
48
+ text.include?('<table') || text.include?('<tr') || text.include?('<td')
49
+ end
50
+
51
+ # Normalize data by trimming whitespace in cell values
52
+ def normalize(data)
53
+ data.map do |row|
54
+ if row.is_a?(Hash)
55
+ row.transform_values { |v| v.to_s.strip }
56
+ else
57
+ row.map { |v| v.to_s.strip }
58
+ end
59
+ end
60
+ end
61
+
62
+ def format_failure_message(expected_data, actual_data)
63
+ expected_formatted = format_as_markdown(expected_data)
64
+ actual_formatted = format_as_markdown(actual_data)
65
+
66
+ "Expected markdown table to match:\n\n" \
67
+ "Expected:\n#{expected_formatted}\n\n" \
68
+ "Actual:\n#{actual_formatted}\n\n" \
69
+ "Parsed expected data: #{expected_data.inspect}\n" \
70
+ "Parsed actual data: #{actual_data.inspect}"
71
+ end
72
+
73
+ def format_as_markdown(data)
74
+ Marktable.table(data).to_s
75
+ end
76
+
77
+ # Parse HTML table into rows of data
78
+ def parse_html_table(html)
79
+ if defined?(Nokogiri)
80
+ parse_html_with_nokogiri(html)
81
+ else
82
+ begin
83
+ require('nokogiri')
84
+ parse_html_with_nokogiri(html)
85
+ rescue LoadError
86
+ parse_html_without_nokogiri(html)
87
+ end
88
+ end
89
+ end
90
+
91
+ def parse_html_with_nokogiri(html)
92
+ doc = Nokogiri::HTML(html)
93
+
94
+ # Extract headers
95
+ headers = extract_headers_with_nokogiri(doc)
96
+
97
+ # Extract body rows
98
+ body_rows = extract_body_rows_with_nokogiri(doc)
99
+
100
+ # Convert rows to hashes using the headers
101
+ body_rows.map do |row|
102
+ row_to_hash(row, headers)
103
+ end
104
+ end
105
+
106
+ def extract_headers_with_nokogiri(doc)
107
+ headers = doc.css('thead th, thead td').map(&:text)
108
+ if headers.empty? && doc.css('tr').any?
109
+ headers = doc.css('tr:first-child th, tr:first-child td').map(&:text)
110
+ end
111
+ headers
112
+ end
113
+
114
+ def extract_body_rows_with_nokogiri(doc)
115
+ tbody_rows = doc.css('tbody tr').map { |tr| tr.css('th, td').map(&:text) }
116
+
117
+ # If no tbody, use all rows after the first (assuming first is header)
118
+ if tbody_rows.empty?
119
+ tbody_rows = doc.css('tr')[1..-1].to_a.map { |tr| tr.css('th, td').map(&:text) }
120
+ end
121
+
122
+ tbody_rows
123
+ end
124
+
125
+ def parse_html_without_nokogiri(html)
126
+ # Extract headers
127
+ headers = extract_headers_without_nokogiri(html)
128
+
129
+ # Extract body rows
130
+ body_rows = extract_body_rows_without_nokogiri(html, headers)
131
+
132
+ body_rows
133
+ end
134
+
135
+ def extract_headers_without_nokogiri(html)
136
+ headers = []
137
+
138
+ if html.include?('<thead')
139
+ # Extract headers from thead
140
+ thead_html = html[html.index('<thead')...(html.index('</thead>') + 8)]
141
+ headers = thead_html.scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
142
+ else
143
+ # No thead, get headers from first tr
144
+ first_tr = html.match(/<tr.*?>(.*?)<\/tr>/im)
145
+ if first_tr
146
+ headers = first_tr[1].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell| cell[0].strip }
147
+ end
148
+ end
149
+
150
+ headers
151
+ end
152
+
153
+ def extract_body_rows_without_nokogiri(html, headers)
154
+ rows = []
155
+ has_thead = html.include?('<thead')
156
+ has_tbody = html.include?('<tbody')
157
+ in_tbody = false
158
+
159
+ html.scan(/<tr.*?>(.*?)<\/tr>/im).each_with_index do |tr_content, index|
160
+ # Skip header rows
161
+ next if should_skip_header_row?(html, tr_content[0], index, has_thead, has_tbody)
162
+
163
+ # For tables with thead/tbody, only include tbody rows
164
+ if has_thead && has_tbody
165
+ in_tbody = html[0..html.index(tr_content[0])].include?('<tbody') unless in_tbody
166
+ in_tbody = false if html[0..html.index(tr_content[0])].include?('</tbody')
167
+ next unless in_tbody
168
+ end
169
+
170
+ cells = tr_content[0].scan(/<t[hd].*?>(.*?)<\/t[hd]>/im).map { |cell_content| cell_content[0].strip }
171
+
172
+ if cells.any? && headers.any?
173
+ rows << row_to_hash(cells, headers)
174
+ end
175
+ end
176
+
177
+ rows
178
+ end
179
+
180
+ def should_skip_header_row?(html, tr_content, index, has_thead, has_tbody)
181
+ (has_thead && html[0..html.index(tr_content)].include?('<thead') &&
182
+ !html[0..html.index(tr_content)].include?('</thead')) ||
183
+ (!has_thead && !has_tbody && index == 0)
184
+ end
185
+
186
+ def row_to_hash(cells, headers)
187
+ row_hash = {}
188
+ headers.each_with_index do |header, i|
189
+ row_hash[header] = i < cells.length ? cells[i] : ''
190
+ end
191
+ row_hash
192
+ end
193
+
194
+ def parse_capybara_element(element)
195
+ # Extract headers
196
+ headers = extract_headers_from_capybara(element)
197
+
198
+ # Extract body rows
199
+ body_rows = extract_body_rows_from_capybara(element)
200
+
201
+ # Convert rows to hashes using the headers
202
+ body_rows.map do |cells|
203
+ row_to_hash(cells, headers)
204
+ end
205
+ end
206
+
207
+ def extract_headers_from_capybara(element)
208
+ thead = element.first('thead') rescue nil
209
+ if thead
210
+ thead.all('th, td').map(&:text)
211
+ else
212
+ first_row = element.first('tr')
213
+ first_row ? first_row.all('th, td').map(&:text) : []
214
+ end
215
+ end
216
+
217
+ def extract_body_rows_from_capybara(element)
218
+ body_rows = element.all('tbody tr')
219
+
220
+ # If no tbody, assume first row is header and skip it
221
+ if body_rows.empty?
222
+ all_rows = element.all('tr')
223
+ body_rows = all_rows[1..]
224
+ end
225
+
226
+ body_rows.map { |tr| tr.all('th, td').map(&:text) }
227
+ end
228
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marktable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4s
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francois Gaspard
@@ -36,6 +36,7 @@ files:
36
36
  - lib/marktable.rb
37
37
  - lib/marktable/row.rb
38
38
  - lib/marktable/table.rb
39
+ - spec/support/matchers/markdown_matchers.rb
39
40
  homepage: https://github.com/Francois-gaspard/marktable
40
41
  licenses:
41
42
  - MIT