markdown_exec 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Gemfile.lock +1 -1
- data/Rakefile +3 -3
- data/bats/block-type-ux-auto.bats +1 -1
- data/bats/block-type-ux-default.bats +1 -1
- data/bats/block-type-ux-echo-hash-transform.bats +1 -1
- data/bats/block-type-ux-echo-hash.bats +2 -2
- data/bats/block-type-ux-exec-hash-transform.bats +8 -0
- data/bats/block-type-ux-exec-hash.bats +15 -0
- data/bats/block-type-ux-exec.bats +1 -1
- data/bats/block-type-ux-force.bats +9 -0
- data/bats/block-type-ux-formats.bats +8 -0
- data/bats/block-type-ux-readonly.bats +1 -1
- data/bats/block-type-ux-row-format.bats +1 -1
- data/bats/block-type-ux-transform.bats +1 -1
- data/bats/import-directive-parameter-symbols.bats +9 -0
- data/bats/import-duplicates.bats +4 -2
- data/bats/import-parameter-symbols.bats +8 -0
- data/bats/markup.bats +1 -1
- data/bats/options.bats +1 -1
- data/bin/tab_completion.sh +5 -1
- data/docs/dev/block-type-ux-echo-hash-transform.md +14 -12
- data/docs/dev/block-type-ux-exec-hash-transform.md +37 -0
- data/docs/dev/block-type-ux-exec-hash.md +93 -0
- data/docs/dev/block-type-ux-force.md +20 -0
- data/docs/dev/block-type-ux-formats.md +58 -0
- data/docs/dev/hexdump_format.md +267 -0
- data/docs/dev/import/parameter-symbols.md +6 -0
- data/docs/dev/import-directive-parameter-symbols.md +9 -0
- data/docs/dev/import-parameter-symbols-template.md +24 -0
- data/docs/dev/import-parameter-symbols.md +6 -0
- data/docs/dev/load-vars-state-demo.md +35 -0
- data/docs/ux-blocks-examples.md +2 -3
- data/examples/import_with_substitution_demo.md +130 -26
- data/examples/imports/organism_template.md +86 -29
- data/lib/cached_nested_file_reader.rb +265 -27
- data/lib/constants.rb +8 -1
- data/lib/env_interface.rb +13 -7
- data/lib/evaluate_shell_expressions.rb +1 -0
- data/lib/fcb.rb +120 -28
- data/lib/format_table.rb +56 -23
- data/lib/fout.rb +5 -0
- data/lib/hash_delegator.rb +1158 -347
- data/lib/markdown_exec/version.rb +1 -1
- data/lib/markdown_exec.rb +2 -0
- data/lib/mdoc.rb +13 -11
- data/lib/menu.src.yml +139 -34
- data/lib/menu.yml +116 -32
- data/lib/string_util.rb +80 -0
- data/lib/table_extractor.rb +170 -64
- data/lib/ww.rb +325 -29
- metadata +18 -2
data/lib/table_extractor.rb
CHANGED
@@ -1,68 +1,119 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# Extracts Markdown-style tables from text lines and returns metadata about each table
|
4
|
+
#
|
5
|
+
# This class analyzes an array of text lines to identify tables formatted in Markdown style.
|
6
|
+
# It supports both multi-line tables (using | delimiters) and single-line tables (using ! delimiters).
|
7
|
+
# For each table found, it returns metadata including row count, column count, and position.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# lines = [
|
11
|
+
# '| Name | Age | City',
|
12
|
+
# '|------|-----|-----',
|
13
|
+
# '| John | 30 | NYC'
|
14
|
+
# ]
|
15
|
+
# tables = TableExtractor.extract_tables(lines, regexp: /^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/)
|
16
|
+
# # Returns: [{ column_offset: 1, columns: 3, delimiter: '|', rows: 3, start_index: 0 }]
|
3
17
|
class TableExtractor
|
4
18
|
# Extract tables from an array of text lines formatted in Markdown style
|
5
|
-
#
|
6
|
-
# @
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
+
#
|
20
|
+
# @param lines [Array<String>] The array of text lines to analyze
|
21
|
+
# @param regexp [Regexp] Regular expression to match table separator rows (e.g., |---|---|)
|
22
|
+
# @param multi_line_delimiter [String] Delimiter character for multi-line tables (default: '|')
|
23
|
+
# @param single_line_delimiter [String] Delimiter character for single-line tables (default: '!')
|
24
|
+
# @return [Array<Hash>] Array of table metadata hashes with keys:
|
25
|
+
# - column_offset: Always 1 (for compatibility)
|
26
|
+
# - columns: Number of columns in the table
|
27
|
+
# - delimiter: The delimiter character used ('|' or '!')
|
28
|
+
# - rows: Number of rows in the table
|
29
|
+
# - start_index: Index of the first line of the table in the input array
|
30
|
+
def self.extract_tables(
|
31
|
+
lines,
|
32
|
+
multi_line_delimiter: '|',
|
33
|
+
regexp:,
|
34
|
+
single_line_delimiter: '!'
|
35
|
+
)
|
36
|
+
current_column_count = 0
|
37
|
+
current_row_count = 0
|
38
|
+
extracted_tables = []
|
39
|
+
inside_multi_line_table = false
|
40
|
+
table_start_index = nil
|
41
|
+
|
42
|
+
# Regex patterns for single-line table row parsing
|
43
|
+
single_line_start_pattern = /^\s*#{single_line_delimiter}/
|
44
|
+
single_line_content_pattern = /(?:^|(?<=#{single_line_delimiter}))\s*([^#{single_line_delimiter}]*)\s*(?=#{single_line_delimiter}|$)/
|
45
|
+
|
46
|
+
# Helper lambda that appends the current table to the results and resets the parsing state
|
47
|
+
add_current_table = lambda do |delimiter|
|
48
|
+
extracted_tables << {
|
49
|
+
column_offset: 1,
|
50
|
+
columns: current_column_count,
|
51
|
+
delimiter: delimiter,
|
52
|
+
rows: current_row_count,
|
53
|
+
start_index: table_start_index
|
54
|
+
}
|
55
|
+
current_column_count = 0
|
56
|
+
current_row_count = 0
|
57
|
+
inside_multi_line_table = false
|
58
|
+
table_start_index = nil
|
59
|
+
end
|
60
|
+
|
61
|
+
lines.each_with_index do |line, line_index|
|
62
|
+
# Detect single-line tables (e.g., !Name!Age!City!)
|
63
|
+
if !inside_multi_line_table && line =~ single_line_start_pattern
|
64
|
+
current_row_count = 1
|
65
|
+
extracted_columns = line.scan(single_line_content_pattern).flatten
|
66
|
+
table_start_index = line_index
|
67
|
+
|
68
|
+
current_column_count = extracted_columns.count - 1
|
69
|
+
add_current_table.call(single_line_delimiter)
|
70
|
+
|
71
|
+
# Detect multi-line table separator rows (e.g., |---|---|)
|
72
|
+
elsif line.strip.match?(regexp)
|
73
|
+
if inside_multi_line_table
|
19
74
|
# Add the current table before starting a new one
|
20
|
-
|
21
|
-
rows: row_count,
|
22
|
-
columns: column_count,
|
23
|
-
start_index: table_start
|
24
|
-
}
|
75
|
+
add_current_table.call(multi_line_delimiter)
|
25
76
|
end
|
26
|
-
# Start a new table
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
elsif
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
table_start = nil
|
42
|
-
row_count = 0
|
43
|
-
column_count = 0
|
77
|
+
# Start a new multi-line table
|
78
|
+
current_column_count = line.split(multi_line_delimiter).count - 1
|
79
|
+
current_row_count = 2 # Account for header and separator rows
|
80
|
+
inside_multi_line_table = true
|
81
|
+
table_start_index = line_index - 1 if table_start_index.nil?
|
82
|
+
|
83
|
+
# Continue multi-line table with data rows
|
84
|
+
elsif inside_multi_line_table &&
|
85
|
+
(line.strip.start_with?(multi_line_delimiter) ||
|
86
|
+
line.include?(multi_line_delimiter))
|
87
|
+
current_row_count += 1
|
88
|
+
|
89
|
+
# End multi-line table when we encounter a non-table line
|
90
|
+
elsif inside_multi_line_table
|
91
|
+
add_current_table.call(multi_line_delimiter)
|
44
92
|
end
|
45
93
|
end
|
46
94
|
|
47
|
-
# Handle
|
48
|
-
if
|
49
|
-
|
50
|
-
rows: row_count,
|
51
|
-
columns: column_count,
|
52
|
-
start_index: table_start
|
53
|
-
}
|
95
|
+
# Handle table that ends at the last line
|
96
|
+
if inside_multi_line_table
|
97
|
+
add_current_table.call(multi_line_delimiter)
|
54
98
|
end
|
55
99
|
|
56
|
-
|
100
|
+
extracted_tables
|
57
101
|
end
|
58
102
|
end
|
59
103
|
|
60
104
|
return if $PROGRAM_NAME != __FILE__
|
61
105
|
|
106
|
+
# # for ww
|
107
|
+
# require 'bundler/setup'
|
108
|
+
# Bundler.require(:default)
|
109
|
+
|
62
110
|
require 'minitest/autorun'
|
63
111
|
|
64
112
|
class TestTableExtractor < Minitest::Test
|
65
|
-
|
113
|
+
# Regex pattern to match table separator rows: pipe-separated cells of one or more hyphens, each with optional leading/trailing colons
|
114
|
+
@@table_separator_regexp = /^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/
|
115
|
+
@@multi_line_delimiter = '|'
|
116
|
+
@@single_line_delimiter = '!'
|
66
117
|
|
67
118
|
def test_single_table
|
68
119
|
lines = [
|
@@ -71,9 +122,15 @@ class TestTableExtractor < Minitest::Test
|
|
71
122
|
'| Pongo tapanuliensis| Pongo| Hominidae',
|
72
123
|
'| | Histiophryne| Antennariidae'
|
73
124
|
]
|
74
|
-
expected = [{
|
125
|
+
expected = [{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter, rows: 4,
|
126
|
+
start_index: 0 }]
|
75
127
|
assert_equal expected,
|
76
|
-
TableExtractor.extract_tables(
|
128
|
+
TableExtractor.extract_tables(
|
129
|
+
lines,
|
130
|
+
regexp: @@table_separator_regexp,
|
131
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
132
|
+
single_line_delimiter: @@single_line_delimiter
|
133
|
+
)
|
77
134
|
end
|
78
135
|
|
79
136
|
def test_indented_table
|
@@ -83,9 +140,15 @@ class TestTableExtractor < Minitest::Test
|
|
83
140
|
"\t | Pongo tapanuliensis| Pongo| Hominidae",
|
84
141
|
"\t | | Histiophryne| Antennariidae"
|
85
142
|
]
|
86
|
-
expected = [{
|
143
|
+
expected = [{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter, rows: 4,
|
144
|
+
start_index: 0 }]
|
87
145
|
assert_equal expected,
|
88
|
-
TableExtractor.extract_tables(
|
146
|
+
TableExtractor.extract_tables(
|
147
|
+
lines,
|
148
|
+
regexp: @@table_separator_regexp,
|
149
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
150
|
+
single_line_delimiter: @@single_line_delimiter
|
151
|
+
)
|
89
152
|
end
|
90
153
|
|
91
154
|
def test_multiple_tables
|
@@ -100,11 +163,18 @@ class TestTableExtractor < Minitest::Test
|
|
100
163
|
'| Tapanuli Orangutan| Pongo tapanuliensis'
|
101
164
|
]
|
102
165
|
expected = [
|
103
|
-
{
|
104
|
-
|
166
|
+
{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter,
|
167
|
+
rows: 4, start_index: 0 },
|
168
|
+
{ column_offset: 1, columns: 2, delimiter: @@multi_line_delimiter,
|
169
|
+
rows: 3, start_index: 5 }
|
105
170
|
]
|
106
171
|
assert_equal expected,
|
107
|
-
TableExtractor.extract_tables(
|
172
|
+
TableExtractor.extract_tables(
|
173
|
+
lines,
|
174
|
+
regexp: @@table_separator_regexp,
|
175
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
176
|
+
single_line_delimiter: @@single_line_delimiter
|
177
|
+
)
|
108
178
|
end
|
109
179
|
|
110
180
|
def test_no_tables
|
@@ -114,7 +184,10 @@ class TestTableExtractor < Minitest::Test
|
|
114
184
|
]
|
115
185
|
expected = []
|
116
186
|
assert_equal expected,
|
117
|
-
TableExtractor.extract_tables(
|
187
|
+
TableExtractor.extract_tables(
|
188
|
+
lines,
|
189
|
+
regexp: @@table_separator_regexp
|
190
|
+
)
|
118
191
|
end
|
119
192
|
|
120
193
|
def test_inconsistent_columns
|
@@ -128,11 +201,20 @@ class TestTableExtractor < Minitest::Test
|
|
128
201
|
'|-|-|-',
|
129
202
|
'| Tapanuli Orangutan| Pongo tapanuliensis'
|
130
203
|
]
|
131
|
-
#
|
132
|
-
expected = [
|
133
|
-
|
204
|
+
# Number of columns determined from row of dividers
|
205
|
+
expected = [
|
206
|
+
{ column_offset: 1, columns: 2, delimiter: @@multi_line_delimiter,
|
207
|
+
rows: 4, start_index: 0 },
|
208
|
+
{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter,
|
209
|
+
rows: 3, start_index: 5 }
|
210
|
+
]
|
134
211
|
assert_equal expected,
|
135
|
-
TableExtractor.extract_tables(
|
212
|
+
TableExtractor.extract_tables(
|
213
|
+
lines,
|
214
|
+
regexp: @@table_separator_regexp,
|
215
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
216
|
+
single_line_delimiter: @@single_line_delimiter
|
217
|
+
)
|
136
218
|
end
|
137
219
|
|
138
220
|
def test_table_at_end_of_lines
|
@@ -143,9 +225,17 @@ class TestTableExtractor < Minitest::Test
|
|
143
225
|
'| Pongo tapanuliensis| Pongo| Hominidae',
|
144
226
|
'| | Histiophryne| Antennariidae'
|
145
227
|
]
|
146
|
-
expected = [
|
228
|
+
expected = [
|
229
|
+
{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter,
|
230
|
+
rows: 4, start_index: 1 }
|
231
|
+
]
|
147
232
|
assert_equal expected,
|
148
|
-
TableExtractor.extract_tables(
|
233
|
+
TableExtractor.extract_tables(
|
234
|
+
lines,
|
235
|
+
regexp: @@table_separator_regexp,
|
236
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
237
|
+
single_line_delimiter: @@single_line_delimiter
|
238
|
+
)
|
149
239
|
end
|
150
240
|
|
151
241
|
def test_table_without_starting_pipe
|
@@ -156,9 +246,17 @@ class TestTableExtractor < Minitest::Test
|
|
156
246
|
'| Pongo tapanuliensis| Pongo| Hominidae',
|
157
247
|
'| | Histiophryne| Antennariidae'
|
158
248
|
]
|
159
|
-
expected = [
|
249
|
+
expected = [
|
250
|
+
{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter,
|
251
|
+
rows: 4, start_index: 1 }
|
252
|
+
]
|
160
253
|
assert_equal expected,
|
161
|
-
TableExtractor.extract_tables(
|
254
|
+
TableExtractor.extract_tables(
|
255
|
+
lines,
|
256
|
+
regexp: @@table_separator_regexp,
|
257
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
258
|
+
single_line_delimiter: @@single_line_delimiter
|
259
|
+
)
|
162
260
|
end
|
163
261
|
|
164
262
|
def test_table_with_colon_hyphens
|
@@ -168,8 +266,16 @@ class TestTableExtractor < Minitest::Test
|
|
168
266
|
'| John Doe| 30| New York',
|
169
267
|
'| Jane Doe| 25| Los Angeles'
|
170
268
|
]
|
171
|
-
expected = [
|
269
|
+
expected = [
|
270
|
+
{ column_offset: 1, columns: 3, delimiter: @@multi_line_delimiter,
|
271
|
+
rows: 4, start_index: 0 }
|
272
|
+
]
|
172
273
|
assert_equal expected,
|
173
|
-
TableExtractor.extract_tables(
|
274
|
+
TableExtractor.extract_tables(
|
275
|
+
lines,
|
276
|
+
regexp: @@table_separator_regexp,
|
277
|
+
multi_line_delimiter: @@multi_line_delimiter,
|
278
|
+
single_line_delimiter: @@single_line_delimiter
|
279
|
+
)
|
174
280
|
end
|
175
281
|
end
|