sparx 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ module Sparx
2
# URL schemes accepted by valid_url? when a link is written with an explicit scheme.
SAFE_PROTOCOLS = %w[http https tel mailto sms facetime skype whatsapp geo zoom spotify vscode ftp].freeze

# Character-to-entity tables used by escape_html_content and escape_html_attr.
ESCAPE_HTML = {'&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;'}.freeze
ESCAPE_ATTR = {'"' => '&quot;', "'" => '&#39;', '<' => '&lt;', '>' => '&gt;'}.freeze

# Inline formatting: one or more of * / - directly followed by [text] (no nested brackets).
STANDALONE = /([*\/-]+)\[([^\[\]]*)\]/.freeze

# Heading line: one to six '#', whitespace, then the heading text.
HEADING = /^(\#{1,6})(?!\#)\s+(.+)$/.freeze

# Link: optional formatting prefix, [label], URL start, URL remainder, optional ^target.
LINK_FORMAT = /([*\/-]*)\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*?)\](\/[a-zA-Z0-9]|[a-zA-Z0-9]+:\/\/|www\.|@|#|(?:tel|mailto|sms|facetime|skype|whatsapp|geo|zoom|spotify|vscode):)([^\s\[\]^]*)(\^[a-zA-Z0-9_]*)?/.freeze

# Formatting prefix + bracket pair whose content may hold one level of nested brackets.
# Both names are kept for callers; they share a single pattern so they cannot drift apart.
BRACKETS1 = /([*\/-]+)\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]/.freeze
BRACKETS2 = BRACKETS1

# Citation use: optional prefix, [label], then @id.
CITATIONS = /([*\/-]*)\[([^\[\]]+?)\]@([a-zA-Z0-9_-]+)/.freeze

# Citation definition: @id: url "optional title". The extraction variant also
# consumes whitespace after the closing quote; the removal variant does not.
CITATION = /@([a-zA-Z0-9_-]+):\s*([^\s]+)(?:\s+"((?:[^"\\]|\\.)*)"\s*)?/.freeze
CITATION_REMOVE = /@([a-zA-Z0-9_-]+):\s*([^\s]+)(?:\s+"((?:[^"\\]|\\.)*)")?/.freeze

# Numbered citation definition at line start: N[title] url "optional description".
# Extraction and removal use the same pattern under two names.
NUMBERED_CITE = /^(\d+)\[([^\]]+)\]\s*([^\s]+)(?:\s+"((?:[^"\\]|\\.)*)")?/.freeze
NUMBERED_CITE_REMOVE = NUMBERED_CITE

# URL schemes rejected when citation definitions are collected.
UNSAFE = /^(javascript|data|vbscript):/i.freeze

# Fenced code block: ``` + optional language word + optional newline + body + ```.
CODE = /```(\w+)?\n?(.*?)```/m.freeze

# A run of consecutive lines that each start and end with '|'.
# No /m flag: in Ruby /m lets '.' match newlines, which made a single match
# stretch from the first table row to the last '|' line-end in the whole text,
# swallowing any paragraphs in between.
TABLES = /(^\|.*\|$\n?)+/.freeze

# Header separator row made only of '-' and '|'.
TABLE_ROWS = /^\|[-|]+\|$/.freeze

# Paragraph boundaries: a newline followed by one or more blank lines.
PARAGRAPH_SPLIT = /\n(?:\s*\n)+/.freeze
DOUBLE_NEWLINE = /\n\s*\n/.freeze

# Spans that escape_html_content_except_syntax shields from HTML escaping.
FORMAT_PROTECT = /([*\/-]+\[[^\]]*\])/.freeze
LINK_PROTECT = /(\[[^\]]*\]\(?[^\s\)]+\)?)/.freeze
23
+ private
24
# Wraps every backtick-delimited span in a <code> element.
# The captured text is inserted as-is; no HTML escaping is done here.
def self.process_inline_code(content)
  content.gsub(/`([^`]+)`/) do
    code_text = Regexp.last_match(1)
    "<code>#{code_text}</code>"
  end
end
25
# Converts s[text] into <small>text</small>.
#
# content             - String to scan.
# recursive_processor - optional callable invoked as call(inner, {}, {}) to
#                       render the bracket content before it is wrapped.
#
# The "s" must not be preceded by a word character, '.' or '-'. Without that
# guard the pattern also fired inside ordinary words ("items[0]") and inside
# span-class syntax whose class name ends in "s" (".notes[...]"), which is
# processed after this pass and was therefore corrupted.
def self.process_small_tags(content, recursive_processor = nil)
  content.gsub(/(?<![\w.-])s\[(.*?)\]/m) do
    inner = Regexp.last_match(1)
    inner = recursive_processor.call(inner, {}, {}) if recursive_processor
    "<small>#{inner}</small>"
  end
end
26
# Converts .class-name[text] into <span class="class-name">text</span>.
# When a recursive_processor is given, the bracket content is first passed
# through it as call(text, {}, {}).
def self.process_span_classes(content, recursive_processor = nil)
  content.gsub(/\.([a-zA-Z0-9_-]+)\[(.*?)\]/m) do
    css_class = Regexp.last_match(1)
    body = Regexp.last_match(2)
    body = recursive_processor.call(body, {}, {}) if recursive_processor
    "<span class=\"#{css_class}\">#{body}</span>"
  end
end
27
# Renders [label|title]URL^target links, honouring any * / - formatting prefix.
#
# content             - String to scan with LINK_FORMAT.
# citations, numbered_citations - passed through to recursive_processor.
# recursive_processor - optional callable used to render the label.
#
# A match whose URL fails valid_url? is replaced by its formatted label only;
# the URL part of the match is dropped from the output.
def self.process_links_with_formatting(content, citations, numbered_citations, recursive_processor = nil)
  render = lambda do |fragment|
    recursive_processor ? recursive_processor.call(fragment, citations, numbered_citations) : fragment
  end

  content.gsub(LINK_FORMAT) do
    prefix, inner, url_prefix, url_suffix, target = Regexp.last_match.captures

    next apply_formatting_prefixes(render.call(inner), prefix) unless valid_url?(url_prefix, url_suffix)

    text, title = inner.split('|', 2)
    label = render.call(escape_html_content(text || ""))

    # The URL remainder may contain quotes or angle brackets (LINK_FORMAT only
    # excludes whitespace, brackets and '^'), so it must be attribute-escaped
    # before it is placed inside href="...".
    href = escape_html_attr("#{url_prefix}#{url_suffix}")
    attrs = [%Q(href="#{href}")]
    attrs << %Q(title="#{escape_html_attr(title)}") if title && !title.empty?
    if target
      # "^" alone means a new window; "^name" names the target explicitly.
      window = target[1..-1]
      attrs << %Q(target="#{window.empty? ? '_blank' : window}")
    end

    apply_formatting_prefixes(%Q(<a #{attrs.join(' ')}>#{label}</a>), prefix)
  end
end
51
# Decides whether a URL, given as its matched prefix and remainder, may be linked.
#
# Returns false for a bare "/" prefix followed by nothing or by punctuation,
# for URLs of one character or less, and for URLs whose leading
# "letters:" scheme is not listed in SAFE_PROTOCOLS. Everything else is accepted.
def self.valid_url?(url_prefix, url_suffix)
  bare_slash = url_prefix == "/"
  return false if bare_slash && (url_suffix.empty? || url_suffix =~ /^[.,;!?]/)

  candidate = "#{url_prefix}#{url_suffix}"
  return false unless candidate.length > 1

  scheme = candidate[/^([a-zA-Z]+):/i, 1]
  scheme ? SAFE_PROTOCOLS.include?(scheme.downcase) : true
end
61
# Applies * / - formatting to bracketed text that is not part of a link,
# e.g. *[bold]. The bracket content is rendered through recursive_processor
# (called as call(text, {}, {})) when one is supplied.
def self.process_standalone_formatting(content, recursive_processor = nil)
  content.gsub(STANDALONE) do
    hit = Regexp.last_match
    body = hit[2]
    body = recursive_processor.call(body, {}, {}) if recursive_processor
    apply_formatting_prefixes(body, hit[1])
  end
end
66
# Repeatedly expands prefix[...] formatting until no such construct remains.
# Each pass renders the bracket content with parse_styles and wraps it
# according to the prefix characters.
def self.process_bracket_formatting_loops(text, citations, numbered_citations)
  while text.match?(BRACKETS1)
    text = text.gsub(BRACKETS2) do
      markers = Regexp.last_match(1)
      inner = Regexp.last_match(2)
      rendered = parse_styles(inner, citations, numbered_citations)
      apply_formatting_prefixes(rendered, markers)
    end
  end
  text
end
74
# Wraps content in the tags named by the prefix characters:
# '*' => <strong>, '/' => <em>, '-' => <del>. Other characters are ignored.
# The first prefix character becomes the outermost tag.
def self.apply_formatting_prefixes(content, prefix)
  tag_for = { '*' => 'strong', '/' => 'em', '-' => 'del' }
  prefix.each_char.to_a.reverse.inject(content) do |wrapped, marker|
    tag = tag_for[marker]
    tag ? "<#{tag}>#{wrapped}</#{tag}>" : wrapped
  end
end
75
# Replaces & < > " ' with their HTML entities (see ESCAPE_HTML).
# A nil argument is returned unchanged.
def self.escape_html_content(text)
  text.nil? ? text : text.gsub(/[&<>"']/, ESCAPE_HTML)
end
79
# Escapes " ' < > for use inside an HTML attribute value (see ESCAPE_ATTR).
# Ampersands are left untouched. nil and "" both yield "".
def self.escape_html_attr(str)
  blank = str.nil? || str.empty?
  blank ? "" : str.gsub(/["'<>]/, ESCAPE_ATTR)
end
83
# Turns [label]@id into a link to the citation registered under id.
#
# content             - String to scan with CITATIONS.
# citations           - Hash of id => { url:, title: }.
# numbered_citations  - passed through to recursive_processor.
# recursive_processor - optional callable used to render the escaped label.
#
# References to unknown ids are left in the text exactly as written.
def self.process_citations(content, citations, numbered_citations, recursive_processor = nil)
  content.gsub(CITATIONS) do |reference|
    prefix, inner, cite_id = Regexp.last_match.captures
    entry = citations[cite_id]
    next reference unless entry

    label = escape_html_content(inner)
    label = recursive_processor.call(label, citations, numbered_citations) if recursive_processor

    # Citation URLs are any run of non-whitespace characters and may contain
    # quotes, so they are attribute-escaped before being written into href.
    href = escape_html_attr(entry[:url])
    title_attr = entry[:title] ? %Q( title="#{escape_html_attr(entry[:title])}") : ""

    apply_formatting_prefixes(%Q(<a href="#{href}"#{title_attr}>#{label}</a>), prefix)
  end
end
96
# Turns [label]:N into a link to the in-page anchor #cite-N, followed by a
# superscript N. References whose number is not in numbered_citations are
# left exactly as written.
def self.process_numbered_citations(content, citations, numbered_citations, recursive_processor = nil)
  content.gsub(/([*\/-]*)\[([^\[\]]+?)\]:(\d+)/) do |reference|
    markers, label, number = Regexp.last_match.captures
    next reference unless numbered_citations[number]

    label = recursive_processor.call(label, citations, numbered_citations) if recursive_processor
    anchor = %Q(<a href="#cite-#{number}">#{label}<sup>#{number}</sup></a>)
    apply_formatting_prefixes(anchor, markers)
  end
end
104
# Runs every inline pass over content in a fixed order: images, citations,
# numbered citations, inline code, small tags, span classes, links and
# finally standalone formatting. Returns the result of the last pass.
def self.process_all_inline_elements(content, citations, numbered_citations, recursive_processor = nil)
  with_images = process_images(content, citations, numbered_citations, recursive_processor)
  with_cites = process_citations(with_images, citations, numbered_citations, recursive_processor)
  with_refs = process_numbered_citations(with_cites, citations, numbered_citations, recursive_processor)

  with_code = process_inline_code(with_refs)
  with_small = process_small_tags(with_code, recursive_processor)
  with_spans = process_span_classes(with_small, recursive_processor)

  with_links = process_links_with_formatting(with_spans, citations, numbered_citations, recursive_processor)
  process_standalone_formatting(with_links, recursive_processor)
end
114
# Renders fenced code blocks as <pre><code> and backtick spans as <code>,
# escaping & < > in both.
#
# Fenced blocks are located with CODE; two leading spaces or tabs are removed
# from each code line and trailing whitespace is trimmed. Inline code is only
# converted in the text between fenced blocks, so backticks that appear
# inside a fenced block are preserved verbatim instead of being turned into
# nested <code> elements.
def self.process_code_blocks(text)
  escape = ->(raw) { raw.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;') }
  inline = lambda do |segment|
    # span is the full match including both backticks; strip them off.
    segment.gsub(/`[^`]+`/) { |span| "<code>#{escape.call(span[1...-1])}</code>" }
  end

  pieces = []
  cursor = 0
  while (fence = CODE.match(text, cursor))
    pieces << inline.call(text[cursor...fence.begin(0)])

    lang = fence[1]
    # [ \t] rather than \s: \s also matches "\n", which made the old pattern
    # swallow blank lines inside the code.
    code = fence[2].rstrip.gsub(/^[ \t]{2}/, '')
    lang_class = lang ? %Q( class="language-#{lang}") : ""
    pieces << "<pre><code#{lang_class}>#{escape.call(code)}</code></pre>"

    cursor = fence.end(0)
  end
  pieces << inline.call(text[cursor..-1])
  pieces.join
end
125
# Converts lines matching HEADING ("# Title" .. "###### Title") into
# <h1>..<h6> elements. The heading text goes through the inline passes with
# parse_styles_recursive as the recursive processor.
def self.process_headings(text, citations, numbered_citations)
  text.gsub(HEADING) do
    tag = "h#{Regexp.last_match(1).length}"
    title = Regexp.last_match(2)
    rendered = process_all_inline_elements(title, citations, numbered_citations, method(:parse_styles_recursive))
    "<#{tag}>#{rendered}</#{tag}>"
  end
end
131
# Converts pipe-delimited tables into <table> markup.
#
# The first row becomes the header. A following separator row (TABLE_ROWS)
# is discarded. Every cell is stripped and HTML-escaped.
#
# Cells are split on '|' after removing only the outer pipes, so an empty
# cell ("|1||3|") is kept as an empty <td>. Previously empty cells were
# dropped, which shifted the remaining cells into the wrong columns.
def self.process_tables(text)
  split_row = lambda do |row|
    row.strip.sub(/\A\|/, '').sub(/\|\z/, '').split('|', -1)
  end
  render_cells = lambda do |row, tag|
    split_row.call(row).map { |cell| "<#{tag}>#{escape_html_content(cell.strip)}</#{tag}>" }.join
  end

  text.gsub(TABLES) do |table|
    rows = table.strip.split("\n")
    header_cells = render_cells.call(rows.shift, 'th')
    rows.shift if rows.first && rows.first.strip =~ TABLE_ROWS
    body = rows.map { |row| "<tr>#{render_cells.call(row, 'td')}</tr>" }.join
    "<table><thead><tr>#{header_cells}</tr></thead><tbody>#{body}</tbody></table>"
  end
end
143
+
144
+
145
# Collects citation definitions of the form  @id: url "optional title".
# Returns a Hash of id => { url:, title: }. Definitions whose URL matches
# UNSAFE are skipped; a later definition of the same id replaces an earlier one.
def self.extract_citations(text)
  text.scan(CITATION).each_with_object({}) do |(id, url, title), found|
    next if url =~ UNSAFE

    found[id] = { url: url, title: title }
  end
end
153
+
154
+
155
# Collects numbered citation definitions of the form
#   N[title] url "optional description"
# found at the start of a line.
#
# Returns a Hash of "N" => { title:, url:, description: } with surrounding
# whitespace stripped from each value (description is nil when absent).
#
# Definitions whose URL matches UNSAFE (javascript:, data:, vbscript:) are
# skipped, matching the filter extract_citations applies to named citations;
# these URLs are later written into href attributes.
def self.extract_numbered_citations(text)
  text.scan(NUMBERED_CITE).each_with_object({}) do |(number, title, url, description), found|
    url = url.strip
    next if url =~ UNSAFE

    found[number] = {
      title: title.strip,
      url: url,
      description: description ? description.strip : nil
    }
  end
end
166
+
167
+
168
# Deletes named (@id: url "title") and numbered (N[title] url "desc")
# citation definitions from the text. Surrounding newlines are left in place.
def self.remove_citation_definitions(text)
  [CITATION_REMOVE, NUMBERED_CITE_REMOVE].inject(text) do |remaining, pattern|
    remaining.gsub(pattern, "")
  end
end
171
# Appends a <section class="citations"> listing every numbered citation,
# ordered by citation number. Returns text unchanged when there are none.
#
# text               - rendered document.
# numbered_citations - Hash of "N" => { title:, url:, description: }.
#
# Each entry is a <cite id="cite-N"> holding the number and a link to the
# citation URL; the description, when present, becomes the link's title.
def self.add_numbered_citations_section(text, numbered_citations)
  return text if numbered_citations.empty?

  entries = numbered_citations.sort_by { |num, _cite| num.to_i }.map do |num, cite|
    title_attr = cite[:description] ? %Q( title="#{escape_html_attr(cite[:description])}") : ""
    # The URL is user-supplied non-whitespace text and may contain quotes, so
    # it is attribute-escaped before being written into href.
    href = escape_html_attr(cite[:url])
    label = escape_html_content(cite[:title])
    %Q(<cite id="cite-#{num}"><span class="cite-number">#{num}</span> <a href="#{href}"#{title_attr}>#{label}</a></cite>)
  end

  text + "\n\n<section class=\"citations\">\n#{entries.join("\n")}\n</section>"
end
183
# Wraps blank-line-separated blocks of text in <p> elements.
#
# Nothing is wrapped unless original_text contains a blank line. Blocks that
# start with a block-level HTML tag, or with one of the placeholders declared
# in CONTAINER_SPECS / LIST_SPECS, are emitted without a <p> wrapper. Empty
# blocks are dropped and the remaining blocks are joined by one blank line.
def self.wrap_paragraphs_if_needed(text, original_text)
  return text unless original_text.match?(DOUBLE_NEWLINE)

  specs = CONTAINER_SPECS.values + LIST_SPECS.values
  placeholder_names = specs.map { |spec| spec[:placeholder] }.uniq.join('|')
  block_element_regex = /\A(<(ul|ol|table|blockquote|pre|h\d|div|details|section|img|dl|aside|figure)|(#{placeholder_names}))/i

  wrapped = text.split(PARAGRAPH_SPLIT).each_with_object([]) do |chunk, out|
    chunk = chunk.strip
    next if chunk.empty?

    out << (chunk =~ block_element_regex ? chunk : "<p>#{chunk}</p>")
  end
  wrapped.join("\n\n")
end
192
+
193
+
194
# HTML-escapes text while leaving formatting spans (FORMAT_PROTECT) and
# link spans (LINK_PROTECT) untouched.
#
# Protected spans are swapped for %%%F_n%%% / %%%L_n%%% tokens, the rest is
# escaped with escape_html_content, and the spans are then put back.
#
# Fixes over the previous version:
# * the token is built once and used both as the hash key and as the inserted
#   text; before, the key used the hash size before insertion and the token
#   the size after it, so tokens never matched their own span;
# * spans are restored in reverse order of capture, so a link span that
#   contains a formatting token gets that token resolved as well;
# * restoration uses the block form of gsub, so backslashes inside a span
#   are not interpreted as replacement escapes.
def self.escape_html_content_except_syntax(text)
  protected_spans = {}
  stash = lambda do |kind, span|
    token = "%%%#{kind}_#{protected_spans.size}%%%"
    protected_spans[token] = span
    token
  end

  text = text.gsub(FORMAT_PROTECT) { |span| stash.call('F', span) }
  text = text.gsub(LINK_PROTECT) { |span| stash.call('L', span) }
  text = escape_html_content(text)

  protected_spans.to_a.reverse_each do |token, span|
    text = text.gsub(token) { span }
  end
  text
end
204
+ end
@@ -0,0 +1,64 @@
1
+ module Sparx
2
# Converts Sparx markup into HTML.
#
# text - source markup; nil is treated as an empty string. NUL bytes are removed.
# safe - when true, HTML tags written outside code spans and fenced code
#        blocks are escaped before any other processing.
#
# Returns the rendered HTML, with a citations section appended when the
# source defines numbered citations.
def self.parse(text, safe: false)
  text = text.to_s.gsub("\x00", "")
  text = escape_html_outside_code(text) if safe

  global_counters.clear
  citations = extract_citations(text)
  numbered_citations = extract_numbered_citations(text)
  text = remove_citation_definitions(text)
  parsed_text = process_all_containers(text, citations, numbered_citations)
  add_numbered_citations_section(parsed_text, numbered_citations)
end

# Escapes every <...> tag in text except those inside fenced code blocks or
# backtick code spans, which are returned in their markup form so the normal
# code passes can render them later.
def self.escape_html_outside_code(text)
  stash = {}
  hide = lambda do |kind, source|
    token = "%%%#{kind}_#{stash.size}%%%"
    stash[token] = source
    token
  end

  text = text.gsub(/```(\w+)?\n?(.*?)```/m) do
    fence = Regexp.last_match
    # Always put a newline after the opening fence (and optional language).
    # Without it, a block that had no language would be re-read with its
    # first word of code taken as the language.
    hide.call('CODEBLOCK', "```#{fence[1]}\n#{fence[2]}```")
  end
  text = text.gsub(/`([^`]+)`/) { |span| hide.call('INLINECODE', span) }

  text = text.gsub(/<[^>]+>/) { |tag| escape_html_content(tag) }

  # Block form: the stored code is inserted literally, so backslashes in code
  # are not treated as replacement escapes. Unknown tokens are left as-is.
  text.gsub(/%%%(?:CODEBLOCK|INLINECODE)_\d+%%%/) { |token| stash.fetch(token, token) }
end
51
+ private
52
# Renders one fragment of markup: code blocks, then headings, then tables,
# then all inline elements, and finally any remaining prefix[...] formatting.
# A nil fragment yields "".
def self.parse_styles(text, citations, numbered_citations)
  return "" if text.nil?

  html = process_code_blocks(text)
  html = process_tables(process_headings(html, citations, numbered_citations))
  # The inline pass runs without a recursive processor here; nested bracket
  # formatting is resolved by the loop below instead.
  html = process_all_inline_elements(html, citations, numbered_citations, nil)
  process_bracket_formatting_loops(html, citations, numbered_citations)
end
61
# Callable-friendly alias for parse_styles with the same three arguments;
# process_headings passes it as the recursive processor via method(:parse_styles_recursive).
def self.parse_styles_recursive(content, citations, numbered_citations)
  rendered = parse_styles(content, citations, numbered_citations)
  rendered
end
64
+ end
@@ -0,0 +1,5 @@
1
module Sparx
  # Version of the sparx gem. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.5".freeze
end
data/lib/sparx.rb ADDED
@@ -0,0 +1,5 @@
1
+ require_relative 'sparx/constants'
2
+ require_relative 'sparx/inline_processor'
3
+ require_relative 'sparx/image_processor'
4
+ require_relative 'sparx/container_processor'
5
+ require_relative 'sparx/parser'
@@ -0,0 +1,173 @@
1
%YAML 1.2
---
# Syntax definition for Sparx markup files.
name: Sparx
file_extensions: [sparx, spk]
scope: source.sparx

contexts:
  # Entry point: every top-level construct is tried in this order.
  main:
    - include: comments
    - include: headings
    - include: containers
    - include: lists
    - include: images
    - include: links
    - include: code
    - include: inline-formatting

  # A line that starts with a backslash followed by whitespace.
  comments:
    - match: '^\\\s.*$'
      scope: comment.line.sparx

  # One rule per heading level ("# " through "###### ").
  headings:
    - match: '^\s*# (.+)$'
      scope: markup.heading.1.sparx

    - match: '^\s*## (.+)$'
      scope: markup.heading.2.sparx

    - match: '^\s*### (.+)$'
      scope: markup.heading.3.sparx

    - match: '^\s*#### (.+)$'
      scope: markup.heading.4.sparx

    - match: '^\s*##### (.+)$'
      scope: markup.heading.5.sparx

    - match: '^\s*###### (.+)$'
      scope: markup.heading.6.sparx

  # Container openers; each pushes a context that is popped by a closing "}".
  containers:
    # $[name]{
    - match: '^\s*\$\[([a-zA-Z][a-zA-Z0-9_-]*)\]\{'
      scope: meta.container.section.sparx
      push: container-section

    # >{  or  >[label]{
    - match: '^\s*>(?:\[([^\]]+)\])?\{'
      scope: meta.container.blockquote.sparx
      push: container-blockquote

    # .class{
    - match: '^\s*\.([a-zA-Z0-9_-]+)\{'
      scope: meta.container.div.sparx
      push: container-div

    # ~{
    - match: '^\s*~\{'
      scope: meta.container.aside.sparx
      push: container-aside

    # f[label]{
    - match: '^\s*f\[([^\]]+)\]\{'
      scope: meta.container.figure.sparx
      push: container-figure

    # +[label]{
    - match: '^\s*\+\[([^\]]+)\]\{'
      scope: meta.container.details.sparx
      push: container-details

  container-section:
    - meta_scope: meta.container.section.sparx
    - match: '^\s*\}'
      scope: meta.container.section.sparx
      pop: true
    - include: containers
    - include: main

  container-blockquote:
    - meta_scope: meta.container.blockquote.sparx
    - match: '^\s*\}'
      scope: meta.container.blockquote.sparx
      pop: true
    - include: containers
    - include: main

  container-div:
    - meta_scope: meta.container.div.sparx
    - match: '^\s*\}'
      scope: meta.container.div.sparx
      pop: true
    - include: containers
    - include: main

  container-aside:
    - meta_scope: meta.container.aside.sparx
    - match: '^\s*\}'
      scope: meta.container.aside.sparx
      pop: true
    - include: containers
    - include: main

  container-figure:
    - meta_scope: meta.container.figure.sparx
    - match: '^\s*\}'
      scope: meta.container.figure.sparx
      pop: true
    - include: containers
    - include: main

  container-details:
    - meta_scope: meta.container.details.sparx
    - match: '^\s*\}'
      scope: meta.container.details.sparx
      pop: true
    - include: containers
    - include: main

  # List items: "-" unnumbered, "+" numbered, ":term: definition".
  lists:
    - match: '^\s*(-+)\s+(.+)$'
      scope: markup.list.unnumbered.sparx

    - match: '^\s*(\++)\s+(.+)$'
      scope: markup.list.numbered.sparx

    - match: '^\s*(:+)([^:]+):\s*(.*)$'
      scope: meta.list.definition.sparx

  # Images: line-level src[...] and inline i[...] with optional =WxH size.
  images:
    - match: '^\s*src\[([^\]]+)\](.+)$'
      scope: meta.image.responsive.sparx

    - match: '([*\/-]*)i\[([^\]]+)\](@[a-zA-Z0-9_-]+)?([^=\s]+)(?:=(\d+x\d+))?'
      scope: meta.image.inline.sparx

  # Links: [label]@id citations, [label]:N numbered citations, and inline URLs.
  links:
    - match: '([*\/-]*)\[([^\]]+)\](@[a-zA-Z0-9_-]+)'
      scope: meta.link.citation.sparx

    - match: '([*\/-]*)\[([^\]]+)\]:(\d+)'
      scope: meta.link.numbered-citation.sparx

    - match: '([*\/-]*)\[([^\]]+)\]((?:\/[a-zA-Z0-9]|[a-zA-Z0-9]+:\/\/|www\.|#|(?:tel|mailto|sms|facetime|skype|whatsapp|geo|zoom|spotify|vscode):)([^\s\[\]^]*))(\^[a-zA-Z0-9_]*)?'
      scope: meta.link.inline.sparx

  # Fenced code blocks (pushed context) and backtick code spans.
  code:
    - match: '^```(\w+)?$'
      push: code-block

    - match: '`([^`]+)`'
      scope: markup.raw.inline.sparx

  code-block:
    - meta_scope: meta.embedded.block.sparx
    - match: '^```$'
      pop: true

  # Prefix-based inline formatting, small text and span classes.
  inline-formatting:
    - match: '\*\[([^\]]+)\]'
      scope: markup.bold.sparx

    - match: '/\[([^\]]+)\]'
      scope: markup.italic.sparx

    - match: '-\[([^\]]+)\]'
      scope: markup.strikethrough.sparx

    - match: '\*/\[([^\]]+)\]'
      scope: markup.bold.italic.sparx

    - match: '/\*\[([^\]]+)\]'
      scope: markup.italic.bold.sparx

    - match: 's\[([^\]]+)\]'
      scope: markup.small.sparx

    - match: '\.([a-zA-Z0-9_-]+)\[([^\]]+)\]'
      scope: markup.span.class.sparx