ZMediumToMarkdown 3.5.2 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/Models/Paragraph.rb +20 -0
- data/lib/Parsers/MarkupStyleRender.rb +80 -9
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7c9120282aaa35d8568b605ae3af2c24d874c60604198168e603d38457d1ae2b
|
|
4
|
+
data.tar.gz: fe051875aa9f1970da37bd858aa937b2f529d26af82baef3e1ccec71385a09f4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 160d52fafbcbe3fdfe1c0653934fc4bffa99c76b173bec42108cc47df670cdd37a4c7e0444ccd84ee1185945ffa9ec50372004a812761edb7e23b40dec6a88b3
|
|
7
|
+
data.tar.gz: 9476fe987ff76ad6e9002dfc142241546654ca7a64771c9d69d296e8938de31bf5dac505ce737d3f7e6299d83c58fb1f5bfa62c49c0e043892476cc4255eea1c
|
data/lib/Models/Paragraph.rb
CHANGED
|
@@ -18,6 +18,16 @@ class Paragraph
|
|
|
18
18
|
|
|
19
19
|
class Markup
|
|
20
20
|
attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
|
|
21
|
+
|
|
22
|
+
# Semantic identity fields used for `==` / `eql?` / `hash`. `start` and
|
|
23
|
+
# `end` are interval coordinates (handled by Rangeable as the [lo, hi]
|
|
24
|
+
# pair) rather than identity. `linkMetadata` is currently unused
|
|
25
|
+
# downstream so it is excluded too. This identity is what lets
|
|
26
|
+
# Rangeable merge two Markups that describe the same logical span
|
|
27
|
+
# (e.g. two STRONG runs that overlap) into a single coalesced
|
|
28
|
+
# interval.
|
|
29
|
+
SEMANTIC_KEYS = [:type, :href, :anchorType, :userId].freeze
|
|
30
|
+
|
|
21
31
|
def initialize(json)
|
|
22
32
|
@type = json['type']
|
|
23
33
|
@start = json['start']
|
|
@@ -27,6 +37,16 @@ class Paragraph
|
|
|
27
37
|
@userId = json['userId']
|
|
28
38
|
@linkMetadata = json['linkMetadata']
|
|
29
39
|
end
|
|
40
|
+
|
|
41
|
+
def ==(other)
|
|
42
|
+
return false unless other.is_a?(Markup)
|
|
43
|
+
SEMANTIC_KEYS.all? { |k| public_send(k) == other.public_send(k) }
|
|
44
|
+
end
|
|
45
|
+
alias_method :eql?, :==
|
|
46
|
+
|
|
47
|
+
def hash
|
|
48
|
+
SEMANTIC_KEYS.map { |k| public_send(k) }.hash
|
|
49
|
+
end
|
|
30
50
|
end
|
|
31
51
|
|
|
32
52
|
class MetaData
|
data/lib/Parsers/MarkupStyleRender.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'Models/Paragraph'
|
|
2
2
|
require 'Helper'
|
|
3
|
+
require 'rangeable'
|
|
3
4
|
|
|
4
5
|
# Renders a Paragraph's text + Markup list into final markdown.
|
|
5
6
|
#
|
|
@@ -111,7 +112,7 @@ class MarkupStyleRender
|
|
|
111
112
|
end
|
|
112
113
|
|
|
113
114
|
def buildTag(markup)
|
|
114
|
-
case markup.type
|
|
115
|
+
tag = case markup.type
|
|
115
116
|
when "EM" then TagChar.new(2, markup.start, markup.end, "_", "_")
|
|
116
117
|
when "CODE" then TagChar.new(0, markup.start, markup.end, "`", "`")
|
|
117
118
|
when "STRONG" then TagChar.new(2, markup.start, markup.end, "**", "**")
|
|
@@ -119,8 +120,12 @@ class MarkupStyleRender
|
|
|
119
120
|
when "A" then buildAnchorTag(markup)
|
|
120
121
|
else
|
|
121
122
|
Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
|
|
122
|
-
nil
|
|
123
|
+
return nil
|
|
123
124
|
end
|
|
125
|
+
# Stash the originating Markup on the tag so walkCharsWithTags can
|
|
126
|
+
# use it as the Rangeable element key (see #walkCharsWithTags).
|
|
127
|
+
tag&.instance_variable_set(:@_markup, markup)
|
|
128
|
+
tag
|
|
124
129
|
end
|
|
125
130
|
|
|
126
131
|
def buildAnchorTag(markup)
|
|
@@ -148,18 +153,42 @@ class MarkupStyleRender
|
|
|
148
153
|
end
|
|
149
154
|
end
|
|
150
155
|
|
|
156
|
+
# Walks every char index and dispatches into the open/close hooks. We
|
|
157
|
+
# build two index-keyed Hashes (`opens_at`, `closes_at`) up front so the
|
|
158
|
+
# hot path is O(1) per char instead of the previous O(m) `tags.select`
|
|
159
|
+
# scan; combined with the linear walk over chars that turns total cost
|
|
160
|
+
# from O(L · m) into O(L + m). Same-position tags inside each bucket
|
|
161
|
+
# keep their pre-sorted order from the caller.
|
|
162
|
+
#
|
|
163
|
+
# ESCAPE tags bypass Rangeable entirely. ESCAPE ranges are single-char
|
|
164
|
+
# synthetic markups injected by Paragraph#initialize and they MUST stay
|
|
165
|
+
# disjoint — feeding them through Rangeable would coalesce two ESCAPEs
|
|
166
|
+
# at adjacent positions into a single span, double-emitting the
|
|
167
|
+
# backslash. Non-ESCAPE markups go through Rangeable so identical-type
|
|
168
|
+
# overlapping spans (e.g. two STRONGs that share a few chars) get
|
|
169
|
+
# merged into a single tag pair.
|
|
151
170
|
def walkCharsWithTags(tags)
|
|
171
|
+
rangeable_tags, escape_tags = tags.partition { |t| !escape_tag?(t) }
|
|
172
|
+
merged_tags = mergeTagsViaRangeable(rangeable_tags)
|
|
173
|
+
final_tags = (merged_tags + escape_tags).sort_by(&:startIndex)
|
|
174
|
+
|
|
175
|
+
opens_at = Hash.new { |h, k| h[k] = [] }
|
|
176
|
+
closes_at = Hash.new { |h, k| h[k] = [] }
|
|
177
|
+
final_tags.each do |t|
|
|
178
|
+
opens_at[t.startIndex] << t
|
|
179
|
+
closes_at[t.endIndex] << t
|
|
180
|
+
end
|
|
181
|
+
|
|
152
182
|
response = []
|
|
153
183
|
stack = []
|
|
154
|
-
|
|
155
184
|
chars.each do |index, char|
|
|
156
185
|
if newline?(char)
|
|
157
186
|
emitNewline(char, stack, response)
|
|
158
187
|
end
|
|
159
188
|
|
|
160
|
-
openStartingTags(
|
|
189
|
+
openStartingTags(opens_at[index], stack, response) if opens_at.key?(index)
|
|
161
190
|
emitChar(char, stack, response) unless newline?(char)
|
|
162
|
-
closeEndingTags(
|
|
191
|
+
closeEndingTags(closes_at[index], stack, response) if closes_at.key?(index)
|
|
163
192
|
end
|
|
164
193
|
|
|
165
194
|
# Flush any tags still open at end-of-paragraph.
|
|
@@ -167,6 +196,45 @@ class MarkupStyleRender
|
|
|
167
196
|
response
|
|
168
197
|
end
|
|
169
198
|
|
|
199
|
+
# Build a Rangeable from the non-ESCAPE TagChars, then read the merged
|
|
200
|
+
# ranges back out as fresh TagChar instances (one per coalesced span,
|
|
201
|
+
# rather than one per original markup). Each TagChar carries enough
|
|
202
|
+
# info (sort priority, start/end strings) to drive emission, so we
|
|
203
|
+
# reuse a representative original TagChar per Markup as the prototype.
|
|
204
|
+
def mergeTagsViaRangeable(rangeable_tags)
|
|
205
|
+
return [] if rangeable_tags.empty?
|
|
206
|
+
|
|
207
|
+
rangeable = Rangeable.new
|
|
208
|
+
proto_by_markup = {}
|
|
209
|
+
|
|
210
|
+
rangeable_tags.each do |tag|
|
|
211
|
+
markup = tag.instance_variable_get(:@_markup)
|
|
212
|
+
proto_by_markup[markup] ||= tag
|
|
213
|
+
# TagChar stores endIndex as `end - 1` (the last covered slot), which is
|
|
214
|
+
# already the inclusive upper bound for Rangeable's closed-interval insert.
|
|
215
|
+
rangeable.insert(markup, start: tag.startIndex, end: tag.endIndex)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
merged = []
|
|
219
|
+
rangeable.each do |markup, ranges|
|
|
220
|
+
proto = proto_by_markup[markup]
|
|
221
|
+
startCharsStr = proto.startChars.chars.join
|
|
222
|
+
endCharsStr = proto.endChars.chars.join
|
|
223
|
+
ranges.each do |lo, hi|
|
|
224
|
+
# TagChar.new takes the half-open `end`; it stores `end - 1`.
|
|
225
|
+
merged << TagChar.new(proto.sort, lo, hi + 1, startCharsStr, endCharsStr)
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
merged
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
# ESCAPE markups are emitted as TagChar with startChars == "\\" and
|
|
232
|
+
# empty endChars; identifying them by start-string is simpler than
|
|
233
|
+
# threading a type tag through the TagChar struct.
|
|
234
|
+
def escape_tag?(tag)
|
|
235
|
+
tag.startChars.chars.join == "\\"
|
|
236
|
+
end
|
|
237
|
+
|
|
170
238
|
def newline?(char)
|
|
171
239
|
char.chars.join == "\n"
|
|
172
240
|
end
|
|
@@ -180,8 +248,8 @@ class MarkupStyleRender
|
|
|
180
248
|
stack.each { |tag| response.push(tag.startChars) }
|
|
181
249
|
end
|
|
182
250
|
|
|
183
|
-
def openStartingTags(
|
|
184
|
-
startTags =
|
|
251
|
+
def openStartingTags(startTags, stack, response)
|
|
252
|
+
startTags = startTags.sort_by(&:sort)
|
|
185
253
|
suppressEmit = false
|
|
186
254
|
startTags.each do |tag|
|
|
187
255
|
response.append(tag.startChars) unless suppressEmit
|
|
@@ -211,10 +279,13 @@ class MarkupStyleRender
|
|
|
211
279
|
# supposed to end here (overlapping markups), close it anyway and
|
|
212
280
|
# re-open it after the legitimate closes — keeping each individual
|
|
213
281
|
# tag pair properly nested in the output.
|
|
214
|
-
def closeEndingTags(
|
|
215
|
-
endTags = tags.select { |t| t.endIndex == index }
|
|
282
|
+
def closeEndingTags(endTags, stack, response)
|
|
216
283
|
return if endTags.empty?
|
|
217
284
|
|
|
285
|
+
# Caller passes the pre-built bucket; clone so we can mutate locally
|
|
286
|
+
# (find_index + delete_at) without trashing the cached array.
|
|
287
|
+
endTags = endTags.dup
|
|
288
|
+
|
|
218
289
|
mismatchTags = []
|
|
219
290
|
until endTags.empty?
|
|
220
291
|
stackTag = stack.pop
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ZMediumToMarkdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.5.2
|
|
4
|
+
version: 3.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- ZhgChgLi
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-05-
|
|
10
|
+
date: 2026-05-09 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: nokogiri
|
|
@@ -91,6 +91,20 @@ dependencies:
|
|
|
91
91
|
- - "~>"
|
|
92
92
|
- !ruby/object:Gem::Version
|
|
93
93
|
version: '0.15'
|
|
94
|
+
- !ruby/object:Gem::Dependency
|
|
95
|
+
name: rangeable
|
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
|
97
|
+
requirements:
|
|
98
|
+
- - "~>"
|
|
99
|
+
- !ruby/object:Gem::Version
|
|
100
|
+
version: '1.0'
|
|
101
|
+
type: :runtime
|
|
102
|
+
prerelease: false
|
|
103
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
104
|
+
requirements:
|
|
105
|
+
- - "~>"
|
|
106
|
+
- !ruby/object:Gem::Version
|
|
107
|
+
version: '1.0'
|
|
94
108
|
description: ZMediumToMarkdown converts Medium posts into clean, portable Markdown.
|
|
95
109
|
It can download a single post or every post from a Medium username, preserving headings,
|
|
96
110
|
lists, blockquotes, code blocks, images, links, and common embeds such as GitHub
|