ZMediumToMarkdown 3.5.2 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 396aabb00395a5451c046ad36eca450186120dd1f836242c2bc0d40885126ee3
4
- data.tar.gz: f10a39b453030fcf5286b1df32ed482b948c49c4152d6dd17e7a5871f9b5ed45
3
+ metadata.gz: 7c9120282aaa35d8568b605ae3af2c24d874c60604198168e603d38457d1ae2b
4
+ data.tar.gz: fe051875aa9f1970da37bd858aa937b2f529d26af82baef3e1ccec71385a09f4
5
5
  SHA512:
6
- metadata.gz: 70a93c6be7b9c0e62966dc8cc1217ad95d458468833a26d2b61329eee2b0b9626d33e232490a02bab76f83f8923d5144906c41e394e4aa049b3634c37cc7382d
7
- data.tar.gz: a67492a79d7857fff707c0e2aa313332db8a92a58c8e1c61941096194301910d60578051bf150f3ddbd7e04144eac9cd28c1c8a6ee2b0cdbf7e1069b967b24dd
6
+ metadata.gz: 160d52fafbcbe3fdfe1c0653934fc4bffa99c76b173bec42108cc47df670cdd37a4c7e0444ccd84ee1185945ffa9ec50372004a812761edb7e23b40dec6a88b3
7
+ data.tar.gz: 9476fe987ff76ad6e9002dfc142241546654ca7a64771c9d69d296e8938de31bf5dac505ce737d3f7e6299d83c58fb1f5bfa62c49c0e043892476cc4255eea1c
@@ -18,6 +18,16 @@ class Paragraph
18
18
 
19
19
  class Markup
20
20
  attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
21
+
22
+ # Semantic identity fields used for `==` / `eql?` / `hash`. `start` and
23
+ # `end` are interval coordinates (handled by Rangeable as the [lo, hi]
24
+ # pair) rather than identity. `linkMetadata` is currently unused
25
+ # downstream so it is excluded too. This identity is what lets
26
+ # Rangeable merge two Markups that describe the same logical span
27
+ # (e.g. two STRONG runs that overlap) into a single coalesced
28
+ # interval.
29
+ SEMANTIC_KEYS = [:type, :href, :anchorType, :userId].freeze
30
+
21
31
  def initialize(json)
22
32
  @type = json['type']
23
33
  @start = json['start']
@@ -27,6 +37,16 @@ class Paragraph
27
37
  @userId = json['userId']
28
38
  @linkMetadata = json['linkMetadata']
29
39
  end
40
+
41
+ def ==(other)
42
+ return false unless other.is_a?(Markup)
43
+ SEMANTIC_KEYS.all? { |k| public_send(k) == other.public_send(k) }
44
+ end
45
+ alias_method :eql?, :==
46
+
47
+ def hash
48
+ SEMANTIC_KEYS.map { |k| public_send(k) }.hash
49
+ end
30
50
  end
31
51
 
32
52
  class MetaData
@@ -1,5 +1,6 @@
1
1
  require 'Models/Paragraph'
2
2
  require 'Helper'
3
+ require 'rangeable'
3
4
 
4
5
  # Renders a Paragraph's text + Markup list into final markdown.
5
6
  #
@@ -111,7 +112,7 @@ class MarkupStyleRender
111
112
  end
112
113
 
113
114
  def buildTag(markup)
114
- case markup.type
115
+ tag = case markup.type
115
116
  when "EM" then TagChar.new(2, markup.start, markup.end, "_", "_")
116
117
  when "CODE" then TagChar.new(0, markup.start, markup.end, "`", "`")
117
118
  when "STRONG" then TagChar.new(2, markup.start, markup.end, "**", "**")
@@ -119,8 +120,12 @@ class MarkupStyleRender
119
120
  when "A" then buildAnchorTag(markup)
120
121
  else
121
122
  Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
122
- nil
123
+ return nil
123
124
  end
125
+ # Stash the originating Markup on the tag so walkCharsWithTags can
126
+ # use it as the Rangeable element key (see #walkCharsWithTags).
127
+ tag&.instance_variable_set(:@_markup, markup)
128
+ tag
124
129
  end
125
130
 
126
131
  def buildAnchorTag(markup)
@@ -148,18 +153,42 @@ class MarkupStyleRender
148
153
  end
149
154
  end
150
155
 
156
+ # Walks every char index and dispatches into the open/close hooks. We
157
+ # build two index-keyed Hashes (`opens_at`, `closes_at`) up front so the
158
+ # hot path is O(1) per char instead of the previous O(m) `tags.select`
159
+ # scan; combined with the linear walk over chars that turns total cost
160
+ # from O(L · m) into O(L + m), plus the one-off merge and sort below.
161
+ # Bucket order follows that `sort_by(&:startIndex)` pass, not the caller's.
162
+ #
163
+ # ESCAPE tags bypass Rangeable entirely. ESCAPE ranges are single-char
164
+ # synthetic markups injected by Paragraph#initialize and they MUST stay
165
+ # disjoint — feeding them through Rangeable would coalesce two ESCAPEs
166
+ # at adjacent positions into a single span, double-emitting the
167
+ # backslash. Non-ESCAPE markups go through Rangeable so identical-type
168
+ # overlapping spans (e.g. two STRONGs that share a few chars) get
169
+ # merged into a single tag pair.
151
170
  def walkCharsWithTags(tags)
171
+ rangeable_tags, escape_tags = tags.partition { |t| !escape_tag?(t) }
172
+ merged_tags = mergeTagsViaRangeable(rangeable_tags)
173
+ final_tags = (merged_tags + escape_tags).sort_by(&:startIndex)
174
+
175
+ opens_at = Hash.new { |h, k| h[k] = [] }
176
+ closes_at = Hash.new { |h, k| h[k] = [] }
177
+ final_tags.each do |t|
178
+ opens_at[t.startIndex] << t
179
+ closes_at[t.endIndex] << t
180
+ end
181
+
152
182
  response = []
153
183
  stack = []
154
-
155
184
  chars.each do |index, char|
156
185
  if newline?(char)
157
186
  emitNewline(char, stack, response)
158
187
  end
159
188
 
160
- openStartingTags(tags, index, stack, response)
189
+ openStartingTags(opens_at[index], stack, response) if opens_at.key?(index)
161
190
  emitChar(char, stack, response) unless newline?(char)
162
- closeEndingTags(tags, index, stack, response)
191
+ closeEndingTags(closes_at[index], stack, response) if closes_at.key?(index)
163
192
  end
164
193
 
165
194
  # Flush any tags still open at end-of-paragraph.
@@ -167,6 +196,45 @@ class MarkupStyleRender
167
196
  response
168
197
  end
169
198
 
199
+ # Build a Rangeable from the non-ESCAPE TagChars, then read the merged
200
+ # ranges back out as fresh TagChar instances (one per coalesced span,
201
+ # rather than one per original markup). Each TagChar carries enough
202
+ # info (sort priority, start/end strings) to drive emission, so we
203
+ # reuse a representative original TagChar per Markup as the prototype.
204
+ def mergeTagsViaRangeable(rangeable_tags)
205
+ return [] if rangeable_tags.empty?
206
+
207
+ rangeable = Rangeable.new
208
+ proto_by_markup = {}
209
+
210
+ rangeable_tags.each do |tag|
211
+ markup = tag.instance_variable_get(:@_markup)
212
+ proto_by_markup[markup] ||= tag
213
+ # TagChar stores endIndex as `end - 1` (the last covered slot), which is
214
+ # already the inclusive bound Rangeable's closed-interval insert expects.
215
+ rangeable.insert(markup, start: tag.startIndex, end: tag.endIndex)
216
+ end
217
+
218
+ merged = []
219
+ rangeable.each do |markup, ranges|
220
+ proto = proto_by_markup[markup]
221
+ startCharsStr = proto.startChars.chars.join
222
+ endCharsStr = proto.endChars.chars.join
223
+ ranges.each do |lo, hi|
224
+ # TagChar.new takes the half-open `end`; it stores `end - 1`.
225
+ merged << TagChar.new(proto.sort, lo, hi + 1, startCharsStr, endCharsStr)
226
+ end
227
+ end
228
+ merged
229
+ end
230
+
231
+ # ESCAPE markups are emitted as TagChar with startChars == "\\" and
232
+ # empty endChars; identifying them by start-string is simpler than
233
+ # threading a type tag through the TagChar struct.
234
+ def escape_tag?(tag)
235
+ tag.startChars.chars.join == "\\"
236
+ end
237
+
170
238
  def newline?(char)
171
239
  char.chars.join == "\n"
172
240
  end
@@ -180,8 +248,8 @@ class MarkupStyleRender
180
248
  stack.each { |tag| response.push(tag.startChars) }
181
249
  end
182
250
 
183
- def openStartingTags(tags, index, stack, response)
184
- startTags = tags.select { |t| t.startIndex == index }.sort_by(&:sort)
251
+ def openStartingTags(startTags, stack, response)
252
+ startTags = startTags.sort_by(&:sort)
185
253
  suppressEmit = false
186
254
  startTags.each do |tag|
187
255
  response.append(tag.startChars) unless suppressEmit
@@ -211,10 +279,13 @@ class MarkupStyleRender
211
279
  # supposed to end here (overlapping markups), close it anyway and
212
280
  # re-open it after the legitimate closes — keeping each individual
213
281
  # tag pair properly nested in the output.
214
- def closeEndingTags(tags, index, stack, response)
215
- endTags = tags.select { |t| t.endIndex == index }
282
+ def closeEndingTags(endTags, stack, response)
216
283
  return if endTags.empty?
217
284
 
285
+ # Caller passes the pre-built bucket; clone so we can mutate locally
286
+ # (find_index + delete_at) without trashing the cached array.
287
+ endTags = endTags.dup
288
+
218
289
  mismatchTags = []
219
290
  until endTags.empty?
220
291
  stackTag = stack.pop
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.5.2
4
+ version: 3.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-05-06 00:00:00.000000000 Z
10
+ date: 2026-05-09 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: nokogiri
@@ -91,6 +91,20 @@ dependencies:
91
91
  - - "~>"
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0.15'
94
+ - !ruby/object:Gem::Dependency
95
+ name: rangeable
96
+ requirement: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - "~>"
99
+ - !ruby/object:Gem::Version
100
+ version: '1.0'
101
+ type: :runtime
102
+ prerelease: false
103
+ version_requirements: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '1.0'
94
108
  description: ZMediumToMarkdown converts Medium posts into clean, portable Markdown.
95
109
  It can download a single post or every post from a Medium username, preserving headings,
96
110
  lists, blockquotes, code blocks, images, links, and common embeds such as GitHub