canvas_link_migrator 1.0.15 → 1.0.17

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36b5ebb39673be4a7105bc459809c1faa333899c9d709d665cabd00f4242401d
4
- data.tar.gz: 6702833e94aa9a7fd3bb3265199c2b7caf269e30fd0c82b5c15028d810d44c14
3
+ metadata.gz: '08e4bb1b132305d9655e4f426f2685e6c524dff6f2e316ee4c32c4898e330e98'
4
+ data.tar.gz: b000b72459dd32746f8aa31d662df81e180aaff36fd461a88cd6c4d17bfed661
5
5
  SHA512:
6
- metadata.gz: e8a5d21a3c0a74e5805163d724e42c64a0db4a6d2b18f85f637e401f6ca09089aab287e8afb88b22150cd7eadf90e51a1ba3e78d9bb7892968bbf57055554bfe
7
- data.tar.gz: 1fd447f0548f8c84a2fdea0a6a791679eea31c6f4dede5502d4191dcf96b994d4072b303a6ba713151bc03aaddefe6ad003e556ffd4ce39880f8ce792a216522
6
+ metadata.gz: e731f7af1980e3a1b731b138ba551b921776834f4fe237a005ecf737ddd270ad286d508482c3dd3050b0414c758f2b707d2084fa97c1714a697660e2f1e5dbff
7
+ data.tar.gz: 9a2100127f1aaf994360e18d6e0370cd67b60239caf9e1fab451bddfc1ae05a885a78abee63cf9d5f0c952b5add8789bf054f97667e0173617cdb7863009e3e7
@@ -32,6 +32,18 @@ module CanvasLinkMigrator
32
32
  delegate :convert, to: :link_parser
33
33
  delegate :resolver_links!, to: :link_resolver
34
34
 
35
+ def convert_single_link(single_link, link_type: false)
36
+ url = single_link.dup
37
+ LinkParser::REFERENCE_KEYWORDS.each { |ref| url.gsub!("%24#{ref}%24", "$#{ref}$") }
38
+ # create the link map for a single link (parse)
39
+ link_parsing_result = link_parser.parse_single_url(url, link_type)
40
+ link_parser.handle_parsed_url(url, link_parsing_result, nil, nil, link_type, nil, nil)
41
+ # resolve_link! on the single element link map
42
+ link_resolver.resolve_link!(link_parsing_result, link_type)
43
+ # return the new value
44
+ link_parsing_result[:new_value]
45
+ end
46
+
35
47
  def convert_exported_html(input_html)
36
48
  new_html = link_parser.convert(input_html, "type", "lookup_id", "field")
37
49
  replace!(new_html)
@@ -81,7 +81,7 @@ module CanvasLinkMigrator
81
81
 
82
82
  def convert(html, item_type, mig_id, field, remove_outer_nodes_if_one_child: nil)
83
83
  mig_id = mig_id.to_s
84
- doc = Nokogiri::HTML5.fragment(html || "")
84
+ doc = Nokogiri::HTML5.fragment(html || "", max_tree_depth: 10_000)
85
85
 
86
86
  # Replace source tags with iframes
87
87
  doc.search("source[data-media-type],source[data-media-id]").each do |source|
@@ -144,50 +144,63 @@ module CanvasLinkMigrator
144
144
  end
145
145
 
146
146
  result = parse_url(url, node, attr)
147
+ handle_parsed_url(url, result, node, attr, item_type, mig_id, field)
148
+ end
149
+
150
+ def handle_parsed_url(url, result, node, attr, item_type, mig_id, field)
147
151
  if result[:resolved]
148
152
  # resolved, just replace and carry on
149
- new_url = result[:new_url] || url
150
- unless CanvasLinkMigrator.relative_url?(new_url)
151
- # perform configured substitutions
152
- if (processed_url = @migration_query_service.process_domain_substitutions(new_url))
153
- new_url = processed_url
154
- end
155
- # relative-ize absolute links outside the course but inside our domain
156
- # (analogous to what is done in Api#process_incoming_html_content)
157
- begin
158
- uri = URI.parse(new_url)
159
- account_hosts = @migration_query_service.context_hosts.map { |h| h.split(":").first }
160
- if account_hosts.include?(uri.host)
161
- uri.scheme = uri.host = uri.port = nil
162
- new_url = uri.to_s
163
- end
164
- rescue URI::InvalidURIError, URI::InvalidComponentError
165
- nil
166
- end
167
- end
168
- node[attr] = new_url
153
+ handle_resolved_link(url, result, node, attr)
169
154
  else
170
- result.delete(:resolved)
171
- if result[:link_type] == :media_object
172
- # because we may actually change the media comment node itself
173
- # (rather than just replacing a value), we're going to
174
- # replace the entire node with a placeholder
175
- result[:old_value] = node.to_xml
176
- result[:placeholder] = placeholder(result[:old_value])
177
- placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder])
155
+ handle_unresolved_link(url, result, node, attr, item_type, mig_id, field)
156
+ end
157
+ end
178
158
 
179
- node.replace(placeholder_node)
180
- else
181
- result[:old_value] = node[attr]
182
- result[:placeholder] = placeholder(result[:old_value])
183
- # replace the inner html of an anchor tag if it matches the href
184
- if node.name == "a" && attr == "href" && node["href"] == node.inner_html.delete("\n").strip
185
- node.inner_html = result[:placeholder]
159
+ def handle_unresolved_link(url, result, node, attr, item_type, mig_id, field)
160
+ result.delete(:resolved)
161
+ if result[:link_type] == :media_object
162
+ # because we may actually change the media comment node itself
163
+ # (rather than just replacing a value), we're going to
164
+ # replace the entire node with a placeholder
165
+ result[:old_value] = node ? node.to_xml : result[:rel_path]
166
+
167
+ result[:placeholder] = placeholder(result[:old_value])
168
+ placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder])
169
+
170
+ node.replace(placeholder_node) if node
171
+ else
172
+ result[:old_value] = node ? node[attr] : url
173
+ result[:placeholder] = placeholder(result[:old_value])
174
+ # replace the inner html of an anchor tag if it matches the href
175
+ if node && node.name == "a" && attr == "href" && node["href"] == node.inner_html.delete("\n").strip
176
+ node.inner_html = result[:placeholder]
177
+ end
178
+ node[attr] = result[:placeholder] if node
179
+ end
180
+ add_unresolved_link(result, item_type, mig_id, field)
181
+ end
182
+
183
+ def handle_resolved_link(url, result, node, attr)
184
+ new_url = result[:new_url] || url
185
+ unless CanvasLinkMigrator.relative_url?(new_url)
186
+ # perform configured substitutions
187
+ if (processed_url = @migration_query_service.process_domain_substitutions(new_url))
188
+ new_url = processed_url
189
+ end
190
+ # relative-ize absolute links outside the course but inside our domain
191
+ # (analogous to what is done in Api#process_incoming_html_content)
192
+ begin
193
+ uri = URI.parse(new_url)
194
+ account_hosts = @migration_query_service.context_hosts.map { |h| h.split(":").first }
195
+ if account_hosts.include?(uri.host)
196
+ uri.scheme = uri.host = uri.port = nil
197
+ new_url = uri.to_s
186
198
  end
187
- node[attr] = result[:placeholder]
199
+ rescue URI::InvalidURIError, URI::InvalidComponentError
200
+ nil
188
201
  end
189
- add_unresolved_link(result, item_type, mig_id, field)
190
202
  end
203
+ node[attr] = new_url
191
204
  end
192
205
 
193
206
  def unresolved(type, data = {})
@@ -202,8 +215,11 @@ module CanvasLinkMigrator
202
215
  "?type=#{type}&embedded=true"
203
216
  end
204
217
 
218
+ def parse_single_url(url, link_type = nil)
219
+ parse_url(url, nil, nil, link_type)
220
+ end
205
221
  # returns a hash with resolution status and data to hold onto if unresolved
206
- def parse_url(url, node, attr)
222
+ def parse_url(url, node, attr, link_type = nil)
207
223
  if url =~ /wiki_page_migration_id=(.*)/
208
224
  unresolved(:wiki_page, migration_id: $1)
209
225
  elsif url =~ /discussion_topic_migration_id=(.*)/
@@ -211,13 +227,13 @@ module CanvasLinkMigrator
211
227
  elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/modules/items/([^?]*)(\?.*)?}
212
228
  unresolved(:module_item, migration_id: $1, query: $2)
213
229
  elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/file_ref/([^/?#]+)(.*)}
214
- in_media_iframe = (attr == "src" && ["iframe", "source"].include?(node.name) && (node["data-media-id"] || node["data-media-type"]))
215
- rest = in_media_iframe ? media_params(node["data-media-type"]) : $2
230
+ in_media_iframe = node && (attr == "src" && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"]))
231
+ rest = (in_media_iframe && node) ? media_params(node["data-media-type"]) : $2
216
232
  unresolved(:file_ref,
217
233
  migration_id: $1,
218
234
  rest: rest,
219
235
  in_media_iframe: in_media_iframe,
220
- target_blank: node['target'] == "_blank" && node.name == "a" && attr == "href"
236
+ target_blank: node && node['target'] == "_blank" && node.name == "a" && attr == "href"
221
237
  )
222
238
  elsif url =~ %r{(?:\$CANVAS_OBJECT_REFERENCE\$|\$WIKI_REFERENCE\$)/([^/]*)/([^?]*)(\?.*)?}
223
239
  if KNOWN_REFERENCE_TYPES.include?($1)
@@ -231,16 +247,21 @@ module CanvasLinkMigrator
231
247
  resolved("#{@migration_query_service.context_path}/#{$1}")
232
248
  elsif url =~ %r{\$IMS(?:-|_)CC(?:-|_)FILEBASE\$/(.*)}
233
249
  rel_path = URI::DEFAULT_PARSER.unescape($1)
234
- if (attr == "href" && node["class"]&.include?("instructure_inline_media_comment")) ||
235
- (attr == "src" && ["iframe", "source"].include?(node.name) && (node["data-media-id"] || node["data-media-type"]))
250
+
251
+ if (attr == "href" && node && node["class"]&.include?("instructure_inline_media_comment")) ||
252
+ (attr == "src" && node && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"])) ||
253
+ link_type == :media_object
236
254
  unresolved(:media_object, rel_path: rel_path)
237
255
  else
238
256
  unresolved(:file, rel_path: rel_path)
239
257
  end
240
- elsif (attr == "src" && ["iframe", "source"].include?(node.name) && (node["data-media-id"] || node["data-media-type"]))
258
+ elsif (attr == "src" && node && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"])) ||
259
+ link_type == :media_object
241
260
  # media_objects_iframe course copy reference without an attachment id, change to media_attachments_iframe
242
- unresolved(:media_object, rel_path: node["src"])
243
- elsif @migration_query_service.supports_embedded_images && attr == "src" && (info_match = url.match(%r{\Adata:(?<mime_type>[-\w]+/[-\w+.]+)?;base64,(?<image>.*)}m))
261
+ rel_path = node ? node["src"] : url
262
+ unresolved(:media_object, rel_path: rel_path)
263
+ elsif (@migration_query_service.supports_embedded_images && attr == "src" && (info_match = url.match(%r{\Adata:(?<mime_type>[-\w]+/[-\w+.]+)?;base64,(?<image>.*)}m))) ||
264
+ link_type == :image
244
265
  result = @migration_query_service.link_embedded_image(info_match)
245
266
  if result[:resolved]
246
267
  resolved(result[:url])
@@ -249,7 +270,7 @@ module CanvasLinkMigrator
249
270
  end
250
271
  elsif # rubocop:disable Lint/DuplicateBranch
251
272
  # Equation image, leave it alone
252
- (attr == "src" && node["class"] && node["class"].include?("equation_image")) || # rubocop:disable Layout/ConditionPosition
273
+ (attr == "src" && node && node["class"] && node["class"].include?("equation_image")) || # rubocop:disable Layout/ConditionPosition
253
274
  # The file is in the context of an AQ, leave the link alone
254
275
  url =~ %r{\A/assessment_questions/\d+/files/\d+} ||
255
276
  # This points to a specific file already, leave it alone
@@ -54,7 +54,8 @@ module CanvasLinkMigrator
54
54
  end
55
55
 
56
56
  # finds the :new_value to use to replace the placeholder
57
- def resolve_link!(link)
57
+ def resolve_link!(link, preset_type = false)
58
+ link[:link_type] = preset_type if preset_type
58
59
  case link[:link_type]
59
60
  when :wiki_page
60
61
  if (linked_wiki_url = @migration_id_converter.convert_wiki_page_migration_id_to_slug(link[:migration_id]))
@@ -103,7 +104,7 @@ module CanvasLinkMigrator
103
104
  # this part is a little trickier
104
105
  # tl;dr we've replaced the entire node with the placeholder
105
106
  # see LinkParser for details
106
- rel_path = link[:rel_path]
107
+ rel_path = link[:rel_path] || link[:old_value]
107
108
  node = Nokogiri::HTML5.fragment(link[:old_value]).children.first
108
109
  new_url = resolve_media_data(node, rel_path)
109
110
  new_url ||= resolve_relative_file_url(rel_path)
@@ -112,14 +113,14 @@ module CanvasLinkMigrator
112
113
  new_url = rel_path.include?("#{context_path}/file_contents") ? rel_path : missing_relative_file_url(rel_path)
113
114
  link[:missing_url] = new_url
114
115
  end
115
- if ["iframe", "source"].include?(node.name)
116
+ if %w[iframe source].include?(node.name)
116
117
  node["src"] = new_url
117
118
  else
118
119
  node["href"] = new_url
119
120
  end
120
- link[:new_value] = node.to_s
121
+ link[:new_value] = node.name === 'text' ? new_url : node.to_s
121
122
  when :file
122
- rel_path = link[:rel_path]
123
+ rel_path = link[:rel_path] || link[:old_value]
123
124
  new_url = resolve_relative_file_url(rel_path)
124
125
  # leave user urls alone
125
126
  new_url ||= rel_path if is_relative_user_url(rel_path)
@@ -249,10 +250,13 @@ module CanvasLinkMigrator
249
250
  elsif rel_path&.match(/\/media_attachments_iframe\/\d+/)
250
251
  # media attachment from another course or something
251
252
  rel_path
252
- elsif (file_id, uuid = @migration_id_converter.convert_attachment_media_id(node["data-media-id"]))
253
+ elsif node["data-media-id"] && (file_id, uuid = @migration_id_converter.convert_attachment_media_id(node["data-media-id"]))
253
254
  file_id ? media_attachment_iframe_url(file_id, uuid, node["data-media-type"]) : nil
254
- elsif (file_id, uuid = @migration_id_converter.convert_attachment_media_id(rel_path.match(/media_objects(?:_iframe)?\/([^?.]+)/)&.[](1)))
255
+ elsif (identifier = rel_path.match(/media_objects(?:_iframe)?\/([^?.]+)/)) && (file_id, uuid = @migration_id_converter.convert_attachment_media_id(identifier.[](1)))
255
256
  file_id ? media_attachment_iframe_url(file_id, uuid, node["data-media-type"]) : nil
257
+ elsif node.name == "text" && rel_path[/^[^?]+/] && (mig_id = rel_path[/^[^?]+/].match(/[^\/]+$/)[0])
258
+ file = @migration_id_converter.lookup_attachment_by_migration_id(mig_id)
259
+ media_attachment_iframe_url(file["id"], file["uuid"])
256
260
  else
257
261
  node.delete("class")
258
262
  node.delete("id")
@@ -1,3 +1,3 @@
1
1
  module CanvasLinkMigrator
2
- VERSION = "1.0.15"
2
+ VERSION = "1.0.17"
3
3
  end
@@ -403,13 +403,40 @@ describe CanvasLinkMigrator::ImportedHtmlConverter do
403
403
 
404
404
  it "converts iframe srcs that point to non-media files" do
405
405
  test_string = <<~HTML
406
- <p><iframe style="width: 100%; height: 100vh; border: none;" src="$IMS-CC-FILEBASE$/subfolder/test.png?canvas_download=1"></iframe></p>
406
+ <p><iframe style="width: 100%; height: 100vh; border: none;" src="$IMS-CC-FILEBASE$/subfolder/test.png?canvas_download=1"></iframe></p>
407
407
  HTML
408
408
  converted_string = <<~HTML
409
- <p><iframe style="width: 100%; height: 100vh; border: none;" src="/courses/2/files/7/download?verifier=u7"></iframe></p>
409
+ <p><iframe style="width: 100%; height: 100vh; border: none;" src="/courses/2/files/7/download?verifier=u7"></iframe></p>
410
410
  HTML
411
411
  html = @converter.convert_exported_html(test_string)
412
412
  expect(html[0]).to eq converted_string
413
413
  end
414
414
  end
415
+
416
+ describe ".convert_single_link" do
417
+ before(:each) do
418
+ @path = "/courses/2/"
419
+ @converter = CanvasLinkMigrator::ImportedHtmlConverter.new(resource_map: JSON.parse(File.read("spec/fixtures/canvas_resource_map.json")))
420
+ end
421
+
422
+ it "converts a wiki reference" do
423
+ test_string = "%24WIKI_REFERENCE%24/wiki/test-wiki-page?query=blah"
424
+ link = @converter.convert_single_link(test_string)
425
+ expect(link).to eq "#{@path}pages/test-wiki-page?query=blah"
426
+ end
427
+
428
+ it "converts a wiki reference with migration id" do
429
+ test_string = "%24WIKI_REFERENCE%24/pages/A?query=blah"
430
+ link = @converter.convert_single_link(test_string)
431
+ expect(link).to eq "#{@path}pages/slug-a?query=blah"
432
+ end
433
+
434
+ it "formats media properly if explicitly set to do so" do
435
+ test_string = "$IMS-CC-FILEBASE$/subfolder/with a space/yodawg.mov?canvas_=1&canvas_qs_type=video&canvas_qs_amp=&canvas_qs_embedded=true&media_attachment=true"
436
+ link = @converter.convert_single_link(test_string, link_type: :media_object)
437
+ expect(link).to eq "/media_attachments_iframe/9?embedded=true&verifier=u9"
438
+ link = @converter.convert_single_link(test_string)
439
+ expect(link).to eq "/courses/2/files/9?verifier=u9&type=video&amp=&embedded=true"
440
+ end
441
+ end
415
442
  end
@@ -69,5 +69,19 @@ describe CanvasLinkMigrator::LinkParser do
69
69
  doc = Nokogiri::HTML5.fragment(%Q(<a id="media_comment_m-4uoGqVdEqXhpqu2ZMytHSy9XMV73aQ7E" class="instructure_inline_media_comment" data-media_comment_type="video" data-alt=""></a>))
70
70
  expect{ parser.convert(doc.to_html, "type", "lookup_id", "field") }.not_to raise_error
71
71
  end
72
+
73
+ it "handles deeply nested html up to 10.000 levels" do
74
+ deeply_nested_html = "<div>" * 9999
75
+ deeply_nested_html += "<a target=\"_blank\"></a>"
76
+ deeply_nested_html += "</div>" * 9999
77
+ expect{ parser.convert(deeply_nested_html, "type", "lookup_id", "field") }.not_to raise_error
78
+ end
79
+
80
+ it "raises error when html is beyond 10.000 depth" do
81
+ deeply_nested_html = "<div>" * 10_000
82
+ deeply_nested_html += "<a target=\"_blank\"></a>"
83
+ deeply_nested_html += "</div>" * 10_000
84
+ expect{ parser.convert(deeply_nested_html, "type", "lookup_id", "field") }.to raise_error("Document tree depth limit exceeded")
85
+ end
72
86
  end
73
87
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: canvas_link_migrator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.15
4
+ version: 1.0.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mysti Lilla
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2024-10-24 00:00:00.000000000 Z
14
+ date: 2025-03-05 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activesupport