canvas_link_migrator 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/canvas_link_migrator/imported_html_converter.rb +12 -0
- data/lib/canvas_link_migrator/link_parser.rb +69 -48
- data/lib/canvas_link_migrator/link_resolver.rb +11 -7
- data/lib/canvas_link_migrator/version.rb +1 -1
- data/spec/canvas_link_migrator/imported_html_converter_spec.rb +29 -2
- data/spec/canvas_link_migrator/link_parser_spec.rb +14 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '08e4bb1b132305d9655e4f426f2685e6c524dff6f2e316ee4c32c4898e330e98'
|
4
|
+
data.tar.gz: b000b72459dd32746f8aa31d662df81e180aaff36fd461a88cd6c4d17bfed661
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e731f7af1980e3a1b731b138ba551b921776834f4fe237a005ecf737ddd270ad286d508482c3dd3050b0414c758f2b707d2084fa97c1714a697660e2f1e5dbff
|
7
|
+
data.tar.gz: 9a2100127f1aaf994360e18d6e0370cd67b60239caf9e1fab451bddfc1ae05a885a78abee63cf9d5f0c952b5add8789bf054f97667e0173617cdb7863009e3e7
|
@@ -32,6 +32,18 @@ module CanvasLinkMigrator
|
|
32
32
|
delegate :convert, to: :link_parser
|
33
33
|
delegate :resolver_links!, to: :link_resolver
|
34
34
|
|
35
|
+
def convert_single_link(single_link, link_type: false)
|
36
|
+
url = single_link.dup
|
37
|
+
LinkParser::REFERENCE_KEYWORDS.each { |ref| url.gsub!("%24#{ref}%24", "$#{ref}$") }
|
38
|
+
# create the link map for a single link (parse)
|
39
|
+
link_parsing_result = link_parser.parse_single_url(url, link_type)
|
40
|
+
link_parser.handle_parsed_url(url, link_parsing_result, nil, nil, link_type, nil, nil)
|
41
|
+
# resolve_link! on the single element link map
|
42
|
+
link_resolver.resolve_link!(link_parsing_result, link_type)
|
43
|
+
# return the new value
|
44
|
+
link_parsing_result[:new_value]
|
45
|
+
end
|
46
|
+
|
35
47
|
def convert_exported_html(input_html)
|
36
48
|
new_html = link_parser.convert(input_html, "type", "lookup_id", "field")
|
37
49
|
replace!(new_html)
|
@@ -81,7 +81,7 @@ module CanvasLinkMigrator
|
|
81
81
|
|
82
82
|
def convert(html, item_type, mig_id, field, remove_outer_nodes_if_one_child: nil)
|
83
83
|
mig_id = mig_id.to_s
|
84
|
-
doc = Nokogiri::HTML5.fragment(html || "")
|
84
|
+
doc = Nokogiri::HTML5.fragment(html || "", max_tree_depth: 10_000)
|
85
85
|
|
86
86
|
# Replace source tags with iframes
|
87
87
|
doc.search("source[data-media-type],source[data-media-id]").each do |source|
|
@@ -144,50 +144,63 @@ module CanvasLinkMigrator
|
|
144
144
|
end
|
145
145
|
|
146
146
|
result = parse_url(url, node, attr)
|
147
|
+
handle_parsed_url(url, result, node, attr, item_type, mig_id, field)
|
148
|
+
end
|
149
|
+
|
150
|
+
def handle_parsed_url(url, result, node, attr, item_type, mig_id, field)
|
147
151
|
if result[:resolved]
|
148
152
|
# resolved, just replace and carry on
|
149
|
-
|
150
|
-
unless CanvasLinkMigrator.relative_url?(new_url)
|
151
|
-
# perform configured substitutions
|
152
|
-
if (processed_url = @migration_query_service.process_domain_substitutions(new_url))
|
153
|
-
new_url = processed_url
|
154
|
-
end
|
155
|
-
# relative-ize absolute links outside the course but inside our domain
|
156
|
-
# (analogous to what is done in Api#process_incoming_html_content)
|
157
|
-
begin
|
158
|
-
uri = URI.parse(new_url)
|
159
|
-
account_hosts = @migration_query_service.context_hosts.map { |h| h.split(":").first }
|
160
|
-
if account_hosts.include?(uri.host)
|
161
|
-
uri.scheme = uri.host = uri.port = nil
|
162
|
-
new_url = uri.to_s
|
163
|
-
end
|
164
|
-
rescue URI::InvalidURIError, URI::InvalidComponentError
|
165
|
-
nil
|
166
|
-
end
|
167
|
-
end
|
168
|
-
node[attr] = new_url
|
153
|
+
handle_resolved_link(url, result, node, attr)
|
169
154
|
else
|
170
|
-
result
|
171
|
-
|
172
|
-
|
173
|
-
# (rather than just replacing a value), we're going to
|
174
|
-
# replace the entire node with a placeholder
|
175
|
-
result[:old_value] = node.to_xml
|
176
|
-
result[:placeholder] = placeholder(result[:old_value])
|
177
|
-
placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder])
|
155
|
+
handle_unresolved_link(url, result, node, attr, item_type, mig_id, field)
|
156
|
+
end
|
157
|
+
end
|
178
158
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
159
|
+
def handle_unresolved_link(url, result, node, attr, item_type, mig_id, field)
|
160
|
+
result.delete(:resolved)
|
161
|
+
if result[:link_type] == :media_object
|
162
|
+
# because we may actually change the media comment node itself
|
163
|
+
# (rather than just replacing a value), we're going to
|
164
|
+
# replace the entire node with a placeholder
|
165
|
+
result[:old_value] = node ? node.to_xml : result[:rel_path]
|
166
|
+
|
167
|
+
result[:placeholder] = placeholder(result[:old_value])
|
168
|
+
placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder])
|
169
|
+
|
170
|
+
node.replace(placeholder_node) if node
|
171
|
+
else
|
172
|
+
result[:old_value] = node ? node[attr] : url
|
173
|
+
result[:placeholder] = placeholder(result[:old_value])
|
174
|
+
# replace the inner html of an anchor tag if it matches the href
|
175
|
+
if node && node.name == "a" && attr == "href" && node["href"] == node.inner_html.delete("\n").strip
|
176
|
+
node.inner_html = result[:placeholder]
|
177
|
+
end
|
178
|
+
node[attr] = result[:placeholder] if node
|
179
|
+
end
|
180
|
+
add_unresolved_link(result, item_type, mig_id, field)
|
181
|
+
end
|
182
|
+
|
183
|
+
def handle_resolved_link(url, result, node, attr)
|
184
|
+
new_url = result[:new_url] || url
|
185
|
+
unless CanvasLinkMigrator.relative_url?(new_url)
|
186
|
+
# perform configured substitutions
|
187
|
+
if (processed_url = @migration_query_service.process_domain_substitutions(new_url))
|
188
|
+
new_url = processed_url
|
189
|
+
end
|
190
|
+
# relative-ize absolute links outside the course but inside our domain
|
191
|
+
# (analogous to what is done in Api#process_incoming_html_content)
|
192
|
+
begin
|
193
|
+
uri = URI.parse(new_url)
|
194
|
+
account_hosts = @migration_query_service.context_hosts.map { |h| h.split(":").first }
|
195
|
+
if account_hosts.include?(uri.host)
|
196
|
+
uri.scheme = uri.host = uri.port = nil
|
197
|
+
new_url = uri.to_s
|
186
198
|
end
|
187
|
-
|
199
|
+
rescue URI::InvalidURIError, URI::InvalidComponentError
|
200
|
+
nil
|
188
201
|
end
|
189
|
-
add_unresolved_link(result, item_type, mig_id, field)
|
190
202
|
end
|
203
|
+
node[attr] = new_url
|
191
204
|
end
|
192
205
|
|
193
206
|
def unresolved(type, data = {})
|
@@ -202,8 +215,11 @@ module CanvasLinkMigrator
|
|
202
215
|
"?type=#{type}&embedded=true"
|
203
216
|
end
|
204
217
|
|
218
|
+
def parse_single_url(url, link_type = nil)
|
219
|
+
parse_url(url, nil, nil, link_type)
|
220
|
+
end
|
205
221
|
# returns a hash with resolution status and data to hold onto if unresolved
|
206
|
-
def parse_url(url, node, attr)
|
222
|
+
def parse_url(url, node, attr, link_type = nil)
|
207
223
|
if url =~ /wiki_page_migration_id=(.*)/
|
208
224
|
unresolved(:wiki_page, migration_id: $1)
|
209
225
|
elsif url =~ /discussion_topic_migration_id=(.*)/
|
@@ -211,13 +227,13 @@ module CanvasLinkMigrator
|
|
211
227
|
elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/modules/items/([^?]*)(\?.*)?}
|
212
228
|
unresolved(:module_item, migration_id: $1, query: $2)
|
213
229
|
elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/file_ref/([^/?#]+)(.*)}
|
214
|
-
in_media_iframe = (attr == "src" && [
|
215
|
-
rest = in_media_iframe ? media_params(node["data-media-type"]) : $2
|
230
|
+
in_media_iframe = node && (attr == "src" && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"]))
|
231
|
+
rest = (in_media_iframe && node) ? media_params(node["data-media-type"]) : $2
|
216
232
|
unresolved(:file_ref,
|
217
233
|
migration_id: $1,
|
218
234
|
rest: rest,
|
219
235
|
in_media_iframe: in_media_iframe,
|
220
|
-
target_blank: node['target'] == "_blank" && node.name == "a" && attr == "href"
|
236
|
+
target_blank: node && node['target'] == "_blank" && node.name == "a" && attr == "href"
|
221
237
|
)
|
222
238
|
elsif url =~ %r{(?:\$CANVAS_OBJECT_REFERENCE\$|\$WIKI_REFERENCE\$)/([^/]*)/([^?]*)(\?.*)?}
|
223
239
|
if KNOWN_REFERENCE_TYPES.include?($1)
|
@@ -231,16 +247,21 @@ module CanvasLinkMigrator
|
|
231
247
|
resolved("#{@migration_query_service.context_path}/#{$1}")
|
232
248
|
elsif url =~ %r{\$IMS(?:-|_)CC(?:-|_)FILEBASE\$/(.*)}
|
233
249
|
rel_path = URI::DEFAULT_PARSER.unescape($1)
|
234
|
-
|
235
|
-
|
250
|
+
|
251
|
+
if (attr == "href" && node && node["class"]&.include?("instructure_inline_media_comment")) ||
|
252
|
+
(attr == "src" && node && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"])) ||
|
253
|
+
link_type == :media_object
|
236
254
|
unresolved(:media_object, rel_path: rel_path)
|
237
255
|
else
|
238
256
|
unresolved(:file, rel_path: rel_path)
|
239
257
|
end
|
240
|
-
elsif (attr == "src" && [
|
258
|
+
elsif (attr == "src" && node && %w[iframe source].include?(node.name) && (node["data-media-id"] || node["data-media-type"])) ||
|
259
|
+
link_type == :media_object
|
241
260
|
# media_objects_iframe course copy reference without an attachment id, change to media_attachments_iframe
|
242
|
-
|
243
|
-
|
261
|
+
rel_path = node ? node["src"] : url
|
262
|
+
unresolved(:media_object, rel_path: rel_path)
|
263
|
+
elsif (@migration_query_service.supports_embedded_images && attr == "src" && (info_match = url.match(%r{\Adata:(?<mime_type>[-\w]+/[-\w+.]+)?;base64,(?<image>.*)}m))) ||
|
264
|
+
link_type == :image
|
244
265
|
result = @migration_query_service.link_embedded_image(info_match)
|
245
266
|
if result[:resolved]
|
246
267
|
resolved(result[:url])
|
@@ -249,7 +270,7 @@ module CanvasLinkMigrator
|
|
249
270
|
end
|
250
271
|
elsif # rubocop:disable Lint/DuplicateBranch
|
251
272
|
# Equation image, leave it alone
|
252
|
-
(attr == "src" && node["class"] && node["class"].include?("equation_image")) || # rubocop:disable Layout/ConditionPosition
|
273
|
+
(attr == "src" && node && node["class"] && node["class"].include?("equation_image")) || # rubocop:disable Layout/ConditionPosition
|
253
274
|
# The file is in the context of an AQ, leave the link alone
|
254
275
|
url =~ %r{\A/assessment_questions/\d+/files/\d+} ||
|
255
276
|
# This points to a specific file already, leave it alone
|
@@ -54,7 +54,8 @@ module CanvasLinkMigrator
|
|
54
54
|
end
|
55
55
|
|
56
56
|
# finds the :new_value to use to replace the placeholder
|
57
|
-
def resolve_link!(link)
|
57
|
+
def resolve_link!(link, preset_type = false)
|
58
|
+
link[:link_type] = preset_type if preset_type
|
58
59
|
case link[:link_type]
|
59
60
|
when :wiki_page
|
60
61
|
if (linked_wiki_url = @migration_id_converter.convert_wiki_page_migration_id_to_slug(link[:migration_id]))
|
@@ -103,7 +104,7 @@ module CanvasLinkMigrator
|
|
103
104
|
# this part is a little trickier
|
104
105
|
# tl;dr we've replaced the entire node with the placeholder
|
105
106
|
# see LinkParser for details
|
106
|
-
rel_path = link[:rel_path]
|
107
|
+
rel_path = link[:rel_path] || link[:old_value]
|
107
108
|
node = Nokogiri::HTML5.fragment(link[:old_value]).children.first
|
108
109
|
new_url = resolve_media_data(node, rel_path)
|
109
110
|
new_url ||= resolve_relative_file_url(rel_path)
|
@@ -112,14 +113,14 @@ module CanvasLinkMigrator
|
|
112
113
|
new_url = rel_path.include?("#{context_path}/file_contents") ? rel_path : missing_relative_file_url(rel_path)
|
113
114
|
link[:missing_url] = new_url
|
114
115
|
end
|
115
|
-
if [
|
116
|
+
if %w[iframe source].include?(node.name)
|
116
117
|
node["src"] = new_url
|
117
118
|
else
|
118
119
|
node["href"] = new_url
|
119
120
|
end
|
120
|
-
link[:new_value] = node.to_s
|
121
|
+
link[:new_value] = node.name === 'text' ? new_url : node.to_s
|
121
122
|
when :file
|
122
|
-
rel_path = link[:rel_path]
|
123
|
+
rel_path = link[:rel_path] || link[:old_value]
|
123
124
|
new_url = resolve_relative_file_url(rel_path)
|
124
125
|
# leave user urls alone
|
125
126
|
new_url ||= rel_path if is_relative_user_url(rel_path)
|
@@ -249,10 +250,13 @@ module CanvasLinkMigrator
|
|
249
250
|
elsif rel_path&.match(/\/media_attachments_iframe\/\d+/)
|
250
251
|
# media attachment from another course or something
|
251
252
|
rel_path
|
252
|
-
elsif (file_id, uuid = @migration_id_converter.convert_attachment_media_id(node["data-media-id"]))
|
253
|
+
elsif node["data-media-id"] && (file_id, uuid = @migration_id_converter.convert_attachment_media_id(node["data-media-id"]))
|
253
254
|
file_id ? media_attachment_iframe_url(file_id, uuid, node["data-media-type"]) : nil
|
254
|
-
elsif (
|
255
|
+
elsif (identifier = rel_path.match(/media_objects(?:_iframe)?\/([^?.]+)/)) && (file_id, uuid = @migration_id_converter.convert_attachment_media_id(identifier.[](1)))
|
255
256
|
file_id ? media_attachment_iframe_url(file_id, uuid, node["data-media-type"]) : nil
|
257
|
+
elsif node.name == "text" && rel_path[/^[^?]+/] && (mig_id = rel_path[/^[^?]+/].match(/[^\/]+$/)[0])
|
258
|
+
file = @migration_id_converter.lookup_attachment_by_migration_id(mig_id)
|
259
|
+
media_attachment_iframe_url(file["id"], file["uuid"])
|
256
260
|
else
|
257
261
|
node.delete("class")
|
258
262
|
node.delete("id")
|
@@ -403,13 +403,40 @@ describe CanvasLinkMigrator::ImportedHtmlConverter do
|
|
403
403
|
|
404
404
|
it "converts iframe srcs that point to non-media files" do
|
405
405
|
test_string = <<~HTML
|
406
|
-
|
406
|
+
<p><iframe style="width: 100%; height: 100vh; border: none;" src="$IMS-CC-FILEBASE$/subfolder/test.png?canvas_download=1"></iframe></p>
|
407
407
|
HTML
|
408
408
|
converted_string = <<~HTML
|
409
|
-
|
409
|
+
<p><iframe style="width: 100%; height: 100vh; border: none;" src="/courses/2/files/7/download?verifier=u7"></iframe></p>
|
410
410
|
HTML
|
411
411
|
html = @converter.convert_exported_html(test_string)
|
412
412
|
expect(html[0]).to eq converted_string
|
413
413
|
end
|
414
414
|
end
|
415
|
+
|
416
|
+
describe ".convert_single_link" do
|
417
|
+
before(:each) do
|
418
|
+
@path = "/courses/2/"
|
419
|
+
@converter = CanvasLinkMigrator::ImportedHtmlConverter.new(resource_map: JSON.parse(File.read("spec/fixtures/canvas_resource_map.json")))
|
420
|
+
end
|
421
|
+
|
422
|
+
it "converts a wiki reference" do
|
423
|
+
test_string = "%24WIKI_REFERENCE%24/wiki/test-wiki-page?query=blah"
|
424
|
+
link = @converter.convert_single_link(test_string)
|
425
|
+
expect(link).to eq "#{@path}pages/test-wiki-page?query=blah"
|
426
|
+
end
|
427
|
+
|
428
|
+
it "converts a wiki reference with migration id" do
|
429
|
+
test_string = "%24WIKI_REFERENCE%24/pages/A?query=blah"
|
430
|
+
link = @converter.convert_single_link(test_string)
|
431
|
+
expect(link).to eq "#{@path}pages/slug-a?query=blah"
|
432
|
+
end
|
433
|
+
|
434
|
+
it "formats media properly if explicitly set to do so" do
|
435
|
+
test_string = "$IMS-CC-FILEBASE$/subfolder/with a space/yodawg.mov?canvas_=1&canvas_qs_type=video&canvas_qs_amp=&canvas_qs_embedded=true&media_attachment=true"
|
436
|
+
link = @converter.convert_single_link(test_string, link_type: :media_object)
|
437
|
+
expect(link).to eq "/media_attachments_iframe/9?embedded=true&verifier=u9"
|
438
|
+
link = @converter.convert_single_link(test_string)
|
439
|
+
expect(link).to eq "/courses/2/files/9?verifier=u9&type=video&=&embedded=true"
|
440
|
+
end
|
441
|
+
end
|
415
442
|
end
|
@@ -69,5 +69,19 @@ describe CanvasLinkMigrator::LinkParser do
|
|
69
69
|
doc = Nokogiri::HTML5.fragment(%Q(<a id="media_comment_m-4uoGqVdEqXhpqu2ZMytHSy9XMV73aQ7E" class="instructure_inline_media_comment" data-media_comment_type="video" data-alt=""></a>))
|
70
70
|
expect{ parser.convert(doc.to_html, "type", "lookup_id", "field") }.not_to raise_error
|
71
71
|
end
|
72
|
+
|
73
|
+
it "handles deeply nested html up to 10.000 levels" do
|
74
|
+
deeply_nested_html = "<div>" * 9999
|
75
|
+
deeply_nested_html += "<a target=\"_blank\"></a>"
|
76
|
+
deeply_nested_html += "</div>" * 9999
|
77
|
+
expect{ parser.convert(deeply_nested_html, "type", "lookup_id", "field") }.not_to raise_error
|
78
|
+
end
|
79
|
+
|
80
|
+
it "raises error when html is beyond 10.000 depth" do
|
81
|
+
deeply_nested_html = "<div>" * 10_000
|
82
|
+
deeply_nested_html += "<a target=\"_blank\"></a>"
|
83
|
+
deeply_nested_html += "</div>" * 10_000
|
84
|
+
expect{ parser.convert(deeply_nested_html, "type", "lookup_id", "field") }.to raise_error("Document tree depth limit exceeded")
|
85
|
+
end
|
72
86
|
end
|
73
87
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: canvas_link_migrator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.15
|
4
|
+
version: 1.0.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mysti Lilla
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2025-03-05 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|