article_json 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE +21 -0
- data/README.md +78 -0
- data/bin/article_json_export_google_doc.rb +22 -0
- data/bin/article_json_export_html.rb +14 -0
- data/bin/article_json_parse_google_doc.rb +14 -0
- data/bin/update_reference_document.sh +18 -0
- data/lib/article_json/article.rb +53 -0
- data/lib/article_json/configuration.rb +24 -0
- data/lib/article_json/elements/base.rb +40 -0
- data/lib/article_json/elements/embed.rb +58 -0
- data/lib/article_json/elements/heading.rb +37 -0
- data/lib/article_json/elements/image.rb +41 -0
- data/lib/article_json/elements/list.rb +37 -0
- data/lib/article_json/elements/paragraph.rb +31 -0
- data/lib/article_json/elements/quote.rb +41 -0
- data/lib/article_json/elements/text.rb +45 -0
- data/lib/article_json/elements/text_box.rb +37 -0
- data/lib/article_json/export/html/elements/base.rb +59 -0
- data/lib/article_json/export/html/elements/embed.rb +28 -0
- data/lib/article_json/export/html/elements/heading.rb +19 -0
- data/lib/article_json/export/html/elements/image.rb +33 -0
- data/lib/article_json/export/html/elements/list.rb +25 -0
- data/lib/article_json/export/html/elements/paragraph.rb +17 -0
- data/lib/article_json/export/html/elements/quote.rb +29 -0
- data/lib/article_json/export/html/elements/shared/caption.rb +22 -0
- data/lib/article_json/export/html/elements/shared/float.rb +17 -0
- data/lib/article_json/export/html/elements/text.rb +44 -0
- data/lib/article_json/export/html/elements/text_box.rb +25 -0
- data/lib/article_json/export/html/exporter.rb +22 -0
- data/lib/article_json/import/google_doc/html/css_analyzer.rb +144 -0
- data/lib/article_json/import/google_doc/html/embedded_facebook_video_parser.rb +33 -0
- data/lib/article_json/import/google_doc/html/embedded_parser.rb +113 -0
- data/lib/article_json/import/google_doc/html/embedded_slideshare_parser.rb +36 -0
- data/lib/article_json/import/google_doc/html/embedded_tweet_parser.rb +37 -0
- data/lib/article_json/import/google_doc/html/embedded_vimeo_video_parser.rb +29 -0
- data/lib/article_json/import/google_doc/html/embedded_youtube_video_parser.rb +33 -0
- data/lib/article_json/import/google_doc/html/heading_parser.rb +38 -0
- data/lib/article_json/import/google_doc/html/image_parser.rb +75 -0
- data/lib/article_json/import/google_doc/html/list_parser.rb +46 -0
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +111 -0
- data/lib/article_json/import/google_doc/html/paragraph_parser.rb +26 -0
- data/lib/article_json/import/google_doc/html/parser.rb +125 -0
- data/lib/article_json/import/google_doc/html/quote_parser.rb +46 -0
- data/lib/article_json/import/google_doc/html/shared/caption.rb +20 -0
- data/lib/article_json/import/google_doc/html/shared/float.rb +21 -0
- data/lib/article_json/import/google_doc/html/text_box_parser.rb +49 -0
- data/lib/article_json/import/google_doc/html/text_parser.rb +89 -0
- data/lib/article_json/utils/o_embed_resolver/base.rb +63 -0
- data/lib/article_json/utils/o_embed_resolver/facebook_video.rb +21 -0
- data/lib/article_json/utils/o_embed_resolver/slideshare.rb +22 -0
- data/lib/article_json/utils/o_embed_resolver/tweet.rb +23 -0
- data/lib/article_json/utils/o_embed_resolver/vimeo_video.rb +21 -0
- data/lib/article_json/utils/o_embed_resolver/youtube_video.rb +21 -0
- data/lib/article_json/utils.rb +11 -0
- data/lib/article_json/version.rb +3 -0
- data/lib/article_json.rb +55 -0
- metadata +189 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Common entry point and shared node helpers for the HTML element
        # exporters. When instantiated directly it dispatches to the exporter
        # sub class registered for the wrapped element's type; sub classes
        # override #export to build their own markup.
        class Base
          # @param [ArticleJSON::Elements::Base] element
          def initialize(element)
            @element = element
          end

          # Export a HTML node out of the given element.
          # Called on Base itself, this looks up the matching exporter sub
          # class via .build and delegates to its #export. Sub class
          # instances are used as they are, so every sub class is expected to
          # override this method.
          # @return [Nokogiri::XML::Node, nil] nil if no exporter is
          #   registered for the element's type
          def export
            exporter = self.class == Base ? self.class.build(@element) : self
            exporter.export unless exporter.nil?
          end

          private

          # Create an element node, forwarding optional content / attributes
          # to Nokogiri's `create_element`.
          # @param [Symbol, String] tag
          # @return [Nokogiri::XML::Element]
          def create_element(tag, *args)
            Nokogiri::HTML.fragment('').document.create_element(tag.to_s, *args)
          end

          # Create a plain text node.
          # The text is wrapped in a text node instead of being parsed as an
          # HTML fragment: parsing would interpret characters such as `<` as
          # markup, keep only the first parsed child and return nil for an
          # empty string (which callers then hand to `add_child`).
          # @param [String] text
          # @return [Nokogiri::XML::Text]
          def create_text_node(text)
            document = Nokogiri::HTML.fragment('').document
            Nokogiri::XML::Text.new(text.to_s, document)
          end

          class << self
            # Instantiate the correct sub class for a given element
            # @param [ArticleJSON::Elements::Base] element
            # @return [ArticleJSON::Export::HTML::Elements::Base, nil] nil if
            #   the element's type has no exporter
            def build(element)
              klass = exporter_by_type(element.type)
              klass.new(element) unless klass.nil?
            end

            # Look up the correct exporter class based on the element type.
            # The mapping is built inside the method because the sub classes
            # inherit from Base and are therefore defined after it.
            # @param [Symbol, String] type
            # @return [Class, nil]
            def exporter_by_type(type)
              {
                text: Text,
                paragraph: Paragraph,
                heading: Heading,
                list: List,
                image: Image,
                text_box: TextBox,
                quote: Quote,
                embed: Embed,
              }[type.to_sym]
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports an embed element as a <figure> that wraps a placeholder
        # <div class="embed"> followed by a <figcaption>.
        class Embed < Base
          include Shared::Caption

          # Build the figure node for the embedded element
          # @return [Nokogiri::XML::Node]
          def export
            figure = create_element(:figure)
            figure.add_child(embed_node)
            figure.add_child(caption_node(:figcaption))
            figure
          end

          private

          # The <div> carrying the textual placeholder of the embed
          # @return [Nokogiri::XML::Node]
          def embed_node
            create_element(:div, embedded_object, class: 'embed')
          end

          # Placeholder text naming the embed's type and id
          # @return [String]
          def embedded_object
            type = @element.embed_type
            id = @element.embed_id
            "Embedded Object: #{type}-#{id}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a heading element as an <hN> tag, where N is the element's
        # level and the tag's content is the element's content.
        class Heading < Base
          # @return [Nokogiri::XML::Node]
          def export
            heading_tag = tag_name
            create_element(heading_tag, @element.content)
          end

          private

          # Tag name derived from the heading level, e.g. level 2 => :h2
          # @return [Symbol]
          def tag_name
            :"h#{@element.level}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports an image element as a <figure> containing the <img> and a
        # <figcaption>. The figure gets a floating class when the element
        # floats.
        class Image < Base
          include Shared::Caption
          include Shared::Float

          # @return [Nokogiri::XML::Node]
          def export
            create_element(:figure, node_opts).tap do |figure|
              figure.add_child(image_node)
              figure.add_child(caption_node(:figcaption))
            end
          end

          private

          # The <img> node pointing at the element's source URL
          # @return [Nokogiri::XML::Node]
          def image_node
            create_element(:img, src: @element.source_url)
          end

          # Attributes for the wrapping <figure>.
          # Always a Hash (empty when the image does not float), so that no
          # nil is handed to `create_element` as element content; this also
          # matches TextBox#node_opts, which always returns a Hash.
          # @return [Hash]
          def node_opts
            css_class = floating_class
            css_class.nil? ? {} : { class: css_class }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a list element as <ol> or <ul>, wrapping every child
        # element in an <li> that holds the child's paragraph export.
        class List < Base
          # @return [Nokogiri::XML::Node]
          def export
            list = create_element(tag_name)
            @element.content.each do |child_element|
              list.add_child(list_item(child_element))
            end
            list
          end

          private

          # Build a single <li> containing the exported paragraph
          # @param [ArticleJSON::Elements::Paragraph] child_element
          # @return [Nokogiri::XML::Node]
          def list_item(child_element)
            create_element(:li).tap do |item|
              item.add_child(Paragraph.new(child_element).export)
            end
          end

          # :ol for ordered lists, :ul for everything else
          # @return [Symbol]
          def tag_name
            return :ol if @element.list_type == :ordered
            :ul
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a paragraph element as a <p> whose children are the
        # exported text elements of the paragraph's content.
        class Paragraph < Base
          # @return [Nokogiri::XML::Node]
          def export
            paragraph = create_element(:p)
            @element.content.each do |text_element|
              paragraph.add_child(Text.new(text_element).export)
            end
            paragraph
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a quote element as an <aside> containing the exported
        # content elements followed by the caption in a <small> tag. The
        # aside gets a floating class when the element floats.
        class Quote < Base
          include Shared::Caption
          include Shared::Float

          # @return [Nokogiri::XML::Node]
          def export
            create_element(:aside, node_opts).tap do |aside|
              @element.content.each do |child_element|
                child_node = Base.new(child_element).export
                # Base#export returns nil when no exporter exists for the
                # child's type; skip it instead of passing nil to add_child.
                aside.add_child(child_node) unless child_node.nil?
              end
              aside.add_child(caption_node(:small))
            end
          end

          private

          # Attributes for the wrapping <aside>.
          # Always a Hash (empty when the quote does not float), so that no
          # nil is handed to `create_element` as element content.
          # @return [Hash]
          def node_opts
            css_class = floating_class
            css_class.nil? ? {} : { class: css_class }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        module Shared
          # Mixin for exporters whose element carries a caption. The
          # including class must provide `@element` and `create_element`.
          module Caption
            # Generate the caption node, containing one exported text node
            # per caption text element. A missing (nil) caption is treated
            # like an empty one and yields an empty caption tag.
            # @param [String, Symbol] tag_name
            # @return [Nokogiri::XML::Node]
            def caption_node(tag_name)
              create_element(tag_name).tap do |caption|
                (@element.caption || []).each do |child_element|
                  caption.add_child(Text.new(child_element).export)
                end
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        module Shared
          # Mixin for exporters whose element may float. The including class
          # must provide `@element`.
          module Float
            # CSS class describing the element's float, e.g. "float-left".
            # @return [String, nil] nil when the element has no float value
            def floating_class
              side = @element.float
              return nil if side.nil?
              "float-#{side}"
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a text element. The content becomes a text node, or an
        # <a> when the element has an href, and is wrapped in <em> and / or
        # <strong> depending on the element's italic and bold flags.
        class Text < Base
          # @return [Nokogiri::XML::Node]
          def export
            if @element.bold
              @element.italic ? bold_and_italic_node : bold_node
            elsif @element.italic
              italic_node
            else
              content_node
            end
          end

          private

          # <em> around the content
          # @return [Nokogiri::XML::Node]
          def italic_node
            wrap_in(:em, content_node)
          end

          # <strong> around the content
          # @return [Nokogiri::XML::Node]
          def bold_node
            wrap_in(:strong, content_node)
          end

          # <strong> around <em> around the content
          # @return [Nokogiri::XML::Node]
          def bold_and_italic_node
            wrap_in(:strong, italic_node)
          end

          # The bare content: a text node, or a link if an href is present
          # @return [Nokogiri::XML::Node]
          def content_node
            if @element.href.nil?
              create_text_node(@element.content)
            else
              create_element(:a, @element.content, href: @element.href)
            end
          end

          # Create a `tag` element and put `child` into it
          # @param [Symbol] tag
          # @param [Nokogiri::XML::Node] child
          # @return [Nokogiri::XML::Node] the wrapping element
          def wrap_in(tag, child)
            wrapper = create_element(tag)
            wrapper.add_child(child)
            wrapper
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exports a text box element as a <div class="text-box"> containing
        # the exported content elements. A floating class is appended to the
        # class attribute when the element floats.
        class TextBox < Base
          include Shared::Float

          # @return [Nokogiri::XML::Node]
          def export
            create_element(:div, node_opts).tap do |div|
              @element.content.each do |child_element|
                child_node = Base.new(child_element).export
                # Base#export returns nil when no exporter exists for the
                # child's type; skip it instead of passing nil to add_child.
                div.add_child(child_node) unless child_node.nil?
              end
            end
          end

          private

          # Attributes for the wrapping <div>: "text-box" plus the floating
          # class, if there is one.
          # @return [Hash]
          def node_opts
            { class: ['text-box', floating_class].compact.join(' ') }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      # Turns a list of article elements into one HTML string by exporting
      # every element and appending the resulting nodes to a fragment.
      class Exporter
        # @param [Array[ArticleJSON::Elements::Base]] elements
        def initialize(elements)
          @elements = elements
        end

        # Generate a string with the HTML representation of all elements.
        # Elements without a matching exporter (Elements::Base#export
        # returns nil for those) are skipped instead of being passed to
        # `add_child`.
        # @return [String]
        def html
          doc = Nokogiri::HTML.fragment('')
          @elements.each do |element|
            node = Elements::Base.new(element).export
            next if node.nil?
            doc.add_child(node)
          end
          # NOTE(review): `save_with: 0` passes no save options, presumably
          # to avoid formatted / indented output - confirm against Nokogiri.
          doc.to_html(save_with: 0)
        end
      end
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Scans a stylesheet and remembers which class selectors make text
        # bold, italic, right-aligned or centered, so that class attributes
        # of nodes can later be checked for these styles.
        class CSSAnalyzer
          attr_reader :css_parser,
                      :bold_classes,
                      :italic_classes,
                      :centered_classes,
                      :right_aligned_classes

          # Initialize the parser with CSS code
          # @param [String] css
          def initialize(css = '')
            @css_parser = ::CssParser::Parser.new
            css_parser.load_string!(css)
            parse
          end

          # Check if a given class attribute contains at least one class that
          # makes its text bold
          # @param [String, nil] class_str
          # @return [Boolean]
          def bold?(class_str)
            any_class_in?(class_str, bold_classes)
          end

          # Check if a given class attribute contains at least one class that
          # makes its text italic
          # @param [String, nil] class_str
          # @return [Boolean]
          def italic?(class_str)
            any_class_in?(class_str, italic_classes)
          end

          # Check if a given class attribute contains at least one class that
          # sets its alignment to the right
          # @param [String, nil] class_str
          # @return [Boolean]
          def right_aligned?(class_str)
            any_class_in?(class_str, right_aligned_classes)
          end

          # Check if a given class attribute contains no class that sets its
          # alignment to right or center
          # @param [String, nil] class_str
          # @return [Boolean]
          def left_aligned?(class_str)
            !right_aligned?(class_str) && !centered?(class_str)
          end

          # Check if a given class attribute contains at least one class that
          # centers it
          # @param [String, nil] class_str
          # @return [Boolean]
          def centered?(class_str)
            any_class_in?(class_str, centered_classes)
          end

          private

          # Check if a whitespace-separated class attribute shares at least
          # one class with the given list. A missing (nil) class attribute
          # is treated like an empty one.
          # @param [String, nil] class_str
          # @param [Array[String]] known_classes
          # @return [Boolean]
          def any_class_in?(class_str, known_classes)
            (class_str.to_s.split(' ') & known_classes).any?
          end

          # Parse the CSS code and save CSS selectors for certain styles
          def parse
            # arrays containing class names for certain formatting
            @bold_classes = []
            @italic_classes = []
            @right_aligned_classes = []
            @centered_classes = []

            css_parser.each_rule_set do |rule_set|
              add_classes(rule_set, bold_classes) if rule_set_is_bold?(rule_set)
              if rule_set_is_italic?(rule_set)
                add_classes(rule_set, italic_classes)
              end
              if rule_set_is_right_aligned?(rule_set)
                add_classes(rule_set, right_aligned_classes)
              end
              if rule_set_is_centered?(rule_set)
                add_classes(rule_set, centered_classes)
              end
            end
          end

          # A rule set counts as bold if its font-weight is numeric and at
          # least 600, or one of the keywords `bold` / `bolder`.
          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_bold?(rule_set)
            value = clean_value_from_rule_set(rule_set, 'font-weight')
            value =~ /\d/ ? value.to_i >= 600 : %w(bold bolder).include?(value)
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_italic?(rule_set)
            clean_value_from_rule_set(rule_set, 'font-style') == 'italic'
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_right_aligned?(rule_set)
            clean_value_from_rule_set(rule_set, 'text-align') == 'right'
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_centered?(rule_set)
            clean_value_from_rule_set(rule_set, 'text-align') == 'center'
          end

          # Read a property value from a rule set, without semicolons and
          # surrounding whitespace. Missing values become an empty string.
          # @param [CssParser::RuleSet] rule_set
          # @param [String] key
          # @return [String]
          def clean_value_from_rule_set(rule_set, key)
            rule_set
              .get_value(key)
              .to_s
              .tr(';', '')
              .strip
          end

          # Add all class selectors of a rule set to a given array.
          # Only selectors starting with a dot are considered; the leading
          # dot is removed and names already in the array are not added again.
          # @param [CssParser::RuleSet] rule_set
          # @param [Array] class_array
          def add_classes(rule_set, class_array)
            rule_set.each_selector do |selector|
              selector_name = selector.to_s.strip
              next unless selector_name[0] == '.'
              class_name = selector_name[1..-1]
              class_array << class_name unless class_array.include?(class_name)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded-element parser for Facebook video URLs
        class EmbeddedFacebookVideoParser < EmbeddedParser
          # Pattern matching Facebook video URLs; the named group `id`
          # captures the numeric video id.
          URL_REGEXP = %r{
            ^\S*                      # all protocols & sub domains
            facebook\.com/            # domain
            (                         # optional path & parameters
              \w+/videos/|
              video\.php\?v=|
              video\.php\?id=
            )
            (?<id>\d+)                # numeric video id
          }xi

          # Identifier of this kind of embed
          # @return [Symbol]
          def embed_type
            :facebook_video
          end

          # Regular expression to check if a given string is a FB Video URL.
          # Also used to extract the ID from the URL
          # @return [Regexp]
          def self.url_regexp
            URL_REGEXP
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Base class for parsers that turn a node containing a supported URL
        # into an embed element. Sub classes provide #embed_type and
        # .url_regexp; the class methods pick the first sub class whose
        # regular expression matches the node's text.
        class EmbeddedParser
          include Shared::Caption

          # @param [Nokogiri::HTML::Node] node
          # @param [Nokogiri::HTML::Node] caption_node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, caption_node:, css_analyzer:)
            @node = node
            @caption_node = caption_node
            @css_analyzer = css_analyzer
          end

          # Extract the embed's ID from the URL in the node's text, using
          # the `id` group of the sub class' .url_regexp
          # @return [String, nil] nil if the text does not match
          def embed_id
            url_match = self.class.url_regexp.match(@node.inner_text.strip)
            url_match && url_match[:id]
          end

          # The type of this embedded element
          # To be implemented by sub classes!
          # @return [Symbol]
          def embed_type
            raise NotImplementedError
          end

          # Extract any potential tags, specified in square brackets after
          # the URL and separated by whitespace
          # @return [Array[String]] empty if no tags are given
          def tags
            tag_match =
              /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/.match(@node.inner_text)
            return [] unless tag_match
            tag_match[:tags].split(' ')
          end

          # The embedded element
          # @return [ArticleJSON::Elements::Embed]
          def element
            ArticleJSON::Elements::Embed.new(
              embed_type: embed_type,
              embed_id: embed_id,
              tags: tags,
              caption: caption
            )
          end

          # Check if a given string matches this parser's URL pattern
          # @param [String] text
          # @return [Boolean]
          def self.matches?(text)
            !(url_regexp =~ text).nil?
          end

          # Regular expression to check if node content is embeddable element
          # Is also used to extract the ID from the URL.
          # To be implemented by sub classes!
          # @return [Regexp]
          def self.url_regexp
            raise NotImplementedError
          end

          # Build a embedded element based on the node's content
          # @param [Nokogiri::HTML::Node] node
          # @param [Nokogiri::HTML::Node] caption_node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          # @return [ArticleJSON::Elements::Embed, nil] nil if no parser
          #   matches the node's text
          def self.build(node:, caption_node:, css_analyzer:)
            parser_class = find_parser(node.inner_text)
            return nil if parser_class.nil?

            parser_class
              .new(
                node: node,
                caption_node: caption_node,
                css_analyzer: css_analyzer
              )
              .element
          end

          # Check if a node contains a supported embedded element
          # @param [Nokogiri::HTML::Node] node
          # @return [Boolean]
          def self.supported?(node)
            !find_parser(node.inner_text).nil?
          end

          # List of embedded element parser classes, in lookup order
          # @return [Array[Class]]
          def self.parsers
            [
              EmbeddedFacebookVideoParser,
              EmbeddedVimeoVideoParser,
              EmbeddedYoutubeVideoParser,
              EmbeddedTweetParser,
              EmbeddedSlideshareParser,
            ]
          end

          # Find the first matching class for a given (URL) string. The text
          # is stripped and downcased before matching.
          # @param [String] text
          # @return [Class, nil] nil for blank text or if nothing matches
          def self.find_parser(text)
            normalized = text.strip.downcase
            return nil if normalized.empty?
            parsers.find { |klass| klass.matches?(normalized) }
          end

          private_class_method :parsers, :find_parser
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded-element parser for Slideshare URLs
        class EmbeddedSlideshareParser < EmbeddedParser
          # Pattern matching Slideshare URLs; the named groups `handle` and
          # `id` capture the two path segments after the domain.
          URL_REGEXP = %r{
            ^\S*                      # all protocols & sub domains
            slideshare\.net/          # domain
            (?<handle>[^/\s]+)/       # username / handle
            (?<id>[^/?&\s\u00A0]+)    # the id / slug of the slide show
          }xi

          # Identifier of this kind of embed
          # @return [Symbol]
          def embed_type
            :slideshare
          end

          # Extract the slide show ID (including the handle) from an URL,
          # in the form "handle/id"
          # @return [String, nil] nil if the node's text does not match
          def embed_id
            url_match = self.class.url_regexp.match(@node.inner_text.strip)
            return nil unless url_match
            "#{url_match[:handle]}/#{url_match[:id]}"
          end

          # Regular expression to check if a given string is a Slideshare URL
          # Also used to extract HANDLE and ID from the URL.
          # @return [Regexp]
          def self.url_regexp
            URL_REGEXP
          end
        end
      end
    end
  end
end
|