article_json 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE +21 -0
- data/README.md +78 -0
- data/bin/article_json_export_google_doc.rb +22 -0
- data/bin/article_json_export_html.rb +14 -0
- data/bin/article_json_parse_google_doc.rb +14 -0
- data/bin/update_reference_document.sh +18 -0
- data/lib/article_json/article.rb +53 -0
- data/lib/article_json/configuration.rb +24 -0
- data/lib/article_json/elements/base.rb +40 -0
- data/lib/article_json/elements/embed.rb +58 -0
- data/lib/article_json/elements/heading.rb +37 -0
- data/lib/article_json/elements/image.rb +41 -0
- data/lib/article_json/elements/list.rb +37 -0
- data/lib/article_json/elements/paragraph.rb +31 -0
- data/lib/article_json/elements/quote.rb +41 -0
- data/lib/article_json/elements/text.rb +45 -0
- data/lib/article_json/elements/text_box.rb +37 -0
- data/lib/article_json/export/html/elements/base.rb +59 -0
- data/lib/article_json/export/html/elements/embed.rb +28 -0
- data/lib/article_json/export/html/elements/heading.rb +19 -0
- data/lib/article_json/export/html/elements/image.rb +33 -0
- data/lib/article_json/export/html/elements/list.rb +25 -0
- data/lib/article_json/export/html/elements/paragraph.rb +17 -0
- data/lib/article_json/export/html/elements/quote.rb +29 -0
- data/lib/article_json/export/html/elements/shared/caption.rb +22 -0
- data/lib/article_json/export/html/elements/shared/float.rb +17 -0
- data/lib/article_json/export/html/elements/text.rb +44 -0
- data/lib/article_json/export/html/elements/text_box.rb +25 -0
- data/lib/article_json/export/html/exporter.rb +22 -0
- data/lib/article_json/import/google_doc/html/css_analyzer.rb +144 -0
- data/lib/article_json/import/google_doc/html/embedded_facebook_video_parser.rb +33 -0
- data/lib/article_json/import/google_doc/html/embedded_parser.rb +113 -0
- data/lib/article_json/import/google_doc/html/embedded_slideshare_parser.rb +36 -0
- data/lib/article_json/import/google_doc/html/embedded_tweet_parser.rb +37 -0
- data/lib/article_json/import/google_doc/html/embedded_vimeo_video_parser.rb +29 -0
- data/lib/article_json/import/google_doc/html/embedded_youtube_video_parser.rb +33 -0
- data/lib/article_json/import/google_doc/html/heading_parser.rb +38 -0
- data/lib/article_json/import/google_doc/html/image_parser.rb +75 -0
- data/lib/article_json/import/google_doc/html/list_parser.rb +46 -0
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +111 -0
- data/lib/article_json/import/google_doc/html/paragraph_parser.rb +26 -0
- data/lib/article_json/import/google_doc/html/parser.rb +125 -0
- data/lib/article_json/import/google_doc/html/quote_parser.rb +46 -0
- data/lib/article_json/import/google_doc/html/shared/caption.rb +20 -0
- data/lib/article_json/import/google_doc/html/shared/float.rb +21 -0
- data/lib/article_json/import/google_doc/html/text_box_parser.rb +49 -0
- data/lib/article_json/import/google_doc/html/text_parser.rb +89 -0
- data/lib/article_json/utils/o_embed_resolver/base.rb +63 -0
- data/lib/article_json/utils/o_embed_resolver/facebook_video.rb +21 -0
- data/lib/article_json/utils/o_embed_resolver/slideshare.rb +22 -0
- data/lib/article_json/utils/o_embed_resolver/tweet.rb +23 -0
- data/lib/article_json/utils/o_embed_resolver/vimeo_video.rb +21 -0
- data/lib/article_json/utils/o_embed_resolver/youtube_video.rb +21 -0
- data/lib/article_json/utils.rb +11 -0
- data/lib/article_json/version.rb +3 -0
- data/lib/article_json.rb +55 -0
- metadata +189 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Base class of all HTML element exporters. When instantiated
        # directly it works as a dispatcher: it looks up the exporter sub
        # class matching the type of the wrapped element and delegates the
        # export to an instance of that class.
        class Base
          # @param [ArticleJSON::Elements::Base] element
          def initialize(element)
            @element = element
          end

          # Export a HTML node out of the given element
          # Dynamically looks up the right export-element-class, instantiates
          # it and then calls its #export method. Sub classes are expected to
          # override this method with their own implementation.
          # @return [Nokogiri::XML::Node, nil] nil if there is no exporter for
          #   the type of the element
          # @raise [NotImplementedError] when invoked on a sub class that does
          #   not provide its own #export
          def export
            unless instance_of?(Base)
              # Without this guard a sub class lacking its own #export would
              # keep calling itself until the stack overflows.
              raise NotImplementedError,
                    "#{self.class} does not implement #export"
            end
            self.class.build(@element)&.export
          end

          private

          # Create a new element node that belongs to a fresh HTML document
          # @param [Symbol, String] tag
          # @param [Array] args - handed over to Nokogiri's `create_element`,
          #   i.e. text content and / or a hash of attributes
          # @return [Nokogiri::XML::Element]
          def create_element(tag, *args)
            Nokogiri::HTML.fragment('').document.create_element(tag.to_s, *args)
          end

          # Create a plain text node. The text is taken literally and is NOT
          # parsed as markup, which matches how `create_element` treats its
          # content argument. This way text that happens to look like HTML can
          # neither inject nodes nor get truncated, and an empty string still
          # results in a (blank) node instead of nil.
          # @param [String] text
          # @return [Nokogiri::XML::Text]
          def create_text_node(text)
            document = Nokogiri::HTML.fragment('').document
            Nokogiri::XML::Text.new(text.to_s, document)
          end

          class << self
            # Instantiate the correct sub class for a given element
            # @param [ArticleJSON::Elements::Base] element
            # @return [ArticleJSON::Export::HTML::Elements::Base, nil] nil if
            #   there is no exporter for the type of the element
            def build(element)
              exporter_by_type(element.type)&.new(element)
            end

            # Look up the correct exporter class based on the element type
            # @param [Symbol, String] type
            # @return [Class, nil] sub class of
            #   ArticleJSON::Export::HTML::Elements::Base, nil for unknown or
            #   missing types
            def exporter_by_type(type)
              # e.g. nil cannot be converted and is treated as "unknown"
              return nil unless type.respond_to?(:to_sym)
              {
                text: Text,
                paragraph: Paragraph,
                heading: Heading,
                list: List,
                image: Image,
                text_box: TextBox,
                quote: Quote,
                embed: Embed,
              }[type.to_sym]
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for embed elements: renders a <figure> that holds a
        # placeholder <div class="embed"> followed by a <figcaption>.
        class Embed < Base
          include Shared::Caption

          # Build the figure node with the embed placeholder and its caption
          # @return [Nokogiri::XML::Node]
          def export
            figure = create_element(:figure)
            figure.add_child(embed_node)
            figure.add_child(caption_node(:figcaption))
            figure
          end

          private

          # The <div> standing in for the embedded object
          # @return [Nokogiri::XML::Node]
          def embed_node
            create_element(:div, embedded_object, class: 'embed')
          end

          # Placeholder text made of the embed's type and id
          # @return [String]
          def embedded_object
            "Embedded Object: #{@element.embed_type}-#{@element.embed_id}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for heading elements: renders an <hN> tag, where N is the
        # level of the heading, containing the heading's content.
        class Heading < Base
          # @return [Nokogiri::XML::Node]
          def export
            create_element(tag_name, @element.content)
          end

          private

          # Name of the heading tag, derived from the level of the element
          # @return [Symbol]
          def tag_name
            :"h#{@element.level}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for image elements: renders a <figure> wrapping the <img>
        # and a <figcaption>. The figure carries a float class if the element
        # is floating.
        class Image < Base
          include Shared::Caption
          include Shared::Float

          # Build the figure node with the image and its caption
          # @return [Nokogiri::XML::Node]
          def export
            figure = create_element(:figure, node_opts)
            figure.add_child(image_node)
            figure.add_child(caption_node(:figcaption))
            figure
          end

          private

          # The <img> tag pointing to the source URL of the element
          # @return [Nokogiri::XML::Node]
          def image_node
            create_element(:img, src: @element.source_url)
          end

          # Attributes of the <figure> tag
          # @return [Hash, nil] nil if the element is not floating
          def node_opts
            css_class = floating_class
            { class: css_class } unless css_class.nil?
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for list elements: renders an <ol> or <ul> with one <li>
        # per content entry. Every entry is exported as a paragraph.
        class List < Base
          # @return [Nokogiri::XML::Node]
          def export
            list = create_element(tag_name)
            @element.content.each do |entry|
              item = create_element(:li)
              item.add_child(Paragraph.new(entry).export)
              list.add_child(item)
            end
            list
          end

          private

          # <ol> for ordered lists, <ul> for every other list type
          # @return [Symbol]
          def tag_name
            return :ol if @element.list_type == :ordered
            :ul
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for paragraph elements: renders a <p> tag that contains
        # the exported text elements of the paragraph.
        class Paragraph < Base
          # @return [Nokogiri::XML::Node]
          def export
            paragraph = create_element(:p)
            @element.content.each do |text_element|
              paragraph.add_child(Text.new(text_element).export)
            end
            paragraph
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for quote elements: renders an <aside> containing the
        # exported content elements followed by the caption in a <small> tag.
        # The aside carries a float class if the element is floating.
        class Quote < Base
          include Shared::Caption
          include Shared::Float

          # @return [Nokogiri::XML::Node]
          def export
            create_element(:aside, node_opts).tap do |aside|
              @element.content.each do |child_element|
                child_node = Base.new(child_element).export
                # Base#export returns nil for element types without an
                # exporter; such children are left out instead of handing
                # nil to #add_child.
                aside.add_child(child_node) unless child_node.nil?
              end
              aside.add_child(caption_node(:small))
            end
          end

          private

          # Attributes of the <aside> tag
          # @return [Hash, nil] nil if the element is not floating
          def node_opts
            return if floating_class.nil?
            { class: floating_class }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        module Shared
          # Mixin for exporters of elements that have a caption. Relies on
          # the including class to provide `@element` and `#create_element`.
          module Caption
            # Generate the caption node: a tag with the given name that
            # contains the exported text elements of the caption
            # @param [Symbol, String] tag_name
            # @return [Nokogiri::XML::Node]
            def caption_node(tag_name)
              node = create_element(tag_name)
              @element.caption.each do |text_element|
                node.add_child(Text.new(text_element).export)
              end
              node
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        module Shared
          # Mixin for exporters of elements that may float. Relies on the
          # including class to provide `@element`.
          module Float
            # The element's floating class, if necessary
            # @return [String, nil] "float-" followed by the float value of
            #   the element, or nil if the element has no float value
            def floating_class
              side = @element.float
              return if side.nil?
              "float-#{side}"
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for text elements: renders the content as text node or, if
        # the element has a href, as link. The result gets wrapped in
        # <strong> and / or <em> for bold and italic text.
        class Text < Base
          # @return [Nokogiri::XML::Node]
          def export
            if @element.bold
              @element.italic ? bold_and_italic_node : bold_node
            elsif @element.italic
              italic_node
            else
              content_node
            end
          end

          private

          # The content wrapped in an <em> tag
          # @return [Nokogiri::XML::Node]
          def italic_node
            em = create_element(:em)
            em.add_child(content_node)
            em
          end

          # The content wrapped in a <strong> tag
          # @return [Nokogiri::XML::Node]
          def bold_node
            strong = create_element(:strong)
            strong.add_child(content_node)
            strong
          end

          # The content wrapped in <strong><em>…</em></strong>
          # @return [Nokogiri::XML::Node]
          def bold_and_italic_node
            strong = create_element(:strong)
            strong.add_child(italic_node)
            strong
          end

          # A plain text node, or an <a> tag if the element has a href
          # @return [Nokogiri::XML::Node]
          def content_node
            if @element.href.nil?
              create_text_node(@element.content)
            else
              create_element(:a, @element.content, href: @element.href)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      module Elements
        # Exporter for text box elements: renders a <div class="text-box">
        # containing the exported content elements. A float class is added
        # to the div if the element is floating.
        class TextBox < Base
          include Shared::Float

          # @return [Nokogiri::XML::Node]
          def export
            create_element(:div, node_opts).tap do |div|
              @element.content.each do |child_element|
                child_node = Base.new(child_element).export
                # Base#export returns nil for element types without an
                # exporter; such children are left out instead of handing
                # nil to #add_child.
                div.add_child(child_node) unless child_node.nil?
              end
            end
          end

          private

          # Attributes of the <div> tag; the class attribute always includes
          # "text-box" and additionally the floating class, if there is one
          # @return [Hash]
          def node_opts
            { class: ['text-box', floating_class].compact.join(' ') }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ArticleJSON
  module Export
    module HTML
      # Exports a list of article elements as one HTML string
      class Exporter
        # @param [Array[ArticleJSON::Elements::Base]] elements
        def initialize(elements)
          @elements = elements
        end

        # Generate a string with the HTML representation of all elements.
        # Elements without a matching exporter (for which
        # `Elements::Base#export` returns nil) are left out.
        # @return [String]
        def html
          doc = Nokogiri::HTML.fragment('')
          @elements.each do |element|
            node = Elements::Base.new(element).export
            # nil means there is no exporter for this element type; handing
            # nil to #add_child would abort the entire export.
            doc.add_child(node) unless node.nil?
          end
          doc.to_html(save_with: 0)
        end
      end
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Scans CSS code for class selectors that make text bold, italic,
        # right-aligned or centered, and answers whether a class attribute
        # contains one of those classes.
        class CSSAnalyzer
          attr_reader :css_parser,
                      :bold_classes,
                      :italic_classes,
                      :centered_classes,
                      :right_aligned_classes

          # Initialize the parser with CSS code
          # @param [String] css
          def initialize(css = '')
            @css_parser = ::CssParser::Parser.new
            css_parser.load_string!(css.to_s)
            parse
          end

          # Check if a given class attribute contains at least one class that
          # makes its text bold
          # @param [String, nil] class_str - nil counts as "no classes"
          # @return [Boolean]
          def bold?(class_str)
            any_class_in?(class_str, bold_classes)
          end

          # Check if a given class attribute contains at least one class that
          # makes its text italic
          # @param [String, nil] class_str - nil counts as "no classes"
          # @return [Boolean]
          def italic?(class_str)
            any_class_in?(class_str, italic_classes)
          end

          # Check if a given class attribute contains at least one class that
          # sets its alignment to the right
          # @param [String, nil] class_str - nil counts as "no classes"
          # @return [Boolean]
          def right_aligned?(class_str)
            any_class_in?(class_str, right_aligned_classes)
          end

          # Check if a given class attribute contains no class that sets its
          # alignment to right or center
          # @param [String, nil] class_str - nil counts as "no classes"
          # @return [Boolean]
          def left_aligned?(class_str)
            !right_aligned?(class_str) && !centered?(class_str)
          end

          # Check if a given class attribute contains at least one class that
          # centers it
          # @param [String, nil] class_str - nil counts as "no classes"
          # @return [Boolean]
          def centered?(class_str)
            any_class_in?(class_str, centered_classes)
          end

          private

          # Check if a whitespace separated class attribute shares at least
          # one class with a list of known class names
          # @param [String, nil] class_str
          # @param [Array[String]] known_classes
          # @return [Boolean]
          def any_class_in?(class_str, known_classes)
            (class_str.to_s.split(' ') & known_classes).any?
          end

          # Parse the CSS code and save CSS selectors for certain styles
          def parse
            # arrays containing class names for certain formatting
            @bold_classes = []
            @italic_classes = []
            @right_aligned_classes = []
            @centered_classes = []

            css_parser.each_rule_set do |rule_set|
              if rule_set_is_bold?(rule_set)
                add_classes(rule_set, bold_classes)
              end
              if rule_set_is_italic?(rule_set)
                add_classes(rule_set, italic_classes)
              end
              if rule_set_is_right_aligned?(rule_set)
                add_classes(rule_set, right_aligned_classes)
              end
              if rule_set_is_centered?(rule_set)
                add_classes(rule_set, centered_classes)
              end
            end
          end

          # A rule set is bold if its `font-weight` is numeric and at least
          # 600, or one of the keywords `bold` / `bolder`
          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_bold?(rule_set)
            value = clean_value_from_rule_set(rule_set, 'font-weight')
            value =~ /\d/ ? value.to_i >= 600 : %w(bold bolder).include?(value)
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_italic?(rule_set)
            clean_value_from_rule_set(rule_set, 'font-style') == 'italic'
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_right_aligned?(rule_set)
            clean_value_from_rule_set(rule_set, 'text-align') == 'right'
          end

          # @param [CssParser::RuleSet] rule_set
          # @return [Boolean]
          def rule_set_is_centered?(rule_set)
            clean_value_from_rule_set(rule_set, 'text-align') == 'center'
          end

          # Read a property of a rule set, without semicolons and surrounding
          # whitespace. The value is downcased so that the keyword
          # comparisons above are case-insensitive.
          # @param [CssParser::RuleSet] rule_set
          # @param [String] key
          # @return [String] empty if the rule set does not have the property
          def clean_value_from_rule_set(rule_set, key)
            rule_set
              .get_value(key)
              .to_s
              .tr(';', '')
              .strip
              .downcase
          end

          # Add all class selectors of a rule set to a given array
          # Only selectors starting with a dot are taken into account, and
          # class names already present in the array are not added again.
          # @param [CssParser::RuleSet] rule_set
          # @param [Array] class_array
          def add_classes(rule_set, class_array)
            rule_set.each_selector do |selector|
              selector_name = selector.to_s.strip
              next unless selector_name.start_with?('.')
              class_name = selector_name[1..-1]
              class_array << class_name unless class_array.include?(class_name)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Parser for Facebook video URLs that should become embed elements
        class EmbeddedFacebookVideoParser < EmbeddedParser
          # The type of this embedded element
          # @return [Symbol]
          def embed_type
            :facebook_video
          end

          # Regular expression to check if a given string is a FB Video URL
          # Also used to extract the ID from the URL
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S*                      # all protocols & sub domains
              facebook\.com/            # domain
              (                         # optional path & parameters
                \w+/videos/|
                video\.php\?v=|
                video\.php\?id=
              )
              (?<id>\d+)                # numeric video id
            }xi
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Base class for the parsers of embedded elements. A node is an
        # embed if its text matches the URL pattern of one of the sub
        # classes; each sub class provides `embed_type` and `url_regexp`.
        class EmbeddedParser
          include Shared::Caption

          # @param [Nokogiri::HTML::Node] node
          # @param [Nokogiri::HTML::Node] caption_node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, caption_node:, css_analyzer:)
            @node = node
            @caption_node = caption_node
            @css_analyzer = css_analyzer
          end

          # Extract the ID of the embedded object from the URL in the node
          # @return [String, nil] nil if the node's text does not match the
          #   URL pattern of the parser
          def embed_id
            url_parts = self.class.url_regexp.match(@node.inner_text.strip)
            return if url_parts.nil?
            url_parts[:id]
          end

          # The type of this embedded element
          # To be implemented by sub classes!
          # @return [Symbol]
          def embed_type
            raise NotImplementedError
          end

          # Extract any potential tags, specified in brackets after the URL
          # @return [Array[String]] empty if there are no brackets
          def tags
            bracketed = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/.match(@node.inner_text)
            return [] if bracketed.nil?
            bracketed[:tags].split(' ')
          end

          # The embedded element
          # @return [ArticleJSON::Elements::Embed]
          def element
            ArticleJSON::Elements::Embed.new(
              embed_type: embed_type,
              embed_id: embed_id,
              tags: tags,
              caption: caption
            )
          end

          class << self
            # Check if a given string matches the URL pattern of the parser
            # @param [String] text
            # @return [Boolean]
            def matches?(text)
              !(url_regexp =~ text).nil?
            end

            # Regular expression to check if node content is embeddable element
            # Is also used to extract the ID from the URL.
            # To be implemented by sub classes!
            # @return [Regexp]
            def url_regexp
              raise NotImplementedError
            end

            # Build a embedded element based on the node's content
            # @param [Nokogiri::HTML::Node] node
            # @param [Nokogiri::HTML::Node] caption_node
            # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
            # @return [ArticleJSON::Elements::Embed, nil] nil if none of the
            #   parsers matches the text of the node
            def build(node:, caption_node:, css_analyzer:)
              parser_class = find_parser(node.inner_text)
              return if parser_class.nil?
              parser = parser_class.new(
                node: node,
                caption_node: caption_node,
                css_analyzer: css_analyzer
              )
              parser&.element
            end

            # Check if a node contains a supported embedded element
            # @param [Nokogiri::HTML::Node] node
            # @return [Boolean]
            def supported?(node)
              !find_parser(node.inner_text).nil?
            end

            private

            # List of embedded element classes, in the order in which they
            # are tried
            # @return [Array[Class]]
            def parsers
              [
                EmbeddedFacebookVideoParser,
                EmbeddedVimeoVideoParser,
                EmbeddedYoutubeVideoParser,
                EmbeddedTweetParser,
                EmbeddedSlideshareParser,
              ]
            end

            # Find the first matching class for a given (URL) string
            # The string is stripped and downcased before it is matched.
            # @param [String] text
            # @return [Class, nil] nil for blank text or if nothing matches
            def find_parser(text)
              candidate = text.strip.downcase
              return nil if candidate.empty?
              parsers.find { |parser_class| parser_class.matches?(candidate) }
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Parser for Slideshare URLs that should become embed elements
        class EmbeddedSlideshareParser < EmbeddedParser
          # The type of this embedded element
          # @return [Symbol]
          def embed_type
            :slideshare
          end

          # Extract the slide show ID (including the handle) from an URL
          # @return [String, nil] "<handle>/<id>", or nil if the node's text
          #   is not a Slideshare URL
          def embed_id
            url_parts = self.class.url_regexp.match(@node.inner_text.strip)
            return if url_parts.nil?
            "#{url_parts[:handle]}/#{url_parts[:id]}"
          end

          # Regular expression to check if a given string is a Slideshare URL
          # Also used to extract HANDLE and ID from the URL.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S*                      # all protocols & sub domains
              slideshare\.net/          # domain
              (?<handle>[^/\s]+)/       # username / handle
              (?<id>[^/?&\s\u00A0]+)    # the id / slug of the slide show
            }xi
          end
        end
      end
    end
  end
end
|