article_json 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +21 -0
  4. data/README.md +78 -0
  5. data/bin/article_json_export_google_doc.rb +22 -0
  6. data/bin/article_json_export_html.rb +14 -0
  7. data/bin/article_json_parse_google_doc.rb +14 -0
  8. data/bin/update_reference_document.sh +18 -0
  9. data/lib/article_json/article.rb +53 -0
  10. data/lib/article_json/configuration.rb +24 -0
  11. data/lib/article_json/elements/base.rb +40 -0
  12. data/lib/article_json/elements/embed.rb +58 -0
  13. data/lib/article_json/elements/heading.rb +37 -0
  14. data/lib/article_json/elements/image.rb +41 -0
  15. data/lib/article_json/elements/list.rb +37 -0
  16. data/lib/article_json/elements/paragraph.rb +31 -0
  17. data/lib/article_json/elements/quote.rb +41 -0
  18. data/lib/article_json/elements/text.rb +45 -0
  19. data/lib/article_json/elements/text_box.rb +37 -0
  20. data/lib/article_json/export/html/elements/base.rb +59 -0
  21. data/lib/article_json/export/html/elements/embed.rb +28 -0
  22. data/lib/article_json/export/html/elements/heading.rb +19 -0
  23. data/lib/article_json/export/html/elements/image.rb +33 -0
  24. data/lib/article_json/export/html/elements/list.rb +25 -0
  25. data/lib/article_json/export/html/elements/paragraph.rb +17 -0
  26. data/lib/article_json/export/html/elements/quote.rb +29 -0
  27. data/lib/article_json/export/html/elements/shared/caption.rb +22 -0
  28. data/lib/article_json/export/html/elements/shared/float.rb +17 -0
  29. data/lib/article_json/export/html/elements/text.rb +44 -0
  30. data/lib/article_json/export/html/elements/text_box.rb +25 -0
  31. data/lib/article_json/export/html/exporter.rb +22 -0
  32. data/lib/article_json/import/google_doc/html/css_analyzer.rb +144 -0
  33. data/lib/article_json/import/google_doc/html/embedded_facebook_video_parser.rb +33 -0
  34. data/lib/article_json/import/google_doc/html/embedded_parser.rb +113 -0
  35. data/lib/article_json/import/google_doc/html/embedded_slideshare_parser.rb +36 -0
  36. data/lib/article_json/import/google_doc/html/embedded_tweet_parser.rb +37 -0
  37. data/lib/article_json/import/google_doc/html/embedded_vimeo_video_parser.rb +29 -0
  38. data/lib/article_json/import/google_doc/html/embedded_youtube_video_parser.rb +33 -0
  39. data/lib/article_json/import/google_doc/html/heading_parser.rb +38 -0
  40. data/lib/article_json/import/google_doc/html/image_parser.rb +75 -0
  41. data/lib/article_json/import/google_doc/html/list_parser.rb +46 -0
  42. data/lib/article_json/import/google_doc/html/node_analyzer.rb +111 -0
  43. data/lib/article_json/import/google_doc/html/paragraph_parser.rb +26 -0
  44. data/lib/article_json/import/google_doc/html/parser.rb +125 -0
  45. data/lib/article_json/import/google_doc/html/quote_parser.rb +46 -0
  46. data/lib/article_json/import/google_doc/html/shared/caption.rb +20 -0
  47. data/lib/article_json/import/google_doc/html/shared/float.rb +21 -0
  48. data/lib/article_json/import/google_doc/html/text_box_parser.rb +49 -0
  49. data/lib/article_json/import/google_doc/html/text_parser.rb +89 -0
  50. data/lib/article_json/utils/o_embed_resolver/base.rb +63 -0
  51. data/lib/article_json/utils/o_embed_resolver/facebook_video.rb +21 -0
  52. data/lib/article_json/utils/o_embed_resolver/slideshare.rb +22 -0
  53. data/lib/article_json/utils/o_embed_resolver/tweet.rb +23 -0
  54. data/lib/article_json/utils/o_embed_resolver/vimeo_video.rb +21 -0
  55. data/lib/article_json/utils/o_embed_resolver/youtube_video.rb +21 -0
  56. data/lib/article_json/utils.rb +11 -0
  57. data/lib/article_json/version.rb +3 -0
  58. data/lib/article_json.rb +55 -0
  59. metadata +189 -0
@@ -0,0 +1,37 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded element parser for tweets
        class EmbeddedTweetParser < EmbeddedParser
          # Pattern that recognizes Twitter URLs. Its named groups `handle`
          # and `id` are what `#embed_id` reads to build the identifier.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S* # all protocols & sub domains
              twitter\.com/ # domain
              (?<handle>[^#/]+) # twitter handle
              (?:\#|/status/|/statuses/) # optional path or hash char
              (?<id>\d+) # numeric tweet id
            }xi
          end

          # Kind of embedded element handled by this parser
          # @return [Symbol]
          def embed_type
            :tweet
          end

          # Build the tweet identifier in the form `handle/id` from the URL
          # found in the stripped text of the node
          # @return [String, nil] `nil` if the text does not match the pattern
          def embed_id
            url = @node.inner_text.strip
            found = self.class.url_regexp.match(url)
            return unless found
            [found[:handle], found[:id]].join('/')
          end
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded element parser for Vimeo videos
        class EmbeddedVimeoVideoParser < EmbeddedParser
          # Pattern that recognizes Vimeo URLs; the named group `id` captures
          # the numerical video id.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S* # all protocols & sub domains
              vimeo\.com # domain
              .*[\#/] # optional path
              (?<id>[\d]+) # numerical id
            }xi
          end

          # Kind of embedded element handled by this parser
          # @return [Symbol]
          def embed_type
            :vimeo_video
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded element parser for Youtube videos
        class EmbeddedYoutubeVideoParser < EmbeddedParser
          # Pattern that recognizes Youtube URLs (both `youtube.com` and
          # `youtu.be`); the named group `id` captures the video id.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S* # all protocols & sub domains
              ( # different domains / paths
              youtube\.com/(
              [^/]+/.+/|(v|e(mbed)?)/|.*[?&]v=
              )|
              youtu\.be/
              )
              (?<id>[a-zA-Z0-9_-]+) # alpha-numerical id, including _-
            }xi
          end

          # Kind of embedded element handled by this parser
          # @return [Symbol]
          def embed_type
            :youtube_video
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns a heading node (`<h1>` to `<h5>`) into a heading element
        class HeadingParser
          # @param [Nokogiri::HTML::Node] node
          def initialize(node:)
            @node = node
          end

          # Plain text of the heading with all markup removed
          # @return [String]
          def content
            @node.inner_text
          end

          # Heading level derived from the tag name, e.g. `<h3>` is level 3.
          # Tags other than `h1` to `h5` have no level.
          # @return [Integer, nil]
          def level
            position = %w(h1 h2 h3 h4 h5).index(@node.name)
            position + 1 if position
          end

          # @return [ArticleJSON::Elements::Heading]
          def element
            heading_level = level
            heading_text = content
            ArticleJSON::Elements::Heading.new(
              level: heading_level,
              content: heading_text
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns an image node and its caption node into an image element
        class ImageParser
          include Shared::Caption
          include Shared::Float

          # @param [Nokogiri::HTML::Node] node
          # @param [Nokogiri::HTML::Node] caption_node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, caption_node:, css_analyzer:)
            @node = node
            @caption_node = caption_node
            @css_analyzer = css_analyzer
            # The floating behavior is read from the main node itself
            @float_node = @node
          end

          # Value of the `src` attribute of the image
          # @return [String]
          def source_url
            image_node.attribute('src').value
          end

          # First `<img>` node found below the main node
          # @return [Nokogiri::HTML::Node]
          def image_node
            @node.xpath('.//img').first
          end

          # Floating behavior of the image (left, right or not at all).
          # Only images that pass the `floatable_size?` check can float.
          # @return [Symbol, nil]
          def float
            return unless floatable_size?
            super
          end

          # @return [ArticleJSON::Elements::Image]
          def element
            ArticleJSON::Elements::Image.new(
              source_url: source_url,
              float: float,
              caption: caption
            )
          end

          private

          # Check if the image's width can be determined and is less than
          # 500px
          # @return [Boolean, nil] `nil` if no width could be determined
          def floatable_size?
            width = image_width
            width && width < 500
          end

          # Width of the image, taken from the `width` attribute if present
          # and otherwise from the `style` attribute.
          # @return [Integer, nil] `nil` if neither source yields a width
          def image_width
            @image_width ||= width_from_attribute || width_from_style
          end

          # Width as given by the `width` attribute of the image node
          # @return [Integer, nil]
          def width_from_attribute
            return unless image_node.has_attribute?('width')
            image_node.attribute('width').value.to_i
          end

          # Pixel width as given by the `style` attribute of the image node
          # @return [Integer, nil]
          def width_from_style
            return unless image_node.has_attribute?('style')
            style = image_node.attribute('style').value
            found = style.match(/width:\s?(?<px>\d+|(\d+?\.\d+))px/)
            found['px'].to_i if found && found['px']
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns an `<ol>` or `<ul>` node into a list element
        class ListParser
          # @param [Nokogiri::HTML::Node] node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, css_analyzer:)
            @node = node
            @css_analyzer = css_analyzer
          end

          # Kind of list, derived from the tag name of the node
          # @return [Symbol, nil] `nil` for tags other than `ol` and `ul`
          def list_type
            { 'ol' => :ordered, 'ul' => :unordered }[@node.name]
          end

          # One paragraph per `<li>` child of the list node
          # @return [Array[ArticleJSON::Elements::Paragraph]]
          def content
            items = @node.children.select { |child| child.name == 'li' }
            items.map do |item|
              ParagraphParser.new(node: item, css_analyzer: @css_analyzer).element
            end
          end

          # @return [ArticleJSON::Elements::List]
          def element
            kind = list_type
            paragraphs = content
            ArticleJSON::Elements::List.new(list_type: kind, content: paragraphs)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Answers questions about a single node, most importantly which kind
        # of article element it represents
        class NodeAnalyzer
          attr_reader :node

          # @param [Nokogiri::HTML::Node] node
          def initialize(node)
            @node = node
          end

          # Compare the node's text with a given text, ignoring case and
          # surrounding whitespace
          # @param [String] text
          # @return [Boolean]
          def has_text?(text)
            actual = node.inner_text.strip.downcase
            expected = text.strip.downcase
            actual == expected
          end

          # Check if the node is empty: it has no text and is neither an
          # image nor a horizontal line
          # @return [Boolean]
          def empty?
            memoized(:empty) { node.inner_text.strip.empty? && !image? && !hr? }
          end

          # Check if the node is a header tag between <h1> and <h5>
          # @return [Boolean]
          def heading?
            memoized(:heading) { %w(h1 h2 h3 h4 h5).include?(node.name) }
          end

          # Check if the node is a horizontal line (i.e. `<hr>`)
          # @return [Boolean]
          def hr?
            node.name == 'hr'
          end

          # Check if the node is a normal text paragraph: a `<p>` tag that
          # is not empty and is none of image, text box, quote or embed
          # @return [Boolean]
          def paragraph?
            memoized(:paragraph) do
              node.name == 'p' &&
                !(empty? || image? || text_box? || quote? || embed?)
            end
          end

          # Check if the node is an ordered or unordered list
          # @return [Boolean]
          def list?
            memoized(:list) { node.name == 'ul' || node.name == 'ol' }
          end

          # Check if the node starts a text box, i.e. its text is
          # "Textbox:" or "Highlight:"
          # @return [Boolean]
          def text_box?
            memoized(:text_box) do
              ['textbox:', 'highlight:'].any? { |label| has_text?(label) }
            end
          end

          # Check if the node starts a quote, i.e. its text is "Quote:"
          # @return [Boolean]
          def quote?
            memoized(:quote) { has_text?('quote:') }
          end

          # Check if the node contains at least one `<img>` node
          # @return [Boolean]
          def image?
            memoized(:image) { node.xpath('.//img').length.positive? }
          end

          # Check if the node is supported by the embedded parser
          # @return [Boolean]
          def embed?
            memoized(:embed) { EmbeddedParser.supported?(node) }
          end

          # Determine the type of this node
          # The predicates are tried in a fixed order and the first one that
          # holds names the type; `:unknown` is returned if none holds.
          # @return [Symbol]
          def type
            kinds = %i(empty hr heading paragraph list text_box quote image embed)
            kinds.find { |kind| public_send("#{kind}?") } || :unknown
          end

          private

          # Compute the value of the block once per key and reuse it on
          # later calls, also when the value is `false` or `nil`
          # @param [Symbol] key
          # @return [Object]
          def memoized(key)
            @memo ||= {}
            return @memo[key] if @memo.key?(key)
            @memo[key] = yield
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns a single node into a paragraph element
        class ParagraphParser
          # @param [Nokogiri::HTML::Node] node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, css_analyzer:)
            @css_analyzer = css_analyzer
            @node = node
          end

          # Text elements extracted from the node by the text parser
          # @return [Array[ArticleJSON::Elements::Text]]
          def content
            TextParser.extract(css_analyzer: @css_analyzer, node: @node)
          end

          # @return [ArticleJSON::Elements::Paragraph]
          def element
            texts = content
            ArticleJSON::Elements::Paragraph.new(content: texts)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Walks over the body nodes of an HTML document and turns them into
        # article elements
        class Parser
          # @param [String] html
          def initialize(html)
            doc = Nokogiri::HTML(html)
            @body_enumerator = doc.xpath('//body').last.children.to_enum

            # The stylesheet is optional: `nil` is handed to the analyzer
            # if the head contains no `<style>` node
            css_node = doc.xpath('//head/style').last
            @css_analyzer = CSSAnalyzer.new(css_node&.inner_text)
          end

          # Parse the body of the document and return the result
          # The body is only parsed once; later calls return the same array.
          # @return [Array[ArticleJSON::Elements::Base]]
          def parsed_content
            @parsed_content ||= parse_body
          end

          private

          # Loop over all body nodes and parse them
          # Nodes that yield no element (see `parse_current_node`) are
          # skipped.
          # @return [Array[ArticleJSON::Elements::Base]]
          def parse_body
            @parsed_content = []
            while body_has_more_nodes?
              @current_node = NodeAnalyzer.new(@body_enumerator.next)
              element = parse_current_node
              @parsed_content << element if element
            end
            @parsed_content
          end

          # Parse the current node and return an element, if available
          # @return [ArticleJSON::Elements::Base, nil] `nil` for horizontal
          #   lines, empty nodes and nodes of unknown type
          def parse_current_node
            case @current_node.type
            when :heading then parse_heading
            when :paragraph then parse_paragraph
            when :list then parse_list
            when :image then parse_image
            when :text_box then parse_text_box
            when :quote then parse_quote
            when :embed then parse_embed
            when :hr, :empty, :unknown then nil
            end
          end

          # @return [ArticleJSON::Elements::Heading]
          def parse_heading
            HeadingParser.new(node: @current_node.node).element
          end

          # @return [ArticleJSON::Elements::Paragraph]
          def parse_paragraph
            ParagraphParser
              .new(node: @current_node.node, css_analyzer: @css_analyzer)
              .element
          end

          # @return [ArticleJSON::Elements::List]
          def parse_list
            ListParser
              .new(node: @current_node.node, css_analyzer: @css_analyzer)
              .element
          end

          # The node following the image is consumed as its caption.
          # NOTE(review): if the image is the last body node there is no
          # node left to consume and `Enumerator#next` raises StopIteration;
          # confirm how a missing caption should be handled.
          # @return [ArticleJSON::Elements::Image]
          def parse_image
            ImageParser
              .new(
                node: @current_node.node,
                caption_node: @body_enumerator.next,
                css_analyzer: @css_analyzer
              )
              .element
          end

          # The nodes up to the next horizontal line form the text box
          # @return [ArticleJSON::Elements::TextBox]
          def parse_text_box
            TextBoxParser
              .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
              .element
          end

          # The nodes up to the next horizontal line form the quote
          # @return [ArticleJSON::Elements::Quote]
          def parse_quote
            QuoteParser
              .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
              .element
          end

          # The node following the embed is consumed as its caption.
          # NOTE(review): same end-of-document caveat as `parse_image`.
          # @return [ArticleJSON::Elements::Embed]
          def parse_embed
            EmbeddedParser.build(
              node: @current_node.node,
              caption_node: @body_enumerator.next,
              css_analyzer: @css_analyzer
            )
          end

          # Collect all nodes until a horizontal line, advancing the
          # enumerator. The horizontal line itself is not consumed.
          # If the document ends before a horizontal line is found, all
          # remaining nodes are returned instead of raising StopIteration.
          # @return [Array[Nokogiri::HTML::Node]]
          def nodes_until_hr
            nodes = []
            while body_has_more_nodes? &&
                  !NodeAnalyzer.new(@body_enumerator.peek).hr?
              nodes << @body_enumerator.next
            end
            nodes
          end

          # Check if the enumerator can still deliver a node, without
          # advancing it
          # @return [Boolean]
          def body_has_more_nodes?
            @body_enumerator.peek
            true
          rescue StopIteration
            false
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns the nodes of a quote into a quote element
        class QuoteParser
          include Shared::Caption
          include Shared::Float

          # Empty nodes are dropped before any further processing.
          # @param [Array[Nokogiri::HTML::Node]] nodes
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(nodes:, css_analyzer:)
            @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
            @css_analyzer = css_analyzer

            # First node of the quote indicates floating behavior
            @float_node = @nodes.first
            # Last node of the quote contains the caption
            @caption_node = @nodes.last
          end

          # Parse the quote's nodes to get a set of paragraphs
          # The last node is ignored as it contains the quote caption
          # @return [Array[ArticleJSON::Elements::Paragraph]]
          def content
            # A range slice yields an empty array when there are no nodes,
            # whereas `take` with a negative count raises an ArgumentError
            @nodes[0...-1].map do |node|
              ParagraphParser
                .new(node: node, css_analyzer: @css_analyzer)
                .element
            end
          end

          # @return [ArticleJSON::Elements::Quote]
          def element
            ArticleJSON::Elements::Quote.new(
              content: content,
              caption: caption,
              float: float
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        module Shared
          # Mixin for parsers that keep a caption node in `@caption_node`
          # and a CSS analyzer in `@css_analyzer`
          module Caption
            # Text elements extracted from the caption node
            # @return [Array[ArticleJSON::Elements::Text]]
            def caption
              text_parser = ArticleJSON::Import::GoogleDoc::HTML::TextParser
              text_parser.extract(
                node: @caption_node,
                css_analyzer: @css_analyzer
              )
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        module Shared
          # Mixin for parsers that keep the node indicating the floating
          # behavior in `@float_node` and a CSS analyzer in `@css_analyzer`
          module Float
            # Check if the element is floating (left, right or not at all)
            # The decision is based on the `class` attribute of the float
            # node, as judged by the CSS analyzer.
            # @return [Symbol, nil] `:right`, `:left` or `nil` if the
            #   element does not float, the float node has no `class`
            #   attribute or there is no float node at all
            def float
              # The float node is `nil` when the including parser was given
              # no usable nodes, hence the safe navigation
              return unless @float_node&.has_attribute?('class')
              node_class = @float_node.attribute('class').value || ''
              return :right if @css_analyzer.right_aligned?(node_class)
              return :left if @css_analyzer.left_aligned?(node_class)
              nil
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns the nodes of a text box into a text box element
        class TextBoxParser
          include Shared::Float

          # Empty nodes are dropped before any further processing.
          # @param [Array[Nokogiri::HTML::Node]] nodes
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(nodes:, css_analyzer:)
            @css_analyzer = css_analyzer
            @nodes = nodes.select { |node| !NodeAnalyzer.new(node).empty? }
            # The floating behavior is read from the first remaining node
            @float_node = @nodes.first
          end

          # Sub elements of the text box, in node order
          # Only headings, paragraphs and lists are supported; nodes of any
          # other type are left out.
          # @return [Array]
          def content
            @nodes.each_with_object([]) do |node, elements|
              parsed = parse_sub_node(node)
              elements << parsed unless parsed.nil?
            end
          end

          # Element representation of this text box
          # @return [ArticleJSON::Elements::TextBox]
          def element
            ArticleJSON::Elements::TextBox.new(float: float, content: content)
          end

          private

          # Parse a single node of the text box
          # @param [Nokogiri::HTML::Node] node
          # @return [ArticleJSON::Elements::Base, nil] `nil` if the node is
          #   neither a heading, a paragraph nor a list
          def parse_sub_node(node)
            kind = NodeAnalyzer.new(node).type
            if kind == :heading
              HeadingParser.new(node: node).element
            elsif kind == :paragraph
              ParagraphParser.new(node: node, css_analyzer: @css_analyzer).element
            elsif kind == :list
              ListParser.new(node: node, css_analyzer: @css_analyzer).element
            end
          end
        end
      end
    end
  end
end