article_json 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +21 -0
  4. data/README.md +78 -0
  5. data/bin/article_json_export_google_doc.rb +22 -0
  6. data/bin/article_json_export_html.rb +14 -0
  7. data/bin/article_json_parse_google_doc.rb +14 -0
  8. data/bin/update_reference_document.sh +18 -0
  9. data/lib/article_json/article.rb +53 -0
  10. data/lib/article_json/configuration.rb +24 -0
  11. data/lib/article_json/elements/base.rb +40 -0
  12. data/lib/article_json/elements/embed.rb +58 -0
  13. data/lib/article_json/elements/heading.rb +37 -0
  14. data/lib/article_json/elements/image.rb +41 -0
  15. data/lib/article_json/elements/list.rb +37 -0
  16. data/lib/article_json/elements/paragraph.rb +31 -0
  17. data/lib/article_json/elements/quote.rb +41 -0
  18. data/lib/article_json/elements/text.rb +45 -0
  19. data/lib/article_json/elements/text_box.rb +37 -0
  20. data/lib/article_json/export/html/elements/base.rb +59 -0
  21. data/lib/article_json/export/html/elements/embed.rb +28 -0
  22. data/lib/article_json/export/html/elements/heading.rb +19 -0
  23. data/lib/article_json/export/html/elements/image.rb +33 -0
  24. data/lib/article_json/export/html/elements/list.rb +25 -0
  25. data/lib/article_json/export/html/elements/paragraph.rb +17 -0
  26. data/lib/article_json/export/html/elements/quote.rb +29 -0
  27. data/lib/article_json/export/html/elements/shared/caption.rb +22 -0
  28. data/lib/article_json/export/html/elements/shared/float.rb +17 -0
  29. data/lib/article_json/export/html/elements/text.rb +44 -0
  30. data/lib/article_json/export/html/elements/text_box.rb +25 -0
  31. data/lib/article_json/export/html/exporter.rb +22 -0
  32. data/lib/article_json/import/google_doc/html/css_analyzer.rb +144 -0
  33. data/lib/article_json/import/google_doc/html/embedded_facebook_video_parser.rb +33 -0
  34. data/lib/article_json/import/google_doc/html/embedded_parser.rb +113 -0
  35. data/lib/article_json/import/google_doc/html/embedded_slideshare_parser.rb +36 -0
  36. data/lib/article_json/import/google_doc/html/embedded_tweet_parser.rb +37 -0
  37. data/lib/article_json/import/google_doc/html/embedded_vimeo_video_parser.rb +29 -0
  38. data/lib/article_json/import/google_doc/html/embedded_youtube_video_parser.rb +33 -0
  39. data/lib/article_json/import/google_doc/html/heading_parser.rb +38 -0
  40. data/lib/article_json/import/google_doc/html/image_parser.rb +75 -0
  41. data/lib/article_json/import/google_doc/html/list_parser.rb +46 -0
  42. data/lib/article_json/import/google_doc/html/node_analyzer.rb +111 -0
  43. data/lib/article_json/import/google_doc/html/paragraph_parser.rb +26 -0
  44. data/lib/article_json/import/google_doc/html/parser.rb +125 -0
  45. data/lib/article_json/import/google_doc/html/quote_parser.rb +46 -0
  46. data/lib/article_json/import/google_doc/html/shared/caption.rb +20 -0
  47. data/lib/article_json/import/google_doc/html/shared/float.rb +21 -0
  48. data/lib/article_json/import/google_doc/html/text_box_parser.rb +49 -0
  49. data/lib/article_json/import/google_doc/html/text_parser.rb +89 -0
  50. data/lib/article_json/utils/o_embed_resolver/base.rb +63 -0
  51. data/lib/article_json/utils/o_embed_resolver/facebook_video.rb +21 -0
  52. data/lib/article_json/utils/o_embed_resolver/slideshare.rb +22 -0
  53. data/lib/article_json/utils/o_embed_resolver/tweet.rb +23 -0
  54. data/lib/article_json/utils/o_embed_resolver/vimeo_video.rb +21 -0
  55. data/lib/article_json/utils/o_embed_resolver/youtube_video.rb +21 -0
  56. data/lib/article_json/utils.rb +11 -0
  57. data/lib/article_json/version.rb +3 -0
  58. data/lib/article_json.rb +55 -0
  59. metadata +189 -0
@@ -0,0 +1,37 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded-element parser for tweets, recognised by a Twitter status
        # URL in the node's text.
        class EmbeddedTweetParser < EmbeddedParser
          # Identifier of the embed kind handled by this parser
          # @return [Symbol]
          def embed_type
            :tweet
          end

          # Build the tweet identifier from the URL in the node's text
          # The identifier has the form `<handle>/<numeric id>`.
          # @return [String, nil] `nil` if the text does not match `url_regexp`
          def embed_id
            url = @node.inner_text.strip
            captures = self.class.url_regexp.match(url)
            return unless captures
            [captures[:handle], captures[:id]].join('/')
          end

          # Pattern that recognises Twitter status URLs
          # Provides the named captures `handle` and `id`.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S*                        # all protocols & sub domains
              twitter\.com/               # domain
              (?<handle>[^#/]+)           # twitter handle
              (?:\#|/status/|/statuses/)  # optional path or hash char
              (?<id>\d+)                  # numeric tweet id
            }xi
          end
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded-element parser for Vimeo videos, recognised by a Vimeo URL.
        class EmbeddedVimeoVideoParser < EmbeddedParser
          # Identifier of the embed kind handled by this parser
          # @return [Symbol]
          def embed_type
            :vimeo_video
          end

          # Pattern that recognises Vimeo URLs
          # Provides the named capture `id` holding the numeric video id.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S*          # all protocols & sub domains
              vimeo\.com    # domain
              .*[\#/]       # optional path
              (?<id>[\d]+)  # numerical id
            }xi
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Embedded-element parser for Youtube videos, recognised by a
        # `youtube.com` or `youtu.be` URL.
        class EmbeddedYoutubeVideoParser < EmbeddedParser
          # Identifier of the embed kind handled by this parser
          # @return [Symbol]
          def embed_type
            :youtube_video
          end

          # Pattern that recognises Youtube URLs
          # Provides the named capture `id` holding the video id.
          # @return [Regexp]
          def self.url_regexp
            %r{
              ^\S*                      # all protocols & sub domains
              (                         # different domains / paths
                youtube\.com/(
                  [^/]+/.+/|(v|e(mbed)?)/|.*[?&]v=
                )|
                youtu\.be/
              )
              (?<id>[a-zA-Z0-9_-]+)     # alpha-numerical id, including _-
            }xi
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns a heading node (`<h1>` to `<h5>`) into a heading element.
        class HeadingParser
          # @param [Nokogiri::HTML::Node] node
          def initialize(node:)
            @node = node
          end

          # Plain text of the heading with all markup stripped
          # @return [String]
          def content
            @node.inner_text
          end

          # Heading level derived from the tag name, e.g. `<h3>` gives 3
          # @return [Integer, nil] `nil` for any tag other than h1..h5
          def level
            position = %w(h1 h2 h3 h4 h5).index(@node.name)
            position && position + 1
          end

          # @return [ArticleJSON::Elements::Heading]
          def element
            ArticleJSON::Elements::Heading.new(level: level, content: content)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns a node containing an `<img>` plus its caption node into an
        # image element.
        class ImageParser
          include Shared::Caption
          include Shared::Float

          # @param [Nokogiri::HTML::Node] node
          # @param [Nokogiri::HTML::Node] caption_node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, caption_node:, css_analyzer:)
            @node = node
            @caption_node = caption_node
            @css_analyzer = css_analyzer

            # The floating behavior is read from the main node itself
            @float_node = node
          end

          # URL taken from the `src` attribute of the image
          # @return [String]
          def source_url
            image_node.attribute('src').value
          end

          # First `<img>` node found below the main node
          # @return [Nokogiri::HTML::Node]
          def image_node
            @node.xpath('.//img').first
          end

          # Floating behavior of the image (left, right or none)
          # Only images with a floatable size are considered for floating.
          # @return [Symbol, nil]
          def float
            return unless floatable_size?
            super
          end

          # @return [ArticleJSON::Elements::Image]
          def element
            ArticleJSON::Elements::Image.new(
              source_url: source_url,
              float: float,
              caption: caption
            )
          end

          private

          # Whether a width could be determined and is below 500px
          # This is about 3/4 of the google document width...
          # @return [Boolean, nil] `nil` if no width is available
          def floatable_size?
            width = image_width
            width && width < 500
          end

          # Width of the image, read from the `width` attribute or, failing
          # that, from a pixel width in the `style` attribute.
          # @return [Integer, nil] `nil` if neither source yields a width
          def image_width
            @image_width ||= width_from_attribute || width_from_style
          end

          # Integer value of the `width` attribute, if the attribute exists
          # @return [Integer, nil]
          def width_from_attribute
            img = image_node
            img.attribute('width').value.to_i if img.has_attribute?('width')
          end

          # Integer part of a `width: ...px` declaration in the `style`
          # attribute, if there is one
          # @return [Integer, nil]
          def width_from_style
            img = image_node
            return unless img.has_attribute?('style')
            found = img
                    .attribute('style')
                    .value
                    .match(/width:\s?(?<px>\d+|(\d+?\.\d+))px/)
            found['px'].to_i if found && found['px']
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns an `<ol>` or `<ul>` node into a list element.
        class ListParser
          # @param [Nokogiri::HTML::Node] node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, css_analyzer:)
            @node = node
            @css_analyzer = css_analyzer
          end

          # Kind of list, derived from the tag name
          # @return [Symbol, nil] `:ordered`, `:unordered` or `nil` for any
          #   other tag
          def list_type
            { 'ol' => :ordered, 'ul' => :unordered }[@node.name]
          end

          # One paragraph per direct `<li>` child of the list node
          # @return [Array[ArticleJSON::Elements::Paragraph]]
          def content
            items = @node.children.select { |child| child.name == 'li' }
            items.map do |item|
              ParagraphParser.new(node: item, css_analyzer: @css_analyzer).element
            end
          end

          # @return [ArticleJSON::Elements::List]
          def element
            ArticleJSON::Elements::List.new(
              list_type: list_type,
              content: content
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Classifies a single node into one of the element types understood
        # by the importer. Most predicate results are cached per instance.
        class NodeAnalyzer
          attr_reader :node

          # @param [Nokogiri::HTML::Node] node
          def initialize(node)
            @node = node
          end

          # Whether the node's text equals the given text, ignoring case and
          # surrounding whitespace
          # @param [String] text
          # @return [Boolean]
          def has_text?(text)
            actual = node.inner_text.strip.downcase
            expected = text.strip.downcase
            actual == expected
          end

          # Whether the node carries no content at all
          # Nodes without text still count as non-empty when they contain an
          # image or are a horizontal line.
          # @return [Boolean]
          def empty?
            unless defined?(@is_empty)
              @is_empty = node.inner_text.strip.empty? && !(image? || hr?)
            end
            @is_empty
          end

          # Whether the node is a header tag between <h1> and <h5>
          # @return [Boolean]
          def heading?
            unless defined?(@is_heading)
              @is_heading = %w(h1 h2 h3 h4 h5).include?(node.name)
            end
            @is_heading
          end

          # Whether the node is a horizontal line (i.e. `<hr>`)
          # @return [Boolean]
          def hr?
            node.name == 'hr'
          end

          # Whether the node is a plain text paragraph, i.e. a `<p>` that is
          # not empty and is none of image, text box, quote or embed
          # @return [Boolean]
          def paragraph?
            unless defined?(@is_paragraph)
              special = %i(empty? image? text_box? quote? embed?)
              @is_paragraph =
                node.name == 'p' && special.none? { |check| send(check) }
            end
            @is_paragraph
          end

          # Whether the node is an ordered or unordered list
          # @return [Boolean]
          def list?
            @is_list = %w(ul ol).include?(node.name) unless defined?(@is_list)
            @is_list
          end

          # Whether the node starts a text box
          # Text boxes start with a single line saying "Textbox:" or
          # "Highlight:".
          # @return [Boolean]
          def text_box?
            unless defined?(@is_text_box)
              @is_text_box = has_text?('textbox:') || has_text?('highlight:')
            end
            @is_text_box
          end

          # Whether the node starts a quote
          # Quotes start with a single line saying "Quote:".
          # @return [Boolean]
          def quote?
            @is_quote = has_text?('quote:') unless defined?(@is_quote)
            @is_quote
          end

          # Whether the node contains at least one `<img>`
          # @return [Boolean]
          def image?
            unless defined?(@is_image)
              @is_image = node.xpath('.//img').length > 0
            end
            @is_image
          end

          # Whether the node is an embedded element supported by
          # `EmbeddedParser`
          # @return [Boolean]
          def embed?
            unless defined?(@is_embed)
              @is_embed = EmbeddedParser.supported?(node)
            end
            @is_embed
          end

          # Type of this node
          # The predicates are tried in a fixed order and the first one that
          # holds decides the type.
          # @return [Symbol] `:unknown` if no predicate holds
          def type
            kinds = %i(empty hr heading paragraph list text_box quote image embed)
            detected = kinds.find { |kind| public_send("#{kind}?") }
            detected || :unknown
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns a node into a paragraph element by handing the node to
        # `TextParser`.
        class ParagraphParser
          # @param [Nokogiri::HTML::Node] node
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(node:, css_analyzer:)
            @css_analyzer = css_analyzer
            @node = node
          end

          # Text elements extracted from the node
          # @return [Array[ArticleJSON::Elements::Text]]
          def content
            TextParser.extract(css_analyzer: @css_analyzer, node: @node)
          end

          # @return [ArticleJSON::Elements::Paragraph]
          def element
            texts = content
            ArticleJSON::Elements::Paragraph.new(content: texts)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Walks over the top-level nodes of a Google Doc HTML export and
        # converts them into article elements.
        class Parser
          # @param [String] html
          def initialize(html)
            doc = Nokogiri::HTML(html)
            body = doc.xpath('//body').last
            # Fall back to an empty node list if no <body> node was found
            @body_enumerator = (body ? body.children : []).to_enum

            css_node = doc.xpath('//head/style').last
            @css_analyzer = CSSAnalyzer.new(css_node&.inner_text)
          end

          # Parse the body of the document and return the result
          # The result is cached after the first successful parse.
          # @return [Array[ArticleJSON::Elements::Base]]
          def parsed_content
            @parsed_content ||= parse_body
          end

          private

          # Loop over all body nodes and parse them
          # Elements are collected in a local array, so an exception raised
          # halfway through never leaves a partial result behind for
          # `parsed_content` to return on a later call.
          # @return [Array[ArticleJSON::Elements::Base]]
          def parse_body
            # Always start from the first node, also after a failed attempt
            @body_enumerator.rewind
            elements = []
            while body_has_more_nodes?
              @current_node = NodeAnalyzer.new(@body_enumerator.next)
              element = parse_current_node
              # Nodes without a matching element (hr, empty, unknown) are skipped
              elements << element if element
            end
            elements
          end

          # Parse the current node and return an element, if available
          # @return [ArticleJSON::Elements::Base, nil]
          def parse_current_node
            case @current_node.type
            when :heading then parse_heading
            when :paragraph then parse_paragraph
            when :list then parse_list
            when :image then parse_image
            when :text_box then parse_text_box
            when :quote then parse_quote
            when :embed then parse_embed
            when :hr, :empty, :unknown then nil
            end
          end

          # @return [ArticleJSON::Elements::Heading]
          def parse_heading
            HeadingParser.new(node: @current_node.node).element
          end

          # @return [ArticleJSON::Elements::Paragraph]
          def parse_paragraph
            ParagraphParser
              .new(node: @current_node.node, css_analyzer: @css_analyzer)
              .element
          end

          # @return [ArticleJSON::Elements::List]
          def parse_list
            ListParser
              .new(node: @current_node.node, css_analyzer: @css_analyzer)
              .element
          end

          # The node following the image is consumed as its caption.
          # NOTE(review): `Enumerator#next` raises StopIteration if the image
          # is the last body node - confirm whether a missing caption should
          # be tolerated.
          # @return [ArticleJSON::Elements::Image]
          def parse_image
            ImageParser
              .new(
                node: @current_node.node,
                caption_node: @body_enumerator.next,
                css_analyzer: @css_analyzer
              )
              .element
          end

          # @return [ArticleJSON::Elements::TextBox]
          def parse_text_box
            TextBoxParser
              .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
              .element
          end

          # @return [ArticleJSON::Elements::Quote]
          def parse_quote
            QuoteParser
              .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
              .element
          end

          # The node following the embed is consumed as its caption.
          # NOTE(review): `Enumerator#next` raises StopIteration if the embed
          # is the last body node - confirm whether a missing caption should
          # be tolerated.
          # @return [ArticleJSON::Elements::Embed]
          def parse_embed
            EmbeddedParser.build(
              node: @current_node.node,
              caption_node: @body_enumerator.next,
              css_analyzer: @css_analyzer
            )
          end

          # Collect all nodes until a horizontal line, advancing the enumerator
          # The horizontal line itself is not consumed. If the document ends
          # before a horizontal line is found, all remaining nodes are
          # returned instead of raising StopIteration.
          # @return [Array[Nokogiri::HTML::Node]]
          def nodes_until_hr
            nodes = []
            while body_has_more_nodes?
              break if NodeAnalyzer.new(@body_enumerator.peek).hr?
              nodes << @body_enumerator.next
            end
            nodes
          end

          # Whether the enumerator still has nodes left
          # @return [Boolean]
          def body_has_more_nodes?
            @body_enumerator.peek
            true
          rescue StopIteration
            false
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns the nodes belonging to a quote into a quote element.
        class QuoteParser
          include Shared::Caption
          include Shared::Float

          # Empty nodes (as judged by `NodeAnalyzer#empty?`) are dropped.
          # @param [Array[Nokogiri::HTML::Node]] nodes
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(nodes:, css_analyzer:)
            @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
            @css_analyzer = css_analyzer

            # First node of the quote indicates floating behavior
            @float_node = @nodes.first
            # Last node of the quote contains the caption
            @caption_node = @nodes.last
          end

          # Parse the quote's nodes to get a set of paragraphs
          # The last node is ignored as it contains the quote caption.
          # A quote without any nodes yields an empty array; the range slice
          # is used because `take(@nodes.size - 1)` would raise an
          # ArgumentError for the negative size in that case.
          # @return [Array[ArticleJSON::Elements::Paragraph]]
          def content
            @nodes[0...-1].map do |node|
              ParagraphParser
                .new(node: node, css_analyzer: @css_analyzer)
                .element
            end
          end

          # @return [ArticleJSON::Elements::Quote]
          def element
            ArticleJSON::Elements::Quote.new(
              content: content,
              caption: caption,
              float: float
            )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        module Shared
          # Mixin for parsers whose element carries a caption. It reads the
          # instance variables `@caption_node` and `@css_analyzer`.
          module Caption
            # Text elements extracted from the caption node
            # @return [Array[ArticleJSON::Elements::Text]]
            def caption
              text_parser = ArticleJSON::Import::GoogleDoc::HTML::TextParser
              text_parser.extract(
                node: @caption_node,
                css_analyzer: @css_analyzer
              )
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        module Shared
          # Mixin for parsers whose element can float. It reads the instance
          # variables `@float_node` and `@css_analyzer`.
          module Float
            # Check if the element is floating (left, right or not at all)
            # The decision is made by passing the float node's `class`
            # attribute to the CSS analyzer.
            # @return [Symbol, nil] `:right`, `:left`, or `nil` if the element
            #   does not float, has no `class` attribute or has no float node
            def float
              # The float node is nil for parsers built from an empty node
              # list, so guard against it instead of raising NoMethodError
              return unless @float_node&.has_attribute?('class')
              node_class = @float_node.attribute('class').value || ''
              return :right if @css_analyzer.right_aligned?(node_class)
              return :left if @css_analyzer.left_aligned?(node_class)
              nil
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
module ArticleJSON
  module Import
    module GoogleDoc
      module HTML
        # Turns the nodes belonging to a text box into a text box element.
        class TextBoxParser
          include Shared::Float

          # Empty nodes (as judged by `NodeAnalyzer#empty?`) are dropped.
          # @param [Array[Nokogiri::HTML::Node]] nodes
          # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
          def initialize(nodes:, css_analyzer:)
            @css_analyzer = css_analyzer
            @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }

            # The floating behavior is read from the first remaining node
            @float_node = @nodes.first
          end

          # Sub elements of the text box, in document order
          # Only headings, paragraphs and lists are supported; nodes of any
          # other type are left out.
          # @return [Array]
          def content
            @nodes.each_with_object([]) do |node, elements|
              parsed = parse_sub_node(node)
              elements << parsed unless parsed.nil?
            end
          end

          # Element representation of this text box
          # @return [ArticleJSON::Elements::TextBox]
          def element
            ArticleJSON::Elements::TextBox.new(float: float, content: content)
          end

          private

          # Parse a single node of the text box
          # @param [Nokogiri::HTML::Node] node
          # @return [ArticleJSON::Elements::Base, nil] `nil` for unsupported
          #   node types
          def parse_sub_node(node)
            kind = NodeAnalyzer.new(node).type
            if kind == :heading
              HeadingParser.new(node: node).element
            elsif kind == :paragraph
              ParagraphParser.new(node: node, css_analyzer: @css_analyzer).element
            elsif kind == :list
              ListParser.new(node: node, css_analyzer: @css_analyzer).element
            end
          end
        end
      end
    end
  end
end