RubyGems - article_json - Versions diffs - 0.3.8 → 0.4.1 - Mend

article_json 0.3.8 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

data/lib/article_json/export/apple_news/elements/paragraph.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module ArticleJSON
+  module Export
+    module AppleNews
+      module Elements
+        # Exports a paragraph element as a single Apple News `body` component
+        # whose text is the concatenated export of its child elements.
+        class Paragraph < Base
+          # Generate the paragraph node with its containing text elements
+          # @return [Hash] component with the keys :role, :text, :format,
+          #   :layout and :textStyle
+          def export
+            {
+              role: 'body',
+              text: text,
+              format: 'html',
+              layout: 'bodyLayout',
+              textStyle: 'bodyStyle',
+            }
+          end
+          private
+          # Get the exporter class for text elements
+          # @return [Class] whatever `exporter_by_type(:text)` returns; it is
+          #   instantiated with a child element and asked to `export`
+          def text_exporter
+            self.class.exporter_by_type(:text)
+          end
+          # Export every child element of the paragraph with the text exporter
+          # and concatenate the results into one string
+          # @return [String]
+          def text
+            @element.content.map do |child_element|
+              text_exporter.new(child_element)
+                           .export
+            end.join
+          end
+        end
+      end
+    end
+  end
+end

data/lib/article_json/export/apple_news/elements/quote.rb ADDED Viewed

@@ -0,0 +1,60 @@
+module ArticleJSON
+  module Export
+    module AppleNews
+      module Elements
+        # Exports a quote element as two Apple News components: the quote
+        # itself (`pullquote`) followed by its attribution (`author`).
+        class Quote < Base
+          include ArticleJSON::Export::Common::HTML::Elements::Base
+          include ArticleJSON::Export::Common::HTML::Elements::Text
+          # Build the quote component followed by the author component
+          # @return [Array[Hash]]
+          def export
+            [quote, author]
+          end
+          private
+          # Quote
+          # @return [Hash]
+          def quote
+            {
+              role: 'pullquote',
+              text: quote_text,
+              format: 'html',
+              layout: 'pullquoteLayout',
+              textStyle: 'pullquoteStyle',
+            }
+          end
+          # Author
+          # @return [Hash]
+          def author
+            {
+              role: 'author',
+              text: author_text,
+              format: 'html',
+              layout: 'pullquoteAttributeLayout',
+              textStyle: 'quoteAttributeStyle',
+            }
+          end
+          # Get the exporter class for text elements
+          # @return [Class] whatever `exporter_by_type(:text)` returns
+          def text_exporter
+            self.class.exporter_by_type(:text)
+          end
+          # Quote Text
+          #
+          # Exports the first child of the quote's first content element.
+          # `&.` does not short-circuit the rest of a call chain in Ruby, so
+          # every link after a possibly-nil receiver needs its own `&.`;
+          # otherwise an empty quote raises NoMethodError on nil.
+          # @return [String] exported text, or '' when the quote has no text
+          def quote_text
+            element = @element.content.first&.content&.first
+            return '' if element.nil?
+            text_exporter.new(element).export
+          end
+          # Author Text
+          #
+          # Exports the first caption element.
+          # @return [String] exported text, or '' when the caption is empty
+          def author_text
+            element = @element.caption.first
+            return '' if element.nil?
+            text_exporter.new(element).export
+          end
+        end
+      end
+    end
+  end
+end

data/lib/article_json/export/apple_news/elements/text.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module ArticleJSON
+  module Export
+    module AppleNews
+      module Elements
+        # Exports a text element as a string for use inside Apple News
+        # components, after stripping a fixed list of HTML tags.
+        class Text < Base
+          include ArticleJSON::Export::Common::HTML::Elements::Base
+          include ArticleJSON::Export::Common::HTML::Elements::Text
+          # Tags that `sanitize_text` removes from the text before export
+          # (presumably the tags Apple News HTML does not accept -- confirm
+          # against the Apple News Format documentation)
+          UNSUPPORTED_HTML_TAGS = %w[
+            title
+            meta
+            script
+            noscript
+            style
+            link
+            applet
+            object
+            iframe
+            noframes
+            form
+            select
+            option
+            optgroup
+          ].freeze
+          # `super` returns a Nokogiri object, which is then converted to a
+          # string: either plain text or HTML markup (when the text is not
+          # plain), both of which are compatible with the Apple News format.
+          # Takes into account bold, italic and href.
+          # @return [String]
+          def export
+            super.to_s
+          end
+          # Sanitize the text, turn newlines into `<br>` tags and parse the
+          # result into nodes
+          # @param [String] text
+          # @return [Nokogiri::XML::NodeSet]
+          def create_text_nodes(text)
+            Nokogiri::HTML.fragment(sanitize_text(text).gsub(/\n/, '<br>')).children
+          end
+          # Removes all elements listed in UNSUPPORTED_HTML_TAGS from text
+          #
+          # @param [String] text
+          # @return [String] the remaining markup as HTML
+          def sanitize_text(text)
+            doc = Nokogiri::HTML.fragment(text)
+            UNSUPPORTED_HTML_TAGS.each do |tag|
+              doc.search(tag).each(&:remove)
+            end
+            doc.inner_html
+          end
+        end
+      end
+    end
+  end
+end

data/lib/article_json/export/apple_news/elements/text_box.rb ADDED Viewed

@@ -0,0 +1,51 @@
+module ArticleJSON
+  module Export
+    module AppleNews
+      module Elements
+        # Exports a text box element as an Apple News `container` component
+        # that wraps the exported child elements.
+        class TextBox < Base
+          include ArticleJSON::Export::Common::HTML::Elements::TextBox
+          # Text box container with its restyled child components
+          # @return [Hash]
+          def export
+            {
+              role: 'container',
+              layout: 'textBoxLayout',
+              style: 'textBoxStyle',
+              components: map_styles(elements),
+            }
+          end
+          private
+          # Export every child element with the exporter matching its class;
+          # anything that is not a heading, paragraph or list is exported as
+          # text.
+          # NOTE(review): the text exporter's `export` appears to return a
+          # String, which would not respond to `merge` in `map_styles` --
+          # confirm whether bare text elements can occur inside a text box.
+          # @return [Array]
+          def elements
+            @element.content.map do |child_element|
+              case child_element
+              when ArticleJSON::Elements::Heading
+                namespace::Heading.new(child_element).export
+              when ArticleJSON::Elements::Paragraph
+                namespace::Paragraph.new(child_element).export
+              when ArticleJSON::Elements::List
+                namespace::List.new(child_element).export
+              else
+                namespace::Text.new(child_element).export
+              end
+            end
+          end
+          # Namespace in which the Apple News element exporters are looked up
+          # @return [Module]
+          def namespace
+            ArticleJSON::Export::AppleNews::Elements
+          end
+          # Give every child component a text-box specific layout name: the
+          # first non-whitespace character of the existing layout is upcased
+          # and the result is prefixed with 'textBox' (e.g. 'bodyLayout'
+          # becomes 'textBoxBodyLayout'). The input hashes are not mutated.
+          # @param [Array] elements exported components, each expected to
+          #   respond to `merge` and to carry a String under :layout
+          # @return [Array]
+          def map_styles(elements)
+            elements.map do |child_element|
+              child_element.merge(layout: 'textBox' +  child_element[:layout].sub(/\S/, &:upcase))
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/article_json/export/apple_news/exporter.rb ADDED Viewed

@@ -0,0 +1,37 @@
+module ArticleJSON
+  module Export
+    module AppleNews
+      # Turns a list of article elements into the `components` section of an
+      # Apple News article.
+      class Exporter
+        # @param [Array[ArticleJSON::Elements::Base]] elements
+        def initialize(elements)
+          @elements = elements
+        end
+        # Return the components section of an Apple News Article as JSON
+        #
+        # Images and EmbeddedVideos are exported as an array nested within the
+        # components array when they contain captions. As Apple News skips
+        # over these nested arrays, we must flatten the array.
+        #
+        # @return [String]
+        def to_json
+          { components: components.flatten }.to_json
+        end
+        private
+        # Export every element to its Apple News component(s). Elements for
+        # which `Base.build` returns nil, and exports that are nil or empty,
+        # are dropped. The result is memoized.
+        #
+        # @return [Array]
+        def components
+          @components ||=
+            @elements.map do |element|
+              ArticleJSON::Export::AppleNews::Elements::Base
+                .build(element)
+                &.export
+            end.reject { |hash| hash.nil? || hash.empty? }
+        end
+      end
+    end
+  end
+end

data/lib/article_json/export/common/html/elements/embed.rb CHANGED Viewed

@@ -20,7 +20,7 @@ module ArticleJSON
             private
             def embed_node
-              type = @element.embed_type.to_s.tr('_','-')
+              type = @element.embed_type.to_s.tr('_', '-')
               create_element(:div, class: "embed #{type}") do |div|
                 div.add_child(embedded_object)
               end
@@ -28,6 +28,7 @@ module ArticleJSON
             def embedded_object
               return unavailable_node unless @element.oembed_data
               Nokogiri::HTML.fragment(@element.oembed_data[:html])
             end

data/lib/article_json/export/common/html/elements/image.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module ArticleJSON
             # @return [Nokogiri::XML::NodeSet]
             def figure_node
               create_element(:figure, node_opts) do |figure|
-                node =  @element&.href ? href_node : image_node
+                node = @element&.href ? href_node : image_node
                 figure.add_child(node)
                 if @element.caption&.any?
                   figure.add_child(caption_node(:figcaption))
@@ -42,6 +42,7 @@ module ArticleJSON
             # @return [Hash]
             def node_opts
               return if floating_class.nil?
               { class: floating_class }
             end
           end

data/lib/article_json/export/common/html/elements/text.rb CHANGED Viewed

@@ -11,6 +11,7 @@ module ArticleJSON
               return bold_and_italic_node if @element.bold && @element.italic
               return bold_node if @element.bold
               return italic_node if @element.italic
               content_node
             end
@@ -38,6 +39,7 @@ module ArticleJSON
             # @return [Nokogiri::XML::NodeSet]
             def content_node
               return create_text_nodes(@element.content) if @element.href.nil?
               create_element(:a, href: @element.href) do |a|
                 a.add_child(create_text_nodes(@element.content))
               end

data/lib/article_json/import/google_doc/html/embedded_parser.rb CHANGED Viewed

@@ -104,6 +104,7 @@ module ArticleJSON
             def find_parser(text)
               text = text.strip.downcase
               return nil if text.empty?
               parsers.find { |klass| klass.matches?(text) }
             end
           end

data/lib/article_json/import/google_doc/html/heading_parser.rb CHANGED Viewed

@@ -19,11 +19,11 @@ module ArticleJSON
           # @return [Integer]
           def level
             case @node.name
-              when 'h1' then 1
-              when 'h2' then 2
-              when 'h3' then 3
-              when 'h4' then 4
-              when 'h5' then 5
+            when 'h1' then 1
+            when 'h2' then 2
+            when 'h3' then 3
+            when 'h4' then 4
+            when 'h5' then 5
             end
           end

data/lib/article_json/import/google_doc/html/image_parser.rb CHANGED Viewed

@@ -22,25 +22,31 @@ module ArticleJSON
           # The value of the image's `alt` attribute
           # @return [String]
           def alt
+            return '' if image_url?
             image_node.attribute('alt')&.value || ''
           end
           # The value of the image's `src` attribute
           # @return [String]
           def source_url
+            return @node.inner_text.strip if image_url?
             image_node.attribute('src').value
           end
           # The node of the actual image
           # @return [Nokogiri::HTML::Node]
           def image_node
-            @node.xpath('.//img').first
+            return @image_node if defined? @image_node
+            @image_node = @node.xpath('.//img').first
           end
           # Check if the image is floating (left, right or not at all)
           # @return [Symbol]
           def float
-            super if floatable_size?
+            super if image_url? || floatable_size?
           end
           # Extracts an href from the tag [image-link-to: url]) if present
@@ -48,8 +54,10 @@ module ArticleJSON
           # @return [String]
           def href
             return if @caption_node.nil?
             match = @caption_node.content.strip.match(href_regexp)
             return if match.nil?
             remove_image_link_tag
             match[:url]
           end
@@ -80,6 +88,7 @@ module ArticleJSON
           def href_regexp
             %r{\[image-link-to:\s+(?<url>.*?)\]}
           end
           # Check if the image's width can be determined and is less than 500px
           # This is about 3/4 of the google document width...
           # @return [Boolean]
@@ -101,6 +110,13 @@ module ArticleJSON
                 match['px'].to_i if match && match['px']
               end
           end
+          # When the current node doesn't contain an actual image tag,
+          # we're dealing with an image URL
+          # @return [Boolean]
+          def image_url?
+            image_node.nil?
+          end
         end
       end
     end

data/lib/article_json/import/google_doc/html/list_parser.rb CHANGED Viewed

@@ -14,8 +14,8 @@ module ArticleJSON
           # @return [Symbol]
           def list_type
             case @node.name
-              when 'ol' then :ordered
-              when 'ul' then :unordered
+            when 'ol' then :ordered
+            when 'ul' then :unordered
             end
           end

data/lib/article_json/import/google_doc/html/node_analyzer.rb CHANGED Viewed

@@ -31,6 +31,7 @@ module ArticleJSON
           # @return [Boolean]
           def empty?
             return @is_empty if defined? @is_empty
             @is_empty = node.inner_text.strip.empty? && !image? && !hr? && !br?
           end
@@ -38,6 +39,7 @@ module ArticleJSON
           # @return [Boolean]
           def heading?
             return @is_heading if defined? @is_heading
             @is_heading =
               !quote? && !text_box? && %w(h1 h2 h3 h4 h5).include?(node.name)
           end
@@ -52,6 +54,7 @@ module ArticleJSON
           # @return [Boolean]
           def paragraph?
             return @is_paragraph if defined? @is_paragraph
             @is_paragraph =
               node.name == 'p' &&
                 !empty? &&
@@ -65,7 +68,8 @@ module ArticleJSON
           # @return [Boolean]
           def list?
             return @is_list if defined? @is_list
-            @is_list = %w(ul ol).include?(node.name)
+            @is_list = %w[ul ol].include?(node.name)
           end
           # Check if the node starts a text box
@@ -73,6 +77,7 @@ module ArticleJSON
           # @return [Boolean]
           def text_box?
             return @is_text_box if defined? @is_text_box
             @is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
           end
@@ -81,6 +86,7 @@ module ArticleJSON
           # @return [Boolean]
           def quote?
             return @is_quote if defined? @is_quote
             @is_quote = has_text?('quote:')
           end
@@ -88,13 +94,25 @@ module ArticleJSON
           # @return [Boolean]
           def image?
             return @is_image if defined? @is_image
-            @is_image = node.xpath('.//img').length > 0
+            @is_image = image_url? || node.xpath('.//img').length > 0
+          end
+          # Check if the node contains an image URL
+          # @return [Boolean]
+          def image_url?
+            return @is_image_url if defined? @is_image_url
+            text = node.inner_text.strip
+            url_regexp = %r{https?:\/\/\S+\.(?:jpg|jpeg|png|gif)}i
+            @is_image_url = !!(url_regexp =~ text)
           end
           # Check if the node contains an embedded element
           # @return [Boolean]
           def embed?
             return @is_embed if defined? @is_embed
             @is_embed = EmbeddedParser.supported?(node)
           end
@@ -103,6 +121,7 @@ module ArticleJSON
           # @return [Boolean]
           def br?
             return @is_br if defined? @is_br
             @is_br = node.name == 'br' || only_includes_brs?
           end
@@ -119,6 +138,7 @@ module ArticleJSON
             return :quote if quote?
             return :image if image?
             return :embed if embed?
             :unknown
           end
@@ -128,9 +148,11 @@ module ArticleJSON
           # @return [Boolean]
           def only_includes_brs?
             return false unless node.inner_text.strip.empty?
             tags = node.children.map(&:name)
             # Check if it only contains <br> and text nodes
-            return false unless tags.all? { |tag| %w(br text).include? tag }
+            return false unless tags.all? { |tag| %w[br text].include? tag }
             # Check if at least one is a `<br>` node
             tags.include?('br')
           end

data/lib/article_json/import/google_doc/html/parser.rb CHANGED Viewed

@@ -6,7 +6,12 @@ module ArticleJSON
           # @param [String] html
           def initialize(html)
             doc = Nokogiri::HTML(html)
-            @body_enumerator = doc.xpath('//body').last.children.to_enum
+            selection = if doc.xpath('//body/div').empty?
+                          doc.xpath('//body')
+                        else
+                          doc.xpath('//body/div')
+                        end
+            @body_enumerator = selection.last.children.to_enum
             css_node = doc.xpath('//head/style').last
             @css_analyzer = CSSAnalyzer.new(css_node&.inner_text)
@@ -113,6 +118,7 @@ module ArticleJSON
             nodes = []
             until !body_has_more_nodes? ||
                 NodeAnalyzer.new(@body_enumerator.peek).hr?
               nodes << @body_enumerator.next
             end
             nodes

data/lib/article_json/import/google_doc/html/shared/caption.rb CHANGED Viewed

@@ -8,6 +8,7 @@ module ArticleJSON
             # @return [Array[ArticleJSON::Elements::Text]]
             def caption
               return [] if no_caption?
               ArticleJSON::Import::GoogleDoc::HTML::TextParser.extract(
                 node: @caption_node,
                 css_analyzer: @css_analyzer

data/lib/article_json/import/google_doc/html/shared/float.rb CHANGED Viewed

@@ -8,9 +8,11 @@ module ArticleJSON
             # @return [Symbol]
             def float
               return unless @float_node.has_attribute?('class')
               node_class = @float_node.attribute('class').value || ''
               return :right if @css_analyzer.right_aligned?(node_class)
               return :left if @css_analyzer.left_aligned?(node_class)
               nil
             end
           end

data/lib/article_json/import/google_doc/html/text_box_parser.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module ArticleJSON
           #                                         May contain tags, too.
           # @param [Array[Nokogiri::HTML::Node]] nodes
           # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
-          def initialize(type_node: ,nodes:, css_analyzer:)
+          def initialize(type_node:, nodes:, css_analyzer:)
             @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
             @css_analyzer = css_analyzer
@@ -32,6 +32,7 @@ module ArticleJSON
             match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
                       .match(@type_node.inner_text)
             return [] unless match
             match[:tags].split(' ')
           end

data/lib/article_json/import/google_doc/html/text_parser.rb CHANGED Viewed

@@ -43,6 +43,7 @@ module ArticleJSON
             if @node.name == 'span' &&
                 @node.first_element_child&.name == 'a' &&
                 @node.first_element_child&.has_attribute?('href')
               strip_google_redirect(
                 @node.first_element_child.attribute('href').value
               )
@@ -68,6 +69,7 @@ module ArticleJSON
             def extract(node:, css_analyzer:)
               node.children.map do |child_node|
                 next if NodeAnalyzer.new(child_node).empty?
                 new(node: child_node, css_analyzer: css_analyzer).element
               end.compact
             end

data/lib/article_json/utils/additional_element_placer.rb CHANGED Viewed

@@ -40,6 +40,7 @@ module ArticleJSON
       # @return [Array[ArticleJSON::Elements::Base|Object]]
       def merge_elements
         return @additional_elements if @elements.nil? || @elements.empty?
         remaining_elements = @additional_elements.dup
         next_in = insert_next_element_in(0, remaining_elements)
         characters_passed = 0
@@ -48,6 +49,7 @@ module ArticleJSON
           .each_with_object([]) do |(element, next_element), result|
             result << element
             next if remaining_elements.empty?
             if element.respond_to?(:length)
               characters_passed += element.length
               next_in -= element.length

data/lib/article_json/utils/o_embed_resolver/base.rb CHANGED Viewed

@@ -23,8 +23,10 @@ module ArticleJSON
         def unavailable_message
           [
             ArticleJSON::Elements::Text.new(content: "The #{name} "),
-            ArticleJSON::Elements::Text.new(content: source_url,
-                                            href: source_url),
+            ArticleJSON::Elements::Text.new(
+              content: source_url,
+              href: source_url
+            ),
             ArticleJSON::Elements::Text.new(content: ' is not available.'),
           ]
         end
@@ -44,19 +46,27 @@ module ArticleJSON
         # @return [Hash|nil]
         def parsed_api_response
           return @api_response if defined? @api_response
           @api_response = begin
             uri = URI.parse(oembed_url)
             http = Net::HTTP.new(uri.host, uri.port)
             http.use_ssl = (uri.scheme == 'https')
             response = http.request(Net::HTTP::Get.new(uri, http_headers))
-            if response.kind_of? Net::HTTPSuccess
-              JSON.parse(response.body, symbolize_names: true)
+            if response.is_a? Net::HTTPSuccess
+              data = JSON.parse(response.body, symbolize_names: true)
+              transform_api_response(data)
             end
           rescue Net::ProtocolError, JSON::ParserError
             nil
           end
         end
+        # @return [Hash]
+        def transform_api_response(data)
+          data
+        end
         # @return [Hash]
         def http_headers
           headers = { 'Content-Type' => 'application/json' }

data/lib/article_json/utils/o_embed_resolver/facebook_video.rb CHANGED Viewed

@@ -11,7 +11,8 @@ module ArticleJSON
         # The URL for the oembed API call
         # @return [String]
         def oembed_url
-          "https://www.facebook.com/plugins/video/oembed.json?url=#{source_url}"
+          "https://graph.facebook.com/v9.0/oembed_video?url=#{source_url}" \
+            "&access_token=#{access_token}"
         end
         # The video URL of the element
@@ -19,6 +20,21 @@ module ArticleJSON
         def source_url
           "https://www.facebook.com/facebook/videos/#{@element.embed_id}"
         end
+        # The facebook access token. If not set, it raises an exception
+        # explaining how to configure it.
+        #
+        # @return [String]
+        def access_token
+          token = ArticleJSON.configuration.facebook_token
+          if token.nil?
+            # Adjacent string literals are joined verbatim, so each fragment
+            # must carry its own trailing space to keep the message readable.
+            raise 'You need to configure the facebook token to use facebook ' \
+                  'embed videos, see: ' \
+                  'https://github.com/Devex/article_json#facebook-oembed'
+          end
+          token
+        end
       end
     end
   end