RubyGems - quesadilla - Versions diffs - 0.1.0 - Mend

quesadilla 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

checksums.yaml +7 -0
data/.gitignore +17 -0
data/.travis.yml +6 -0
data/Contributing.markdown +19 -0
data/Gemfile +19 -0
data/LICENSE +22 -0
data/Rakefile +21 -0
data/Readme.markdown +100 -0
data/lib/quesadilla/core_ext/string.rb +28 -0
data/lib/quesadilla/extractor/autolinks.rb +28 -0
data/lib/quesadilla/extractor/emoji.rb +43 -0
data/lib/quesadilla/extractor/hashtags.rb +28 -0
data/lib/quesadilla/extractor/html.rb +103 -0
data/lib/quesadilla/extractor/markdown.rb +187 -0
data/lib/quesadilla/extractor.rb +140 -0
data/lib/quesadilla/html_renderer.rb +57 -0
data/lib/quesadilla/version.rb +4 -0
data/lib/quesadilla.rb +45 -0
data/quesadilla.gemspec +28 -0
data/test/quesadilla/autolink_test.rb +84 -0
data/test/quesadilla/emoji_test.rb +103 -0
data/test/quesadilla/hashtags_test.rb +50 -0
data/test/quesadilla/html_test.rb +21 -0
data/test/quesadilla/markdown_test.rb +235 -0
data/test/quesadilla/multi_test.rb +64 -0
data/test/quesadilla_test.rb +9 -0
data/test/support/extractor_macros.rb +5 -0
data/test/test_helper.rb +18 -0
metadata +109 -0

data/lib/quesadilla/extractor.rb ADDED Viewed

@@ -0,0 +1,140 @@
+# encoding: UTF-8
+module Quesadilla
+  # Extract entities from text
+  class Extractor
+    require 'quesadilla/core_ext/string'
+    Dir[File.expand_path('../extractor/*.rb', __FILE__)].each { |f| require f }
+    include Autolinks
+    include Emoji
+    include Hashtags
+    include HTML
+    include Markdown
+    # @return [Hash] default extractor options
+    def self.default_options
+      {
+        markdown: true,
+        markdown_code: true,
+        markdown_links: true,
+        markdown_triple_emphasis: true,
+        markdown_double_emphasis: true,
+        markdown_emphasis: true,
+        markdown_strikethrough: true,
+        hashtags: true,
+        autolinks: true,
+        emoji: true,
+        html: true,
+        html_renderer: Quesadilla::HTMLRenderer
+      }
+    end
+    # @param options [Hash] an optional options hash. Defaults to `Quesadilla::Extractor.default_options`.
+    # @option options markdown [Boolean] Should extract Markdown. Defaults to `true`.
+    # @option options markdown_code [Boolean] Should extract Markdown code. Defaults to `true`.
+    # @option options markdown_links [Boolean] Should extract Markdown links. Defaults to `true`.
+    # @option options markdown_triple_emphasis [Boolean] Should extract Markdown triple emphasis (bold italic). Defaults to `true`.
+    # @option options markdown_double_emphasis [Boolean] Should extract Markdown double emphasis (bold). Defaults to `true`.
+    # @option options markdown_emphasis [Boolean] Should extract Markdown emphasis (italic). Defaults to `true`.
+    # @option options markdown_strikethrough [Boolean] Should extract Markdown strikethrough. Defaults to `true`.
+    # @option options hashtags [Boolean] Should extract hashtags. Defaults to `true`.
+    # @option options autolinks [Boolean] Should automatically detect links. Defaults to `true`.
+    # @option options emoji [Boolean] Should extract named emoji. Defaults to `true`.
+    # @option options html [Boolean] Should generate HTML. Defaults to `true`.
+    # @option options html_renderer [Class] class to use as HTML renderer. Defaults to `Quesadilla::HTMLRenderer`.
+    def initialize(options = {})
+      @options = self.class.default_options.merge(options)
+      @renderer = @options[:html_renderer].new if @options[:html]
+    end
+    # Extract entities from text
+    # @param original_text the text to extract from
+    # @return [Hash] hash containing the display text, html text, and entities
+    def extract(original_text)
+      @original_text = original_text.dup
+      # Emoji colon-syntax
+      replace_emoji if @options[:emoji]
+      @working_text = @original_text.dup
+      @entities = []
+      # Get entities
+      extract_markdown if @options[:markdown]
+      extract_hashtags if @options[:hashtags]
+      extract_autolinks if @options[:autolinks]
+      # Sort entities
+      @entities.sort! do |a, b|
+        a[:indices].first <=> b[:indices].first
+      end
+      # Adjust display for each entity
+      display_text = sub_entities(@original_text, @entities)
+      # Return
+      hash = {
+        display_text: display_text,
+        entities: @entities
+      }
+      hash[:display_html] = display_html(display_text, @entities) if @options[:html]
+      hash
+    end
+  private
+    # Invisible character from the reserved range replaces markdown we've already parsed.
+    REPLACE_TOKEN = "\uf042".freeze
+    def display_url(url)
+      url = url.gsub(/(?:https?:\/\/)?(?:www\.)?/i, '').q_truncate(32, omission: '…')
+      url = url[0...(url.length - 1)] if url[-1, 1] == '/'
+      url
+    end
+    def quality_url(url)
+      return url if url.include?('://')
+      'http://' + url
+    end
+    def sub_entities(input_text, entities, display = false, &block)
+      # Adjust output text for each entity
+      output_text = input_text
+      offset = 0
+      entities.each do |entity|
+        entity_original_text = display ? entity[:display_text] : entity[:text]
+        entity_display_text = if block_given?
+          yield(entity)
+        else
+          entity[:display_text]
+        end
+        indices = display ? entity[:display_indices] : entity[:indices]
+        # Use the entity's display text instead of original text if they're different
+        unless entity_original_text == entity_display_text
+          # Get the fragment before the entity
+          bf_end = indices[0] - 1 - offset
+          before_frag = bf_end <= 0 ? '' : output_text[0..bf_end]
+          # Get the fragment after the entity
+          af_start = indices[1] - offset
+          af_end = output_text.length - 1
+          after_frag = af_start > af_end ? '' : output_text[af_start..af_end]
+          # Update the output text
+          output_text = before_frag + entity_display_text + after_frag
+        end
+        # Update offset
+        adjust = entity_original_text.length - entity_display_text.length
+        unless display
+          entity[:display_indices] = [entity[:indices][0] - offset, entity[:indices][1] - offset - adjust]
+        end
+        offset += adjust
+      end
+      output_text
+    end
+  end
+end

data/lib/quesadilla/html_renderer.rb ADDED Viewed

@@ -0,0 +1,57 @@
+module Quesadilla
+  # Default HTML renderer for generating HTML
+  class HTMLRenderer
+    # HTML representation of italic text
+    # @param display_text the italic text
+    # @return [String] HTML representation of the italic text
+    def emphasis(display_text)
+      %Q{<em>#{display_text}</em>}
+    end
+    # HTML representation of bold text
+    # @param display_text the bold text
+    # @return [String] HTML representation of the bold text
+    def double_emphasis(display_text)
+      %Q{<strong>#{display_text}</strong>}
+    end
+    # HTML representation of bold italic text
+    # @param display_text the bold italic text
+    # @return [String] HTML representation of the bold italic text
+    def triple_emphasis(display_text)
+      %Q{<strong><em>#{display_text}</em></strong>}
+    end
+    # HTML representation of strikethrough text
+    # @param display_text the strikethrough text
+    # @return [String] HTML representation of the strikethrough text
+    def strikethrough(display_text)
+      %Q{<del>#{display_text}</del>}
+    end
+    # HTML representation of code
+    # @param display_text the text of the code
+    # @return [String] HTML representation of the code
+    def code(display_text)
+      %Q{<code>#{display_text}</code>}
+    end
+    # HTML representation of a hashtag
+    # @param display_text the hashtag text (`#awesome`)
+    # @param hashtag the hashtag (just `awesome`)
+    # @return [String] HTML representation of the hashtag
+    def hashtag(display_text, hashtag)
+      %Q{<a href="#hashtag-#{hashtag}" class="hashtag">#{display_text}</a>}
+    end
+    # HTML representation of a link
+    # @param display_text the text of the link
+    # @param url the url of the link
+    # @param title the title of the link
+    # @return [String] HTML representation of the link
+    def link(display_text, url, title = nil)
+      title_attr = (title && title.length > 0) ? %Q{ title="#{title}"} : ''
+      %Q{<a href="#{url}" rel="external nofollow" class="link"#{title_attr}>#{display_text}</a>}
+    end
+  end
+end

data/lib/quesadilla/version.rb ADDED Viewed

@@ -0,0 +1,4 @@
+module Quesadilla
+  # Version of the Quesadilla gem
+  VERSION = '0.1.0'.freeze
+end

data/lib/quesadilla.rb ADDED Viewed

@@ -0,0 +1,45 @@
+require 'quesadilla/version'
+require 'quesadilla/html_renderer'
+require 'quesadilla/extractor'
+# Ruby library for entity-style text parsing. Quesadilla was extracted from [Cheddar](https://cheddarapp.com).
+module Quesadilla
+  # Emphasis (italic) entity type
+  ENTITY_TYPE_EMPHASIS = 'emphasis'.freeze
+  # Double emphasis (bold) entity type
+  ENTITY_TYPE_DOUBLE_EMPHASIS = 'double_emphasis'.freeze
+  # Triple emphasis (bold italic) entity type
+  ENTITY_TYPE_TRIPLE_EMPHASIS = 'triple_emphasis'.freeze
+  # Strikethrough entity type
+  ENTITY_TYPE_STRIKETHROUGH = 'strikethrough'.freeze
+  # Code entity type
+  ENTITY_TYPE_CODE = 'code'.freeze
+  # Hashtag entity type
+  ENTITY_TYPE_HASHTAG = 'hashtag'.freeze
+  # Link entity type
+  ENTITY_TYPE_LINK = 'link'.freeze
+  # Extract entities from text
+  # @param text the text to extract
+  # @option options markdown_code [Boolean] Should extract Markdown code. Defaults to `true`.
+  # @option options markdown_links [Boolean] Should extract Markdown links. Defaults to `true`.
+  # @option options markdown_triple_emphasis [Boolean] Should extract Markdown triple emphasis (bold italic). Defaults to `true`.
+  # @option options markdown_double_emphasis [Boolean] Should extract Markdown double emphasis (bold). Defaults to `true`.
+  # @option options markdown_emphasis [Boolean] Should extract Markdown emphasis (italic). Defaults to `true`.
+  # @option options markdown_strikethrough [Boolean] Should extract Markdown strikethrough. Defaults to `true`.
+  # @option options hashtags [Boolean] Should extract hashtags. Defaults to `true`.
+  # @option options autolinks [Boolean] Should automatically detect links. Defaults to `true`.
+  # @option options emoji [Boolean] Should extract named emoji. Defaults to `true`.
+  # @option options html [Boolean] Should generate HTML. Defaults to `true`.
+  # @option options html_renderer [Class] class to use as HTML renderer. Defaults to `Quesadilla::HTMLRenderer`.
+  # @return [Hash] hash containing the display text, html text, and entities
+  def self.extract(text, options = {})
+    Extractor.new(options).extract(text)
+  end
+end

data/quesadilla.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'quesadilla/version'
+Gem::Specification.new do |gem|
+  gem.name          = 'quesadilla'
+  gem.version       = Quesadilla::VERSION
+  gem.authors       = ['Sam Soffes']
+  gem.email         = ['sam@soff.es']
+  gem.description   = 'Entity-style text parsing'
+  gem.summary       = gem.description # summary reuses the description string
+  gem.homepage      = 'https://github.com/soffes/quesadilla'
+  gem.license       = 'MIT'
+  gem.files         = `git ls-files`.split($/) # file list comes from the output of `git ls-files`
+  gem.executables   = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.require_paths = ['lib']
+  gem.required_ruby_version = '>= 1.9.3'
+  # Dependency: hashtag and autolink parsing
+  gem.add_dependency 'twitter-text', '~> 1.5.0'
+  # Dependency: emoji detection
+  gem.add_dependency 'named_emoji', '~> 1.1.1'
+end

data/test/quesadilla/autolink_test.rb ADDED Viewed

@@ -0,0 +1,84 @@
+# encoding: UTF-8
+require 'test_helper'
+module Quesadilla
+  class AutolinkTest < TestCase
+    def test_that_it_extracts_plain_links
+      extraction = extract('Something with a link: http://samsoff.es/posts/hire-sam')
+      assert_equal extraction, {
+        display_text: 'Something with a link: samsoff.es/posts/hire-sam',
+        display_html: 'Something with a link: <a href="http://samsoff.es/posts/hire-sam" rel="external nofollow" class="link">samsoff.es&#x2F;posts&#x2F;hire-sam</a>',
+        entities: [
+          {
+            type: 'link',
+            text: 'http://samsoff.es/posts/hire-sam',
+            display_text: 'samsoff.es/posts/hire-sam',
+            url: 'http://samsoff.es/posts/hire-sam',
+            indices: [23, 55],
+            display_indices: [23, 48]
+          }
+        ]
+      }
+      extraction = extract('Try google.com')
+      assert_equal extraction, {
+        display_text: 'Try google.com',
+        display_html: 'Try <a href="http://google.com" rel="external nofollow" class="link">google.com</a>',
+        entities: [
+          {
+            type: 'link',
+            text: 'google.com',
+            display_text: 'google.com',
+            url: 'http://google.com',
+            indices: [4, 14],
+            display_indices: [4, 14]
+          }
+        ]
+      }
+    end
+    def test_that_it_pretifies_long_links
+      extraction = extract('Something with a long link: https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown')
+      assert_equal extraction, {
+        display_text: 'Something with a long link: github.com/samsoffes/api.chedda…',
+        display_html: 'Something with a long link: <a href="https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown" rel="external nofollow" class="link">github.com&#x2F;samsoffes&#x2F;api.chedda…</a>',
+        entities: [
+          {
+            type: 'link',
+            text: 'https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown',
+            display_text: 'github.com/samsoffes/api.chedda…',
+            url: 'https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown',
+            indices: [28, 103],
+            display_indices: [28, 60]
+          }
+        ]
+      }
+    end
+    def test_that_it_extracts_multiple_plain_links
+      extraction = extract('Something with a link: http://samsoff.es/posts/hire-sam - http://apple.com')
+      assert_equal extraction, {
+        display_text: 'Something with a link: samsoff.es/posts/hire-sam - apple.com',
+        display_html: 'Something with a link: <a href="http://samsoff.es/posts/hire-sam" rel="external nofollow" class="link">samsoff.es&#x2F;posts&#x2F;hire-sam</a> - <a href="http://apple.com" rel="external nofollow" class="link">apple.com</a>',
+        entities: [
+          {
+            type: 'link',
+            text: 'http://samsoff.es/posts/hire-sam',
+            display_text: 'samsoff.es/posts/hire-sam',
+            url: 'http://samsoff.es/posts/hire-sam',
+            indices: [23, 55],
+            display_indices: [23, 48]
+          },
+          {
+            type: 'link',
+            text: 'http://apple.com',
+            display_text: 'apple.com',
+            url: 'http://apple.com',
+            indices: [58, 74],
+            display_indices: [51, 60]
+          }
+        ]
+      }
+    end
+  end
+end

data/test/quesadilla/emoji_test.rb ADDED Viewed

@@ -0,0 +1,103 @@
+# encoding: UTF-8
+require 'test_helper'
+module Quesadilla
+  class EmojiTest < TestCase
+    def test_that_it_supports_emoji
+      extraction = extract('Something with 👨 beardface')
+      assert_equal extraction, {
+        display_text: 'Something with 👨 beardface',
+        display_html: 'Something with 👨 beardface',
+        entities: []
+      }
+    end
+    def test_that_it_supports_emoji_with_other_entities
+      extraction = extract('Something #tagged with 👨 beardface')
+      assert_equal extraction, {
+        display_text: 'Something #tagged with 👨 beardface',
+        display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a> with 👨 beardface',
+        entities: [
+          {
+            type: 'hashtag',
+            text: '#tagged',
+            display_text: '#tagged',
+            hashtag: 'tagged',
+            indices: [10, 17],
+            display_indices: [10, 17]
+          }
+        ]
+      }
+      extraction = extract('After 💇 #foo 👮 **Yep**')
+      assert_equal extraction, {
+        display_text: 'After 💇 #foo 👮 Yep',
+        display_html: 'After 💇 <a href="#hashtag-foo" class="hashtag">#foo</a> 👮 <strong>Yep</strong>',
+        entities: [
+          {
+            type: 'hashtag',
+            text: '#foo',
+            display_text: '#foo',
+            indices: [8, 12],
+            hashtag: 'foo',
+            display_indices: [8, 12]
+          },
+          {
+            type: 'double_emphasis',
+            text: '**Yep**',
+            display_text: 'Yep',
+            indices: [15, 22],
+            display_indices: [15, 18]
+          }
+        ]
+      }
+    end
+    def test_that_it_support_the_colon_syntax
+      extraction = extract('Beardface is :man:')
+      assert_equal extraction, {
+        display_text: 'Beardface is 👨',
+        display_html: 'Beardface is 👨',
+        entities: []
+      }
+      extraction = extract('Beardface is `not here :man:` :man:')
+      assert_equal extraction, {
+        display_text: 'Beardface is not here :man: 👨',
+        display_html: 'Beardface is <code>not here :man:</code> 👨',
+        entities: [
+          {
+            type: 'code',
+            text: '`not here :man:`',
+            display_text: 'not here :man:',
+            indices: [13, 29],
+            display_indices: [13, 27]
+          }
+        ]
+      }
+      # extraction = extract('Something #tagged with :man: **beardface**')
+      # assert_equal extraction, {
+      #   display_text: 'Something #tagged with 👨 beardface',
+      #   display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a> with 👨 <strong>beardface</strong>',
+      #   entities: [
+      #     {
+      #       type: 'hashtag',
+      #       text: '#tagged',
+      #       display_text: '#tagged',
+      #       hashtag: 'tagged',
+      #       indices: [10, 17],
+      #       display_indices: [10, 17]
+      #     },
+      #     {
+      #       type: 'double_emphasis',
+      #       text: '**beardface**',
+      #       display_text: 'beardface',
+      #       indices: [29, 42],
+      #       display_indices: [30, 39]
+      #     }
+      #   ]
+      # }
+    end
+  end
+end

data/test/quesadilla/hashtags_test.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# encoding: UTF-8
+require 'test_helper'
+module Quesadilla
+  class HashtagsTest < TestCase
+    def test_that_it_extracts_tags
+      extraction = extract('Something #tagged')
+      assert_equal extraction, {
+        display_text: 'Something #tagged',
+        display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a>',
+        entities: [
+          {
+            type: 'hashtag',
+            text: '#tagged',
+            display_text: '#tagged',
+            hashtag: 'tagged',
+            indices: [10, 17],
+            display_indices: [10, 17]
+          }
+        ]
+      }
+    end
+    def test_that_it_extracts_multiple_tags
+      extraction = extract('A task with some #tags that are #awesome')
+      assert_equal extraction, {
+        display_text: 'A task with some #tags that are #awesome',
+        display_html: 'A task with some <a href="#hashtag-tags" class="hashtag">#tags</a> that are <a href="#hashtag-awesome" class="hashtag">#awesome</a>',
+        entities: [
+          {
+            type: 'hashtag',
+            text: '#tags',
+            display_text: '#tags',
+            hashtag: 'tags',
+            indices: [17, 22],
+            display_indices: [17, 22]
+          },
+          {
+            type: 'hashtag',
+            text: '#awesome',
+            display_text: '#awesome',
+            hashtag: 'awesome',
+            indices: [32, 40],
+            display_indices: [32, 40]
+          }
+        ]
+      }
+    end
+  end
+end

data/test/quesadilla/html_test.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# encoding: UTF-8
+require 'test_helper'
+module Quesadilla
+  class CustomRenderer < HTMLRenderer
+    def hashtag(display_text, hashtag)
+      %Q{<a href="#tag-#{hashtag}" class="tag">#{display_text}</a>}
+    end
+  end
+  class HTMLTest < TestCase
+    def test_hashtag_url_format
+      extraction = extract('Something #tagged')
+      assert_equal 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a>', extraction[:display_html]
+      extraction = extract('Something #tagged', html_renderer: CustomRenderer)
+      assert_equal 'Something <a href="#tag-tagged" class="tag">#tagged</a>', extraction[:display_html]
+    end
+  end
+end