quesadilla 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ # encoding: UTF-8
2
+
3
+ module Quesadilla
4
+ # Extract entities from text
5
+ class Extractor
6
+ require 'quesadilla/core_ext/string'
7
+ Dir[File.expand_path('../extractor/*.rb', __FILE__)].each { |f| require f }
8
+
9
+ include Autolinks
10
+ include Emoji
11
+ include Hashtags
12
+ include HTML
13
+ include Markdown
14
+
15
# Baseline options used when the caller supplies no overrides: every
# extraction feature is enabled and HTML is rendered with
# `Quesadilla::HTMLRenderer`. A new hash is built on each call.
# @return [Hash] default extractor options
def self.default_options
  enabled_features = [
    :markdown,
    :markdown_code,
    :markdown_links,
    :markdown_triple_emphasis,
    :markdown_double_emphasis,
    :markdown_emphasis,
    :markdown_strikethrough,
    :hashtags,
    :autolinks,
    :emoji,
    :html
  ]

  defaults = enabled_features.each_with_object({}) { |feature, memo| memo[feature] = true }
  defaults[:html_renderer] = Quesadilla::HTMLRenderer
  defaults
end
32
+
33
+ # @param options [Hash] an optional options hash. Defaults to `Quesadilla::Extractor.default_options`.
34
+ # @option options markdown [Boolean] Should extract Markdown. Defaults to `true`.
35
+ # @option options markdown_code [Boolean] Should extract Markdown code. Defaults to `true`.
36
+ # @option options markdown_links [Boolean] Should extract Markdown links. Defaults to `true`.
37
+ # @option options markdown_triple_emphasis [Boolean] Should extract Markdown triple emphasis (bold italic). Defaults to `true`.
38
+ # @option options markdown_double_emphasis [Boolean] Should extract Markdown double emphasis (bold). Defaults to `true`.
39
+ # @option options markdown_emphasis [Boolean] Should extract Markdown emphasis (italic). Defaults to `true`.
40
+ # @option options markdown_strikethrough [Boolean] Should extract Markdown strikethrough. Defaults to `true`.
41
+ # @option options hashtags [Boolean] Should extract hashtags. Defaults to `true`.
42
+ # @option options autolinks [Boolean] Should automatically detect links. Defaults to `true`.
43
+ # @option options emoji [Boolean] Should extract named emoji. Defaults to `true`.
44
+ # @option options html [Boolean] Should generate HTML. Defaults to `true`.
45
+ # @option options html_renderer [Class] class to use as HTML renderer. Defaults to `Quesadilla::HTMLRenderer`.
46
def initialize(options = {})
  # Caller-supplied values win over the class-level defaults.
  @options = self.class.default_options.merge(options)

  # A renderer is only instantiated when HTML output is requested.
  return unless @options[:html]
  @renderer = @options[:html_renderer].new
end
50
+
51
# Extract entities from text.
#
# Works on a copy of the input, runs each enabled extractor, orders the
# collected entities by start index and then substitutes their display text.
# @param original_text the text to extract from
# @return [Hash] hash with `:display_text` and `:entities`, plus
#   `:display_html` when the `:html` option is enabled
def extract(original_text)
  @original_text = original_text.dup

  # Emoji colon-syntax is handled first, before the working copy is taken.
  replace_emoji if @options[:emoji]

  @working_text = @original_text.dup
  @entities = []

  # Run the extractors in a fixed order: Markdown, hashtags, then autolinks.
  extractors = [
    [:markdown, :extract_markdown],
    [:hashtags, :extract_hashtags],
    [:autolinks, :extract_autolinks]
  ]
  extractors.each do |option, extractor|
    send(extractor) if @options[option]
  end

  # Order entities by where they start in the original text.
  @entities.sort! { |left, right| left[:indices].first <=> right[:indices].first }

  # Swap each entity's source text for its display text.
  display_text = sub_entities(@original_text, @entities)

  result = { display_text: display_text, entities: @entities }
  result[:display_html] = display_html(display_text, @entities) if @options[:html]
  result
end
84
+
85
+ private
86
+
87
+ # Character from the Unicode Private Use Area (U+F042) that replaces markdown we've already parsed.
88
+ REPLACE_TOKEN = "\uf042".freeze
89
+
90
# Shorten a URL for display.
#
# Drops a leading `http://` / `https://` and a leading `www.`, passes the
# result through `String#q_truncate(32, omission: '…')`, and finally removes a
# single trailing slash.
# @param url [String] the URL to shorten
# @return [String] the display form of the URL
def display_url(url)
  # Anchored with \A so that a scheme or `www.` appearing later in the URL
  # (for example inside a query string) is left untouched.
  shortened = url.sub(/\A(?:https?:\/\/)?(?:www\.)?/i, '').q_truncate(32, omission: '…')
  shortened.chomp('/')
end
95
+
96
# Make sure a URL carries a scheme, defaulting to `http://`.
#
# Only a scheme at the very start of the string counts; `://` appearing later
# (for example in a query parameter) does not stop the prefix being added.
# @param url [String] a URL with or without a scheme
# @return [String] `url` unchanged when it already starts with a scheme,
#   otherwise `url` prefixed with `http://`
def quality_url(url)
  return url if url =~ /\A[a-z][a-z0-9+.\-]*:\/\//i
  "http://#{url}"
end
100
+
101
# Replace each entity's source text inside `input_text` with its replacement.
#
# Entities are processed in the order given and their indices are shifted by
# the length change of the entities before them, so they are expected to be
# sorted by start index (as `extract` does before calling this).
#
# @param input_text [String] the text the entity indices refer to
# @param entities [Array<Hash>] entities to substitute. When `display` is
#   false, each entity gains a `:display_indices` pair as a side effect.
# @param display [Boolean] when true, entities are located by
#   `:display_indices` / `:display_text` instead of `:indices` / `:text`
# @yield [entity] optional block returning the replacement string for an
#   entity; without a block the entity's `:display_text` is used
# @return [String] the text with every substitution applied
def sub_entities(input_text, entities, display = false, &block)
  output_text = input_text
  offset = 0 # net number of characters removed by earlier substitutions

  entities.each do |entity|
    source_text = display ? entity[:display_text] : entity[:text]
    replacement = block ? block.call(entity) : entity[:display_text]
    first, last = display ? entity[:display_indices] : entity[:indices]

    # Only rebuild the string when the replacement differs from the source.
    unless source_text == replacement
      # Slice by length so that everything before the entity is kept, including
      # the single leading character when the entity starts at index 1.
      head_length = [first - offset, 0].max
      head = output_text[0, head_length]
      tail = output_text[(last - offset)..-1] || ''
      output_text = head + replacement + tail
    end

    adjust = source_text.length - replacement.length
    unless display
      # Record where the entity ends up in the substituted text.
      entity[:display_indices] = [entity[:indices][0] - offset, entity[:indices][1] - offset - adjust]
    end
    offset += adjust
  end

  output_text
end
139
+ end
140
+ end
@@ -0,0 +1,57 @@
1
+ module Quesadilla
2
# Default HTML renderer for generating HTML.
#
# Each method wraps the text it is given in the markup for one entity type.
# Arguments are interpolated as-is; no escaping is performed in this class.
class HTMLRenderer
  # Render italic text.
  # @param display_text the italic text
  # @return [String] HTML representation of the italic text
  def emphasis(display_text)
    "<em>#{display_text}</em>"
  end

  # Render bold text.
  # @param display_text the bold text
  # @return [String] HTML representation of the bold text
  def double_emphasis(display_text)
    "<strong>#{display_text}</strong>"
  end

  # Render bold italic text.
  # @param display_text the bold italic text
  # @return [String] HTML representation of the bold italic text
  def triple_emphasis(display_text)
    "<strong><em>#{display_text}</em></strong>"
  end

  # Render strikethrough text.
  # @param display_text the strikethrough text
  # @return [String] HTML representation of the strikethrough text
  def strikethrough(display_text)
    "<del>#{display_text}</del>"
  end

  # Render inline code.
  # @param display_text the text of the code
  # @return [String] HTML representation of the code
  def code(display_text)
    "<code>#{display_text}</code>"
  end

  # Render a hashtag as an in-page anchor link.
  # @param display_text the hashtag text (`#awesome`)
  # @param hashtag the hashtag (just `awesome`)
  # @return [String] HTML representation of the hashtag
  def hashtag(display_text, hashtag)
    "<a href=\"#hashtag-#{hashtag}\" class=\"hashtag\">#{display_text}</a>"
  end

  # Render an external link.
  # @param display_text the text of the link
  # @param url the url of the link
  # @param title the title of the link; omitted from the markup when nil or empty
  # @return [String] HTML representation of the link
  def link(display_text, url, title = nil)
    attributes = "href=\"#{url}\" rel=\"external nofollow\" class=\"link\""
    attributes += " title=\"#{title}\"" if title && title.length > 0
    "<a #{attributes}>#{display_text}</a>"
  end
end
57
+ end
@@ -0,0 +1,4 @@
1
module Quesadilla
  # Released version of the Quesadilla gem (read by the gemspec).
  VERSION = '0.1.0'.freeze
end
data/lib/quesadilla.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'quesadilla/version'
2
+ require 'quesadilla/html_renderer'
3
+ require 'quesadilla/extractor'
4
+
5
# Ruby library for entity-style text parsing. Quesadilla was extracted from [Cheddar](https://cheddarapp.com).
module Quesadilla
  # Entity type for emphasis (italic)
  ENTITY_TYPE_EMPHASIS = 'emphasis'.freeze

  # Entity type for double emphasis (bold)
  ENTITY_TYPE_DOUBLE_EMPHASIS = 'double_emphasis'.freeze

  # Entity type for triple emphasis (bold italic)
  ENTITY_TYPE_TRIPLE_EMPHASIS = 'triple_emphasis'.freeze

  # Entity type for strikethrough
  ENTITY_TYPE_STRIKETHROUGH = 'strikethrough'.freeze

  # Entity type for code
  ENTITY_TYPE_CODE = 'code'.freeze

  # Entity type for hashtags
  ENTITY_TYPE_HASHTAG = 'hashtag'.freeze

  # Entity type for links
  ENTITY_TYPE_LINK = 'link'.freeze

  class << self
    # Extract entities from text. Shorthand for building an `Extractor` with
    # `options` and calling `extract` on it.
    # @param text the text to extract
    # @param options [Hash] an optional options hash, passed to `Extractor.new`
    # @option options markdown [Boolean] Should extract Markdown. Defaults to `true`.
    # @option options markdown_code [Boolean] Should extract Markdown code. Defaults to `true`.
    # @option options markdown_links [Boolean] Should extract Markdown links. Defaults to `true`.
    # @option options markdown_triple_emphasis [Boolean] Should extract Markdown triple emphasis (bold italic). Defaults to `true`.
    # @option options markdown_double_emphasis [Boolean] Should extract Markdown double emphasis (bold). Defaults to `true`.
    # @option options markdown_emphasis [Boolean] Should extract Markdown emphasis (italic). Defaults to `true`.
    # @option options markdown_strikethrough [Boolean] Should extract Markdown strikethrough. Defaults to `true`.
    # @option options hashtags [Boolean] Should extract hashtags. Defaults to `true`.
    # @option options autolinks [Boolean] Should automatically detect links. Defaults to `true`.
    # @option options emoji [Boolean] Should extract named emoji. Defaults to `true`.
    # @option options html [Boolean] Should generate HTML. Defaults to `true`.
    # @option options html_renderer [Class] class to use as HTML renderer. Defaults to `Quesadilla::HTMLRenderer`.
    # @return [Hash] hash containing the display text, html text, and entities
    def extract(text, options = {})
      extractor = Extractor.new(options)
      extractor.extract(text)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'quesadilla/version'
5
+
6
# Gem specification for Quesadilla. The file list comes from `git ls-files`,
# so the gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name        = 'quesadilla'
  spec.version     = Quesadilla::VERSION
  spec.authors     = ['Sam Soffes']
  spec.email       = ['sam@soff.es']
  spec.description = 'Entity-style text parsing'
  spec.summary     = spec.description
  spec.homepage    = 'https://github.com/soffes/quesadilla'
  spec.license     = 'MIT'

  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.required_ruby_version = '>= 1.9.3'

  # Hashtag and autolink parsing
  spec.add_dependency 'twitter-text', '~> 1.5.0'

  # Emoji detection
  spec.add_dependency 'named_emoji', '~> 1.1.1'
end
@@ -0,0 +1,84 @@
1
+ # encoding: UTF-8
2
+ require 'test_helper'
3
+
4
+ module Quesadilla
5
# Covers detection of bare URLs and how they are shortened for display.
# Assertions pass the expected value first so failure messages label the two
# sides correctly.
class AutolinkTest < TestCase
  def test_that_it_extracts_plain_links
    expected = {
      display_text: 'Something with a link: samsoff.es/posts/hire-sam',
      display_html: 'Something with a link: <a href="http://samsoff.es/posts/hire-sam" rel="external nofollow" class="link">samsoff.es&#x2F;posts&#x2F;hire-sam</a>',
      entities: [
        {
          type: 'link',
          text: 'http://samsoff.es/posts/hire-sam',
          display_text: 'samsoff.es/posts/hire-sam',
          url: 'http://samsoff.es/posts/hire-sam',
          indices: [23, 55],
          display_indices: [23, 48]
        }
      ]
    }
    assert_equal expected, extract('Something with a link: http://samsoff.es/posts/hire-sam')

    # A bare domain is linked with an http:// scheme added to the URL.
    expected = {
      display_text: 'Try google.com',
      display_html: 'Try <a href="http://google.com" rel="external nofollow" class="link">google.com</a>',
      entities: [
        {
          type: 'link',
          text: 'google.com',
          display_text: 'google.com',
          url: 'http://google.com',
          indices: [4, 14],
          display_indices: [4, 14]
        }
      ]
    }
    assert_equal expected, extract('Try google.com')
  end

  def test_that_it_pretifies_long_links
    expected = {
      display_text: 'Something with a long link: github.com/samsoffes/api.chedda…',
      display_html: 'Something with a long link: <a href="https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown" rel="external nofollow" class="link">github.com&#x2F;samsoffes&#x2F;api.chedda…</a>',
      entities: [
        {
          type: 'link',
          text: 'https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown',
          display_text: 'github.com/samsoffes/api.chedda…',
          url: 'https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown',
          indices: [28, 103],
          display_indices: [28, 60]
        }
      ]
    }
    assert_equal expected, extract('Something with a long link: https://github.com/samsoffes/api.cheddarapp.com/blob/master/Readme.markdown')
  end

  def test_that_it_extracts_multiple_plain_links
    expected = {
      display_text: 'Something with a link: samsoff.es/posts/hire-sam - apple.com',
      display_html: 'Something with a link: <a href="http://samsoff.es/posts/hire-sam" rel="external nofollow" class="link">samsoff.es&#x2F;posts&#x2F;hire-sam</a> - <a href="http://apple.com" rel="external nofollow" class="link">apple.com</a>',
      entities: [
        {
          type: 'link',
          text: 'http://samsoff.es/posts/hire-sam',
          display_text: 'samsoff.es/posts/hire-sam',
          url: 'http://samsoff.es/posts/hire-sam',
          indices: [23, 55],
          display_indices: [23, 48]
        },
        {
          type: 'link',
          text: 'http://apple.com',
          display_text: 'apple.com',
          url: 'http://apple.com',
          indices: [58, 74],
          display_indices: [51, 60]
        }
      ]
    }
    assert_equal expected, extract('Something with a link: http://samsoff.es/posts/hire-sam - http://apple.com')
  end
end
84
+ end
@@ -0,0 +1,103 @@
1
+ # encoding: UTF-8
2
+ require 'test_helper'
3
+
4
+ module Quesadilla
5
# Covers literal emoji passing through untouched and `:name:` colon syntax
# being replaced. Assertions pass the expected value first so failure messages
# label the two sides correctly.
class EmojiTest < TestCase
  def test_that_it_supports_emoji
    expected = {
      display_text: 'Something with 👨 beardface',
      display_html: 'Something with 👨 beardface',
      entities: []
    }
    assert_equal expected, extract('Something with 👨 beardface')
  end

  def test_that_it_supports_emoji_with_other_entities
    expected = {
      display_text: 'Something #tagged with 👨 beardface',
      display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a> with 👨 beardface',
      entities: [
        {
          type: 'hashtag',
          text: '#tagged',
          display_text: '#tagged',
          hashtag: 'tagged',
          indices: [10, 17],
          display_indices: [10, 17]
        }
      ]
    }
    assert_equal expected, extract('Something #tagged with 👨 beardface')

    expected = {
      display_text: 'After 💇 #foo 👮 Yep',
      display_html: 'After 💇 <a href="#hashtag-foo" class="hashtag">#foo</a> 👮 <strong>Yep</strong>',
      entities: [
        {
          type: 'hashtag',
          text: '#foo',
          display_text: '#foo',
          indices: [8, 12],
          hashtag: 'foo',
          display_indices: [8, 12]
        },
        {
          type: 'double_emphasis',
          text: '**Yep**',
          display_text: 'Yep',
          indices: [15, 22],
          display_indices: [15, 18]
        }
      ]
    }
    assert_equal expected, extract('After 💇 #foo 👮 **Yep**')
  end

  def test_that_it_support_the_colon_syntax
    expected = {
      display_text: 'Beardface is 👨',
      display_html: 'Beardface is 👨',
      entities: []
    }
    assert_equal expected, extract('Beardface is :man:')

    # Colon syntax inside a code span is left as typed.
    expected = {
      display_text: 'Beardface is not here :man: 👨',
      display_html: 'Beardface is <code>not here :man:</code> 👨',
      entities: [
        {
          type: 'code',
          text: '`not here :man:`',
          display_text: 'not here :man:',
          indices: [13, 29],
          display_indices: [13, 27]
        }
      ]
    }
    assert_equal expected, extract('Beardface is `not here :man:` :man:')

    # NOTE(review): the case below was already commented out in the original;
    # the reason is not recorded here. Kept verbatim for whoever re-enables it.
    # extraction = extract('Something #tagged with :man: **beardface**')
    # assert_equal extraction, {
    #   display_text: 'Something #tagged with 👨 beardface',
    #   display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a> with 👨 <strong>beardface</strong>',
    #   entities: [
    #     {
    #       type: 'hashtag',
    #       text: '#tagged',
    #       display_text: '#tagged',
    #       hashtag: 'tagged',
    #       indices: [10, 17],
    #       display_indices: [10, 17]
    #     },
    #     {
    #       type: 'double_emphasis',
    #       text: '**beardface**',
    #       display_text: 'beardface',
    #       indices: [29, 42],
    #       display_indices: [30, 39]
    #     }
    #   ]
    # }
  end
end
103
+ end
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+ require 'test_helper'
3
+
4
+ module Quesadilla
5
# Covers hashtag extraction for one and for several tags in the same text.
# Assertions pass the expected value first so failure messages label the two
# sides correctly.
class HashtagsTest < TestCase
  def test_that_it_extracts_tags
    expected = {
      display_text: 'Something #tagged',
      display_html: 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a>',
      entities: [
        {
          type: 'hashtag',
          text: '#tagged',
          display_text: '#tagged',
          hashtag: 'tagged',
          indices: [10, 17],
          display_indices: [10, 17]
        }
      ]
    }
    assert_equal expected, extract('Something #tagged')
  end

  def test_that_it_extracts_multiple_tags
    expected = {
      display_text: 'A task with some #tags that are #awesome',
      display_html: 'A task with some <a href="#hashtag-tags" class="hashtag">#tags</a> that are <a href="#hashtag-awesome" class="hashtag">#awesome</a>',
      entities: [
        {
          type: 'hashtag',
          text: '#tags',
          display_text: '#tags',
          hashtag: 'tags',
          indices: [17, 22],
          display_indices: [17, 22]
        },
        {
          type: 'hashtag',
          text: '#awesome',
          display_text: '#awesome',
          hashtag: 'awesome',
          indices: [32, 40],
          display_indices: [32, 40]
        }
      ]
    }
    assert_equal expected, extract('A task with some #tags that are #awesome')
  end
end
50
+ end
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
+ require 'test_helper'
3
+
4
+ module Quesadilla
5
# Renderer used by HTMLTest: overrides only the hashtag markup so the test can
# tell it apart from the default renderer's output.
class CustomRenderer < HTMLRenderer
  def hashtag(display_text, hashtag)
    "<a href=\"#tag-#{hashtag}\" class=\"tag\">#{display_text}</a>"
  end
end
10
+
11
+
12
# Checks that hashtag HTML comes from the default renderer unless a custom
# renderer class is passed through the `html_renderer` option.
class HTMLTest < TestCase
  def test_hashtag_url_format
    default_html = extract('Something #tagged')[:display_html]
    assert_equal 'Something <a href="#hashtag-tagged" class="hashtag">#tagged</a>', default_html

    custom_html = extract('Something #tagged', html_renderer: CustomRenderer)[:display_html]
    assert_equal 'Something <a href="#tag-tagged" class="tag">#tagged</a>', custom_html
  end
end
21
+ end