quesadilla 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 17f8cfec03e4c86278585c52d2ae57995a984d3c
4
+ data.tar.gz: e145d4496a0bc034e278f6a2c4738d48006d7a8b
5
+ SHA512:
6
+ metadata.gz: aaf8418d063286d7666e0b666b5badbcf63e2e54cceb225d35e69bc238fb3f0973544c547cbc806dde042e5d8f7ae37fa4accb1325c210ef5d9eb8a989f6330a
7
+ data.tar.gz: 39f070f60d71278bceeab4e939944ebc921609e4c6eb90b26f0e09ac693639b4dd80bd3a423e33dccd32f641fe6218213b665c3ebf43282e2a203134ef7454fb
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ bundler_args: --without development
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - jruby-19mode
@@ -0,0 +1,19 @@
1
+ ## Submitting a Pull Request
2
+
3
+ 1. [Fork the repository.][fork]
4
+ 2. [Create a topic branch.][branch]
5
+ 3. Add tests for your unimplemented feature or bug fix.
6
+ 4. Run `bundle exec rake`. If your tests pass, return to step 3.
7
+ 5. Implement your feature or bug fix.
8
+ 6. Run `bundle exec rake`. If your tests fail, return to step 5.
9
+ 7. Run `open coverage/index.html`. If your changes are not completely covered
10
+ by your tests, return to step 3.
11
+ 8. Add documentation for your feature or bug fix.
12
+ 9. Run `bundle exec rake doc`. If your changes are not 100% documented, go
13
+ back to step 8.
14
+ 10. Add, commit, and push your changes.
15
+ 11. [Submit a pull request.][pr]
16
+
17
+ [fork]: http://help.github.com/fork-a-repo/
18
+ [branch]: http://learn.github.com/p/branching.html
19
+ [pr]: http://help.github.com/send-pull-requests/
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Gem dependencies
4
+ gemspec
5
+
6
+ gem 'rake', group: [:development, :test]
7
+
8
+ # Development dependencies
9
+ group :development do
10
+ gem 'yard'
11
+ gem 'redcarpet', platform: 'ruby'
12
+ end
13
+
14
+ # Testing dependencies
15
+ group :test do
16
+ gem 'minitest'
17
+ gem 'minitest-wscolor'
18
+ gem 'simplecov', require: false
19
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Sam Soffes
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
# Gem packaging tasks (build, install, release) supplied by Bundler.
require 'bundler'
Bundler::GemHelper.install_tasks

# `rake test` runs every file under test/ whose name ends in _test.rb.
# It is also the default task.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
end
task default: :test

# `rake doc` is only defined when YARD can be loaded. The Gemfile lists yard in
# the development group, so an install without that group simply gets no doc
# task instead of a broken Rakefile.
begin
  require 'yard'
  YARD::Rake::YardocTask.new(:doc) do |doc_task|
    doc_task.files = ['Readme.markdown', 'LICENSE', 'lib/**/*.rb']
    doc_task.options = ['--output-dir', 'doc', '--markup', 'markdown']
  end
rescue LoadError
  # YARD is not installed; skip the documentation task.
end
data/Readme.markdown ADDED
@@ -0,0 +1,100 @@
1
+ # Quesadilla
2
+
3
+ Entity-style text parsing. Quesadilla was extracted from [Cheddar](https://cheddarapp.com).
4
+
5
+ See the [Cheddar text guide](https://cheddarapp.com/text) for more information about how to type entities.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ``` ruby
12
+ gem 'quesadilla'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install quesadilla
22
+
23
+ ## Usage
24
+
25
+ To extract entities from text, simply call extract:
26
+
27
+ ``` ruby
28
+ Quesadilla.extract('Some #awesome text')
29
+ # => {
30
+ # display_text: "Some #awesome text",
31
+ # display_html: "Some <a href=\"#hashtag-awesome\" class=\"tag\">#awesome</a> text",
32
+ # entities: [
33
+ # {
34
+ # type: "hashtag",
35
+ # text: "#awesome",
36
+ # display_text: "#awesome",
37
+ # indices: [5, 13],
38
+ # hashtag: "awesome",
39
+ # display_indices: [5, 13]
40
+ # }
41
+ # ]
42
+ # }
43
+ ```
44
+
45
+ ### Configuring
46
+
47
+ Quesadilla supports extracting various span-level Markdown features as well as automatically detecting links and GitHub-style named emoji. Here is the list of options you can pass when extracting:
48
+
49
+ Option | Description
50
+ ----------------------------|-----------------------------------------------------------------
51
+ `:markdown` | All Markdown parsing
52
+ `:markdown_code` | Markdown code tags
53
+ `:markdown_links` | Markdown links (including `<http://soff.es>` style links)
54
+ `:markdown_triple_emphasis` | Markdown bold italic
55
+ `:markdown_double_emphasis` | Markdown bold
56
+ `:markdown_emphasis` | Markdown italic
57
+ `:markdown_strikethrough` | Markdown Extra strikethrough
58
+ `:hashtags` | Hashtags
59
+ `:autolinks` | Automatically detect links
60
+ `:emoji` | GitHub-style named emoji
61
+ `:html` | Generate HTML representations for entities and the entire string
62
+
63
+ Everything is enabled by default. If you don't want to extract Markdown, you should call the extractor like this:
64
+
65
+ ``` ruby
66
+ Quesadilla.extract('Some text', markdown: false)
67
+ ```
68
+
69
+ You can also just disable strikethrough and still extract the rest of the Markdown entities if you want:
70
+
71
+ ``` ruby
72
+ Quesadilla.extract('Some text', markdown_strikethrough: false)
73
+ ```
74
+
75
+ ### Customizing HTML
76
+
77
+ If you want to change the generated HTML, you can create a custom renderer:
78
+
79
+ ``` ruby
80
+ class CustomRenderer < Quesadilla::HTMLRenderer
81
+ def hashtag(display_text, hashtag)
82
+ %Q{<a href="http://example.com/tags/#{hashtag}" class="tag">#{display_text}</a>}
83
+ end
84
+ end
85
+
86
+ extraction = Quesadilla.extract('Some #awesome text', html_renderer: CustomRenderer)
87
+ extraction[:display_html] #=> 'Some <a href="http://example.com/tags/awesome" class="tag">#awesome</a> text'
88
+ ```
89
+
90
+ Take a look at [Quesadilla::HTMLRenderer](lib/quesadilla/html_renderer.html) for more details on creating a custom renderer.
91
+
92
+ ## Supported Ruby Versions
93
+
94
+ Quesadilla is tested under 1.9.3, 2.0.0, and JRuby (1.9 mode).
95
+
96
+ [![Build Status](https://travis-ci.org/soffes/quesadilla.png?branch=master)](https://travis-ci.org/soffes/quesadilla)
97
+
98
+ ## Contributing
99
+
100
+ See the [contributing guide](Contributing.markdown).
@@ -0,0 +1,28 @@
1
# String additions
class String
  # Truncate method adapted from ActiveSupport.
  #
  # Returns a copy of the string shortened so that the result, omission
  # included, is at most +truncate_at+ characters long whenever +truncate_at+
  # is at least as long as the omission.
  #
  # @param truncate_at [Integer] maximum length of the returned string, omission included
  # @param options [Hash] optional options hash; it is only read, never modified
  # @option options [String] :separator cut only at the last occurrence of this string at or before the limit
  # @option options [String] :omission string appended to indicate truncated text. Defaults to '...'
  # @return [String] a new string; an unmodified copy when the receiver already fits
  def q_truncate(truncate_at, options = {})
    return dup unless length > truncate_at

    # Read the default locally instead of writing it back, so the caller's
    # hash (which may be shared or frozen) is left untouched.
    omission = options[:omission] || '...'

    # Reserve room for the omission. Clamp at zero: a negative value would be
    # treated as an offset from the end of the string and could return more
    # text than the receiver contains.
    limit = [truncate_at - omission.length, 0].max

    # Prefer the last separator at or before the limit; fall back to a hard cut
    separator = options[:separator]
    stop = (separator && rindex(separator, limit)) || limit

    # Return the truncated string plus the omission string
    self[0...stop] + omission
  end
end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
require 'twitter-text'

module Quesadilla
  class Extractor
    # Extract plain links.
    #
    # This module has no public methods.
    module Autolinks
      private

      # Adds one link entity to @entities for every URL that twitter-text finds
      # in @working_text, then masks that URL in @working_text with
      # REPLACE_TOKEN.
      def extract_autolinks
        Twitter::Extractor.extract_urls_with_indices(@working_text).each do |entity|
          url = entity[:url]
          first, last = entity[:indices]

          @entities << {
            type: ENTITY_TYPE_LINK,
            text: url,
            display_text: display_url(url),
            url: quality_url(url),
            indices: entity[:indices]
          }

          # Mask the exact span the extractor reported. Replacing the first
          # textual occurrence instead could hit an earlier, unrelated copy of
          # the same characters and leave the real URL unmasked.
          # NOTE(review): assumes REPLACE_TOKEN is a single character so the
          # text keeps its length and later indices stay valid - confirm.
          @working_text[first...last] = REPLACE_TOKEN * (last - first)
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
require 'digest/sha1'
require 'named_emoji'

module Quesadilla
  class Extractor
    # Extract named emoji.
    #
    # This module has no public methods.
    module Emoji
      private

      # Emoji colon-syntax regex
      EMOJI_COLON_REGEX = %r{:([a-zA-Z0-9_\-\+]+):}.freeze

      # Replaces every known `:name:` code in @original_text (in place) with
      # the value NamedEmoji.emojis holds for it. Text matched by
      # Markdown::CODE_REGEX is hidden behind SHA1 placeholders first and
      # restored afterwards, so emoji codes inside code spans are left alone.
      def replace_emoji
        codes = {}

        # Replace codes with shas. The counter keeps the keys unique when the
        # same code span appears more than once.
        i = 0
        while match = @original_text.match(Markdown::CODE_REGEX)
          original = match[0]
          key = Digest::SHA1.hexdigest("#{original}-#{i}")
          codes[key] = original
          @original_text[match.begin(0), original.length] = key
          i += 1
        end

        # Replace emojis. `position` only moves forward past names that are
        # not known emoji; without it an unknown `:name:` would be matched
        # again and again and the loop would never finish.
        position = 0
        while match = @original_text.match(EMOJI_COLON_REGEX, position)
          sym = match[1].downcase.to_sym
          if NamedEmoji.emojis.key?(sym)
            @original_text[match.begin(0)...match.end(0)] = NamedEmoji.emojis[sym]
            position = match.begin(0)
          else
            # Step past the opening colon only, so the closing colon of this
            # candidate can still open the next one (":nope:smile:").
            position = match.begin(0) + 1
          end
        end

        # Unreplace codes. The block form inserts the code verbatim; a plain
        # replacement string would have backslash sequences such as \0 or \1
        # interpreted as back-references.
        codes.each do |key, value|
          @original_text.sub!(key) { value }
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
require 'twitter-text'

module Quesadilla
  class Extractor
    # Extract hashtags.
    #
    # This module has no public methods.
    module Hashtags
      private

      # Adds one hashtag entity to @entities for every hashtag that
      # twitter-text finds in @working_text, then masks that hashtag in
      # @working_text with REPLACE_TOKEN.
      def extract_hashtags
        Twitter::Extractor.extract_hashtags_with_indices(@working_text).each do |entity|
          tag = entity[:hashtag]
          entity_text = "##{tag}"
          first, last = entity[:indices]

          @entities << {
            type: ENTITY_TYPE_HASHTAG,
            text: entity_text,
            display_text: entity_text,
            indices: entity[:indices],
            hashtag: tag.downcase
          }

          # Mask the exact span the extractor reported. Searching for the
          # rebuilt "#tag" string could hit an earlier, unrelated copy of it,
          # or find nothing if the text does not contain that exact string.
          # NOTE(review): assumes REPLACE_TOKEN is a single character so the
          # text keeps its length and later indices stay valid - confirm.
          @working_text[first...last] = REPLACE_TOKEN * (last - first)
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Convert entities and entire string to HTML.
    #
    # This module has no public methods.
    module HTML
      private

      # One entry per character that must be escaped in HTML output:
      #   pattern     - the raw character
      #   text        - its HTML escape sequence
      #   placeholder - a single private-use character standing in for it while
      #                 entities are spliced in, so string indexes do not shift
      HTML_ESCAPE_MAP = [
        {
          pattern: '&',
          text: '&amp;',
          placeholder: "\uf050",
        },
        {
          pattern: '<',
          text: '&lt;',
          placeholder: "\uf051",
        },
        {
          pattern: '>',
          text: '&gt;',
          placeholder: "\uf052",
        },
        {
          pattern: '"',
          text: '&quot;',
          placeholder: "\uf053",
        },
        {
          pattern: '\'',
          text: '&#x27;',
          placeholder: "\uf054",
        },
        {
          pattern: '/',
          text: '&#x2F;',
          placeholder: "\uf055",
        }
      ].freeze

      # Build the HTML for a whole string.
      #
      # @param display_text [String] text to convert
      # @param entities [Array<Hash>, nil] entities found in the text
      # @return [String] escaped HTML with every entity rendered
      def display_html(display_text, entities)
        return html_escape(display_text) if !entities || entities.empty?

        # Replace entities
        html = sub_entities(display_text, entities, true) do |entity|
          html_entity(entity)
        end

        # Swap the placeholders for the real escape sequences
        html_un_pre_escape(html)
      end

      # Render a single entity through @renderer. The result still contains
      # placeholders; display_html converts them once everything is in place.
      #
      # @param entity [Hash] entity with at least :type and :display_text
      # @return [String] renderer output for the entity
      def html_entity(entity)
        display_text = html_pre_escape(entity[:display_text])
        case entity[:type]
        when ENTITY_TYPE_EMPHASIS
          @renderer.emphasis(display_text)
        when ENTITY_TYPE_DOUBLE_EMPHASIS
          @renderer.double_emphasis(display_text)
        when ENTITY_TYPE_TRIPLE_EMPHASIS
          @renderer.triple_emphasis(display_text)
        when ENTITY_TYPE_STRIKETHROUGH
          @renderer.strikethrough(display_text)
        when ENTITY_TYPE_CODE
          @renderer.code(display_text)
        when ENTITY_TYPE_HASHTAG
          @renderer.hashtag(display_text, html_pre_escape(entity[:hashtag]))
        when ENTITY_TYPE_LINK
          @renderer.link(display_text, entity[:url], html_pre_escape(entity[:title]))
        else
          # Catchall
          html_pre_escape(entity[:text])
        end
      end

      # Pre-escape. Convert bad characters to high UTF-8 characters.
      # We do this dance so we don't throw off the indexes so the entities get inserted correctly.
      #
      # @param string [String, nil]
      # @return [String] copy with one placeholder per unsafe character; '' for nil
      def html_pre_escape(string)
        return '' unless string
        html_translate(string, :pattern, :placeholder)
      end

      # Convert bad characters (now, high UTF-8 characters) to HTML escaped ones
      #
      # @param string [String]
      # @return [String] copy with every placeholder expanded
      def html_un_pre_escape(string)
        html_translate(string, :placeholder, :text)
      end

      # Escape a string for HTML in one step.
      #
      # @param string [String, nil]
      # @return [String] escaped copy; '' for nil
      def html_escape(string)
        return '' unless string
        html_translate(string, :pattern, :text)
      end

      # Replace every occurrence of one HTML_ESCAPE_MAP column with another in
      # a single pass. All three escape methods read the same table, so they
      # cannot drift apart, and a replacement is never scanned again.
      #
      # @param string [String] text to convert
      # @param from [Symbol] HTML_ESCAPE_MAP key to search for
      # @param to [Symbol] HTML_ESCAPE_MAP key to substitute
      # @return [String] converted copy
      def html_translate(string, from, to)
        table = HTML_ESCAPE_MAP.each_with_object({}) do |escape, mapping|
          mapping[escape[from]] = escape[to]
        end
        string.gsub(Regexp.union(table.keys), table)
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Extract Markdown
    #
    # This module has no public methods.
    module Markdown
      private

      # Gruber's regex is recursive, but I can't figure out how to do it in Ruby without the `g` option.
      # Maybe I should use StringScanner instead. For now, I think it's fine. Everything appears to work
      # as expected.
      NESTED_BRACKETS_REGEX = %r{
        (?>
          [^\[\]]+
        )*
      }x.freeze

      # 2 = Text, 3 = URL, 6 = Title
      LINK_REGEX = %r{
        (
          \[
            (#{NESTED_BRACKETS_REGEX})
          \]
          \(
            [ \t]*
            <?(.*?)>?
            [ \t]*
            (
              (['"])
              (.*?)
              \5
            )?
          \)
        )
      }x.freeze

      # 1 = URL
      AUTOLINK_LINK_REGEX = /<((?:https?|ftp):[^'">\s]+)>/i.freeze

      # 1 = Email
      AUTOLINK_EMAIL_REGEX = %r{
        <
        (?:mailto:)?
        (
          [-.\w]+
          \@
          [-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+
        )
        >
      }xi.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_ITALIC_REGEX = %r{ (\*\*\*|___) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_REGEX = %r{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      ITALIC_REGEX = %r{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      STRIKETHROUGH_REGEX = %r{ (~~) (?=\S) (.+?[~]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      CODE_REGEX = %r{ (`+) (.+?) (?<!`) \1 (?!`) }x.freeze

      # Run every Markdown extraction enabled in @options. Code runs first and
      # emphasis runs from the longest delimiter to the shortest; each pass
      # masks what it consumed so later passes cannot match it again.
      def extract_markdown
        extract_markdown_code if @options[:markdown_code]

        if @options[:markdown_links]
          extract_markdown_autolink_links
          extract_markdown_autolink_email
          extract_markdown_links
        end

        extract_markdown_span(BOLD_ITALIC_REGEX, ENTITY_TYPE_TRIPLE_EMPHASIS) if @options[:markdown_triple_emphasis]
        extract_markdown_span(BOLD_REGEX, ENTITY_TYPE_DOUBLE_EMPHASIS) if @options[:markdown_double_emphasis]
        extract_markdown_span(ITALIC_REGEX, ENTITY_TYPE_EMPHASIS) if @options[:markdown_emphasis]
        extract_markdown_span(STRIKETHROUGH_REGEX, ENTITY_TYPE_STRIKETHROUGH) if @options[:markdown_strikethrough]
      end

      # Shared loop behind every extraction: match until there's no results,
      # append the entity built by the block to @entities, then mask the match
      # in @working_text.
      #
      # The position comes from the match itself. Looking the matched text up
      # again could find an earlier, identical substring that the regex did
      # not match, which would give the entity wrong indices and leave the
      # real match in place to be extracted a second time.
      #
      # @param regex [Regexp] pattern to search @working_text for
      # @yieldparam match [MatchData] the current match
      # @yieldparam indices [Array<Integer>] start and end offset of the match
      # @yieldreturn [Hash] the entity to record
      def extract_markdown_matches(regex)
        while match = @working_text.match(regex)
          start = match.begin(0)
          length = match[0].length

          # Add the entity
          @entities << yield(match, [start, start + length])

          # Remove from the working text
          @working_text[start, length] = REPLACE_TOKEN * length
        end
      end

      # Extract every match of +regex+ as an entity of +type+ whose display
      # text is capture group 2. An optional block receives the entity and the
      # MatchData and returns the entity to record.
      def extract_markdown_span(regex, type)
        extract_markdown_matches(regex) do |match, indices|
          entity = {
            type: type,
            text: match[0],
            display_text: match[2],
            indices: indices
          }

          # Let block modify
          entity = yield(entity, match) if block_given?
          entity
        end
      end

      # Extract code spans, using the same ENTITY_TYPE_CODE constant the HTML
      # module matches on.
      def extract_markdown_code
        extract_markdown_span(CODE_REGEX, ENTITY_TYPE_CODE) do |entity, match|
          # Strip spaces and tabs from the start and end of every line of the display text
          entity[:display_text] = match[2].gsub(/^[ \t]*/, '').gsub(/[ \t]*$/, '')
          entity
        end
      end

      # Extract every match of +regex+ as a link entity. The block is expected
      # to fill in :url and :display_text from the MatchData.
      def extract_markdown_autolink(regex)
        extract_markdown_matches(regex) do |match, indices|
          entity = {
            type: ENTITY_TYPE_LINK,
            text: match[0],
            indices: indices
          }

          # Let block modify
          entity = yield(entity, match) if block_given?
          entity
        end
      end

      # Extract `<http://example.com>` style links.
      def extract_markdown_autolink_links
        extract_markdown_autolink AUTOLINK_LINK_REGEX do |entity, match|
          entity[:url] = match[1]
          entity[:display_text] = display_url(match[1])
          entity
        end
      end

      # Extract `<user@example.com>` style links; the URL always gets a
      # mailto: prefix whether or not the text had one.
      def extract_markdown_autolink_email
        extract_markdown_autolink AUTOLINK_EMAIL_REGEX do |entity, match|
          email = match[1]
          entity[:url] = "mailto:#{email}"
          entity[:display_text] = email
          entity
        end
      end

      # Extract `[text](url "title")` style links.
      def extract_markdown_links
        extract_markdown_span(LINK_REGEX, ENTITY_TYPE_LINK) do |entity, match|
          # Add the URL
          entity[:url] = match[3]

          # Add the title
          entity[:title] = match[6] if match[6]
          entity
        end
      end
    end
  end
end