quesadilla 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 17f8cfec03e4c86278585c52d2ae57995a984d3c
4
+ data.tar.gz: e145d4496a0bc034e278f6a2c4738d48006d7a8b
5
+ SHA512:
6
+ metadata.gz: aaf8418d063286d7666e0b666b5badbcf63e2e54cceb225d35e69bc238fb3f0973544c547cbc806dde042e5d8f7ae37fa4accb1325c210ef5d9eb8a989f6330a
7
+ data.tar.gz: 39f070f60d71278bceeab4e939944ebc921609e4c6eb90b26f0e09ac693639b4dd80bd3a423e33dccd32f641fe6218213b665c3ebf43282e2a203134ef7454fb
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ bundler_args: --without development
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - jruby-19mode
@@ -0,0 +1,19 @@
1
+ ## Submitting a Pull Request
2
+
3
+ 1. [Fork the repository.][fork]
4
+ 2. [Create a topic branch.][branch]
5
+ 3. Add tests for your unimplemented feature or bug fix.
6
+ 4. Run `bundle exec rake`. If your tests pass, return to step 3.
7
+ 5. Implement your feature or bug fix.
8
+ 6. Run `bundle exec rake`. If your tests fail, return to step 5.
9
+ 7. Run `open coverage/index.html`. If your changes are not completely covered
10
+ by your tests, return to step 3.
11
+ 8. Add documentation for your feature or bug fix.
12
+ 9. Run `bundle exec rake doc`. If your changes are not 100% documented, go
13
+ back to step 8.
14
+ 10. Add, commit, and push your changes.
15
+ 11. [Submit a pull request.][pr]
16
+
17
+ [fork]: http://help.github.com/fork-a-repo/
18
+ [branch]: http://learn.github.com/p/branching.html
19
+ [pr]: http://help.github.com/send-pull-requests/
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
source 'https://rubygems.org'

# Gem dependencies (taken from the gemspec)
gemspec

# Needed by both the development and test groups
gem 'rake', group: [:development, :test]

# Development dependencies
gem 'yard', group: :development
gem 'redcarpet', group: :development, platform: 'ruby'

# Testing dependencies
gem 'minitest', group: :test
gem 'minitest-wscolor', group: :test
gem 'simplecov', group: :test, require: false
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Sam Soffes
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,21 @@
1
# Gem tasks provided by Bundler
require 'bundler'
Bundler::GemHelper.install_tasks

# Test suite: every *_test.rb file under test/
require 'rake/testtask'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
end
task :default => :test

# Documentation. YARD is optional, so the :doc task is simply not defined
# when the gem cannot be loaded.
begin
  require 'yard'
  YARD::Rake::YardocTask.new(:doc) do |yardoc|
    yardoc.files = %w[Readme.markdown LICENSE lib/**/*.rb]
    yardoc.options = %w[--output-dir doc --markup markdown]
  end
rescue LoadError
  # YARD is not installed; skip the documentation task.
end
data/Readme.markdown ADDED
@@ -0,0 +1,100 @@
1
+ # Quesadilla
2
+
3
+ Entity-style text parsing. Quesadilla was extracted from [Cheddar](https://cheddarapp.com).
4
+
5
+ See the [Cheddar text guide](https://cheddarapp.com/text) for more information about how to type entities.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ``` ruby
12
+ gem 'quesadilla'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install quesadilla
22
+
23
+ ## Usage
24
+
25
+ To extract entities from text, simply call extract:
26
+
27
+ ``` ruby
28
+ Quesadilla.extract('Some #awesome text')
29
+ # => {
30
+ # display_text: "Some #awesome text",
31
+ # display_html: "Some <a href=\"#hashtag-awesome\" class=\"tag\">#awesome</a> text",
32
+ # entities: [
33
+ # {
34
+ # type: "hashtag",
35
+ # text: "#awesome",
36
+ # display_text: "#awesome",
37
+ # indices: [5, 13],
38
+ # hashtag: "awesome",
39
+ # display_indices: [5, 13]
40
+ # }
41
+ # ]
42
+ # }
43
+ ```
44
+
45
+ ### Configuring
46
+
47
+ Quesadilla supports extracting various span-level Markdown features as well as automatically detecting links and GitHub-style named emoji. Here is the list of options you can pass when extracting:
48
+
49
+ Option | Description
50
+ ----------------------------|-----------------------------------------------------------------
51
+ `:markdown` | All Markdown parsing
52
+ `:markdown_code` | Markdown code tags
53
+ `:markdown_links` | Markdown links (including `<http://soff.es>` style links)
54
+ `:markdown_triple_emphasis` | Markdown bold italic
55
+ `:markdown_double_emphasis` | Markdown bold
56
+ `:markdown_emphasis` | Markdown italic
57
+ `:markdown_strikethrough` | Markdown Extra strikethrough
58
+ `:hashtags` | Hashtags
59
+ `:autolinks` | Automatically detect links
60
+ `:emoji` | GitHub-style named emoji
61
+ `:html` | Generate HTML representations for entities and the entire string
62
+
63
+ Everything is enabled by default. If you don't want to extract Markdown, you should call the extractor like this:
64
+
65
+ ``` ruby
66
+ Quesadilla.extract('Some text', markdown: false)
67
+ ```
68
+
69
+ You can also just disable strikethrough and still extract the rest of the Markdown entities if you want:
70
+
71
+ ``` ruby
72
+ Quesadilla.extract('Some text', markdown_strikethrough: false)
73
+ ```
74
+
75
+ ### Customizing HTML
76
+
77
+ If you want to change the generated HTML, you can create a custom renderer:
78
+
79
+ ``` ruby
80
+ class CustomRenderer < Quesadilla::HTMLRenderer
81
+ def hashtag(display_text, hashtag)
82
+ %Q{<a href="http://example.com/tags/#{hashtag}" class="tag">#{display_text}</a>}
83
+ end
84
+ end
85
+
86
+ extraction = Quesadilla.extract('Some #awesome text', html_renderer: CustomRenderer)
87
+ extraction[:display_html] #=> 'Some <a href="http://example.com/tags/awesome" class="tag">#awesome</a> text'
88
+ ```
89
+
90
+ Take a look at [Quesadilla::HTMLRenderer](lib/quesadilla/html_renderer.html) for more details on creating a custom renderer.
91
+
92
+ ## Supported Ruby Versions
93
+
94
+ Quesadilla is tested under 1.9.3, 2.0.0, and JRuby (1.9 mode).
95
+
96
+ [![Build Status](https://travis-ci.org/soffes/quesadilla.png?branch=master)](https://travis-ci.org/soffes/quesadilla)
97
+
98
+ ## Contributing
99
+
100
+ See the [contributing guide](Contributing.markdown).
@@ -0,0 +1,28 @@
1
# String additions
class String
  # Truncate method adapted from ActiveSupport.
  #
  # Strings no longer than `truncate_at` are returned as an unmodified copy.
  # Longer strings are cut so that the result, including the omission string,
  # is at most `truncate_at` characters long (or just the omission string when
  # `truncate_at` is shorter than the omission itself).
  #
  # @param truncate_at [Fixnum] maximum length of the result, omission included
  # @param options [Hash] optional options hash; it is only read, never modified
  # @option options separator [String] truncate the text only at this separator string
  # @option options omission [String] string appended to indicate truncated text. Defaults to '...'
  # @return [String] truncated string
  def q_truncate(truncate_at, options = {})
    return dup unless length > truncate_at

    # Read the default instead of writing it back, so the caller's hash (which
    # may be frozen or shared) is left untouched.
    omission = options[:omission] || '...'

    # Leave room for the omission string. Clamp at zero: a negative end index
    # would count from the end of the string and keep almost all of it.
    limit = [truncate_at - omission.length, 0].max

    # Prefer the last separator at or before the limit; otherwise cut at the limit.
    separator = options[:separator]
    stop = (separator && rindex(separator, limit)) || limit

    # Return the truncated string plus the omission string
    self[0...stop] + omission
  end
end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Extract plain links.
    #
    # This module has no public methods.
    module Autolinks
      private

      require 'twitter-text'

      # Record a link entity for every URL twitter-text finds in the working
      # text, then blank that URL out of the working text so that it is not
      # seen again by later extraction steps.
      def extract_autolinks
        Twitter::Extractor::extract_urls_with_indices(@working_text).each do |entity|
          url = entity[:url]
          first, last = entity[:indices]

          @entities << {
            type: ENTITY_TYPE_LINK,
            text: url,
            display_text: display_url(url),
            url: quality_url(url),
            indices: entity[:indices]
          }

          # Blank out exactly the reported span. Searching for the URL text
          # instead could hit an earlier, identical substring that was not
          # extracted as a link and leave the real link in place.
          # NOTE(review): assumes REPLACE_TOKEN is a single character, so the
          # replacement keeps every other index stable - confirm.
          @working_text[first...last] = REPLACE_TOKEN * (last - first)
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Extract named emoji.
    #
    # This module has no public methods.
    module Emoji
      private

      require 'digest/sha1'
      require 'named_emoji'

      # Emoji colon-syntax regex
      EMOJI_COLON_REGEX = %r{:([a-zA-Z0-9_\-\+]+):}.freeze

      # Replace every known `:name:` code in the original text with its emoji.
      # Markdown code spans are left untouched, and unknown codes are kept
      # verbatim.
      def replace_emoji
        codes = {}

        # Swap each code span for a SHA placeholder so emoji codes inside code
        # are not replaced. The counter keeps identical spans distinct.
        counter = 0
        while (match = @original_text.match(Markdown::CODE_REGEX))
          original = match[0]
          key = Digest::SHA1.hexdigest("#{original}-#{counter}")
          codes[key] = original
          @original_text[match.begin(0)...match.end(0)] = key
          counter += 1
        end

        # Replace emojis. The scan position always moves forward, so an
        # unknown code can never be matched twice (which would loop forever).
        emojis = NamedEmoji.emojis
        position = 0
        while (match = EMOJI_COLON_REGEX.match(@original_text, position))
          sym = match[1].downcase.to_sym
          if emojis.key?(sym)
            emoji = emojis[sym]
            @original_text[match.begin(0)...match.end(0)] = emoji
            position = match.begin(0) + emoji.length
          else
            # Resume on the closing colon: it may open the next code,
            # as in ":unknown:smile:".
            position = match.end(0) - 1
          end
        end

        # Put the code spans back. The block form inserts the span literally;
        # a plain replacement string would treat backslashes in the code as
        # back-references.
        codes.each do |key, value|
          @original_text.sub!(key) { value }
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Extract hashtags.
    #
    # This module has no public methods.
    module Hashtags
      private

      require 'twitter-text'

      # Record a hashtag entity for every hashtag twitter-text finds in the
      # working text, then blank that hashtag out of the working text so that
      # it is not seen again by later extraction steps.
      def extract_hashtags
        Twitter::Extractor::extract_hashtags_with_indices(@working_text).each do |entity|
          first, last = entity[:indices]

          # Take the text exactly as it was typed. Rebuilding it as
          # "#" + hashtag breaks when the hash sign is not an ASCII "#"
          # (twitter-text also accepts the full-width form).
          entity_text = @working_text[first...last]

          @entities << {
            type: ENTITY_TYPE_HASHTAG,
            text: entity_text,
            display_text: entity_text,
            indices: entity[:indices],
            hashtag: entity[:hashtag].downcase
          }

          # Blank out exactly the reported span.
          # NOTE(review): assumes REPLACE_TOKEN is a single character, so the
          # replacement keeps every other index stable - confirm.
          @working_text[first...last] = REPLACE_TOKEN * (last - first)
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Convert entities and entire string to HTML.
    #
    # This module has no public methods.
    module HTML
      private

      # Characters that must be escaped in HTML. Each one has its escaped text
      # and a one-character placeholder used while entities are being inserted.
      HTML_ESCAPE_MAP = [
        {
          pattern: '&',
          text: '&amp;',
          placeholder: "\uf050",
        },
        {
          pattern: '<',
          text: '&lt;',
          placeholder: "\uf051",
        },
        {
          pattern: '>',
          text: '&gt;',
          placeholder: "\uf052",
        },
        {
          pattern: '"',
          text: '&quot;',
          placeholder: "\uf053",
        },
        {
          pattern: '\'',
          text: '&#x27;',
          placeholder: "\uf054",
        },
        {
          pattern: '/',
          text: '&#x2F;',
          placeholder: "\uf055",
        }
      ].freeze

      # Lookup tables derived from HTML_ESCAPE_MAP so that all three escaping
      # helpers share one definition and each works in a single pass.
      HTML_UNSAFE_REGEX = Regexp.union(HTML_ESCAPE_MAP.map { |escape| escape[:pattern] }).freeze
      HTML_PLACEHOLDER_REGEX = Regexp.union(HTML_ESCAPE_MAP.map { |escape| escape[:placeholder] }).freeze
      HTML_TEXT_FOR_PATTERN = Hash[HTML_ESCAPE_MAP.map { |escape| [escape[:pattern], escape[:text]] }].freeze
      HTML_PLACEHOLDER_FOR_PATTERN = Hash[HTML_ESCAPE_MAP.map { |escape| [escape[:pattern], escape[:placeholder]] }].freeze
      HTML_TEXT_FOR_PLACEHOLDER = Hash[HTML_ESCAPE_MAP.map { |escape| [escape[:placeholder], escape[:text]] }].freeze

      # Build the HTML for the whole display text.
      #
      # Without entities the text is just escaped. Otherwise every entity is
      # replaced by its rendered HTML and the placeholders left by the
      # pre-escape step are converted to real HTML escapes at the end.
      def display_html(display_text, entities)
        return html_escape(display_text) if !entities || entities.empty?

        # Replace entities
        html = sub_entities(display_text, entities, true) do |entity|
          html_entity(entity)
        end

        # Return
        html_un_pre_escape(html)
      end

      # Render a single entity through the renderer method matching its type.
      # Text handed to the renderer is pre-escaped; unknown types fall back to
      # the pre-escaped source text.
      def html_entity(entity)
        display_text = html_pre_escape(entity[:display_text])
        case entity[:type]
        when ENTITY_TYPE_EMPHASIS
          @renderer.emphasis(display_text)
        when ENTITY_TYPE_DOUBLE_EMPHASIS
          @renderer.double_emphasis(display_text)
        when ENTITY_TYPE_TRIPLE_EMPHASIS
          @renderer.triple_emphasis(display_text)
        when ENTITY_TYPE_STRIKETHROUGH
          @renderer.strikethrough(display_text)
        when ENTITY_TYPE_CODE
          @renderer.code(display_text)
        when ENTITY_TYPE_HASHTAG
          @renderer.hashtag(display_text, html_pre_escape(entity[:hashtag]))
        when ENTITY_TYPE_LINK
          # NOTE(review): the URL is handed over un-escaped; presumably the
          # renderer is responsible for it - confirm.
          @renderer.link(display_text, entity[:url], html_pre_escape(entity[:title]))
        else
          # Catchall
          html_pre_escape(entity[:text])
        end
      end

      # Pre-escape. Convert bad characters to high UTF-8 characters
      # We do this dance so we don't throw off the indexes so the entities get inserted correctly.
      def html_pre_escape(string)
        return '' unless string
        string.gsub(HTML_UNSAFE_REGEX, HTML_PLACEHOLDER_FOR_PATTERN)
      end

      # Convert bad characters (now, high UTF-8 characters) to HTML escaped ones
      def html_un_pre_escape(string)
        string.gsub(HTML_PLACEHOLDER_REGEX, HTML_TEXT_FOR_PLACEHOLDER)
      end

      # Escape bad characters straight to their HTML escaped form.
      # Returns an empty string for nil.
      def html_escape(string)
        return '' unless string
        string.gsub(HTML_UNSAFE_REGEX, HTML_TEXT_FOR_PATTERN)
      end
    end
  end
end
@@ -0,0 +1,187 @@
1
+ # encoding: UTF-8
2
+
3
module Quesadilla
  class Extractor
    # Extract Markdown
    #
    # This module has no public methods.
    module Markdown
      private

      # Gruber's regex is recursive, but I can't figure out how to do it in Ruby without the `g` option.
      # Maybe I should use StringScanner instead. For now, I think it's fine. Everything appears to work
      # as expected.
      NESTED_BRACKETS_REGEX = %r{
        (?>
          [^\[\]]+
        )*
      }x.freeze

      # 2 = Text, 3 = URL, 6 = Title
      LINK_REGEX = %r{
        (
          \[
            (#{NESTED_BRACKETS_REGEX})
          \]
          \(
            [ \t]*
            <?(.*?)>?
            [ \t]*
            (
              (['"])
              (.*?)
              \5
            )?
          \)
        )
      }x.freeze

      # 1 = URL
      AUTOLINK_LINK_REGEX = /<((?:https?|ftp):[^'">\s]+)>/i.freeze

      # 1 = Email
      AUTOLINK_EMAIL_REGEX = %r{
        <
        (?:mailto:)?
        (
          [-.\w]+
          \@
          [-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+
        )
        >
      }xi.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_ITALIC_REGEX = %r{ (\*\*\*|___) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_REGEX = %r{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      ITALIC_REGEX = %r{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      STRIKETHROUGH_REGEX = %r{ (~~) (?=\S) (.+?[~]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      CODE_REGEX = %r{ (`+) (.+?) (?<!`) \1 (?!`) }x.freeze

      # Run every enabled Markdown extraction step. Code comes first and the
      # emphasis steps run from the longest delimiter to the shortest; each
      # step blanks what it extracted so later steps cannot match it again.
      def extract_markdown
        extract_markdown_code if @options[:markdown_code]

        if @options[:markdown_links]
          extract_markdown_autolink_links
          extract_markdown_autolink_email
          extract_markdown_links
        end

        extract_markdown_span(BOLD_ITALIC_REGEX, ENTITY_TYPE_TRIPLE_EMPHASIS) if @options[:markdown_triple_emphasis]
        extract_markdown_span(BOLD_REGEX, ENTITY_TYPE_DOUBLE_EMPHASIS) if @options[:markdown_double_emphasis]
        extract_markdown_span(ITALIC_REGEX, ENTITY_TYPE_EMPHASIS) if @options[:markdown_emphasis]
        extract_markdown_span(STRIKETHROUGH_REGEX, ENTITY_TYPE_STRIKETHROUGH) if @options[:markdown_strikethrough]
      end

      # Match `regex` against the working text until there are no results.
      #
      # For every match the block receives the MatchData and its
      # [start, stop] indices and must return the entity to record. The
      # matched span is then removed from the working text. The offsets come
      # from the match itself, so the span that is reported and blanked is
      # always the one the regex matched.
      def each_markdown_match(regex)
        while (match = @working_text.match(regex))
          start = match.begin(0)
          stop = match.end(0)

          # Add the entity
          @entities << yield(match, [start, stop])

          # Remove from the working text
          @working_text[start...stop] = REPLACE_TOKEN * (stop - start)
        end
      end

      # Extract every span matching `regex` as an entity of the given type.
      # Capture group 2 of the regex becomes the display text. An optional
      # block receives the entity and the MatchData and returns the entity to
      # record.
      def extract_markdown_span(regex, type)
        each_markdown_match(regex) do |match, indices|
          entity = {
            type: type,
            text: match[0],
            display_text: match[2],
            indices: indices
          }

          # Let block modify
          block_given? ? yield(entity, match) : entity
        end
      end

      # Extract code spans.
      def extract_markdown_code
        extract_markdown_span(CODE_REGEX, ENTITY_TYPE_CODE) do |entity, match|
          # Strip leading and trailing spaces and tabs from each line of the display text
          entity[:display_text] = match[2].gsub(/^[ \t]*/, '').gsub(/[ \t]*$/, '')
          entity
        end
      end

      # Extract every span matching `regex` as a link entity. The entity has
      # no URL or display text yet; the block is expected to add them and
      # return the entity.
      def extract_markdown_autolink(regex)
        each_markdown_match(regex) do |match, indices|
          entity = {
            type: ENTITY_TYPE_LINK,
            text: match[0],
            indices: indices
          }

          # Let block modify
          block_given? ? yield(entity, match) : entity
        end
      end

      # Extract `<http://example.com>` style links.
      def extract_markdown_autolink_links
        extract_markdown_autolink AUTOLINK_LINK_REGEX do |entity, match|
          entity[:url] = match[1]
          entity[:display_text] = display_url(match[1])
          entity
        end
      end

      # Extract `<user@example.com>` style links as mailto links.
      def extract_markdown_autolink_email
        extract_markdown_autolink AUTOLINK_EMAIL_REGEX do |entity, match|
          email = match[1]
          entity[:url] = "mailto:#{email}"
          entity[:display_text] = email
          entity
        end
      end

      # Extract `[text](url "title")` style links.
      def extract_markdown_links
        extract_markdown_span(LINK_REGEX, ENTITY_TYPE_LINK) do |entity, match|
          # Add the URL
          entity[:url] = match[3]

          # Add the title
          entity[:title] = match[6] if match[6]
          entity
        end
      end
    end
  end
end