quesadilla 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +6 -0
- data/Contributing.markdown +19 -0
- data/Gemfile +19 -0
- data/LICENSE +22 -0
- data/Rakefile +21 -0
- data/Readme.markdown +100 -0
- data/lib/quesadilla/core_ext/string.rb +28 -0
- data/lib/quesadilla/extractor/autolinks.rb +28 -0
- data/lib/quesadilla/extractor/emoji.rb +43 -0
- data/lib/quesadilla/extractor/hashtags.rb +28 -0
- data/lib/quesadilla/extractor/html.rb +103 -0
- data/lib/quesadilla/extractor/markdown.rb +187 -0
- data/lib/quesadilla/extractor.rb +140 -0
- data/lib/quesadilla/html_renderer.rb +57 -0
- data/lib/quesadilla/version.rb +4 -0
- data/lib/quesadilla.rb +45 -0
- data/quesadilla.gemspec +28 -0
- data/test/quesadilla/autolink_test.rb +84 -0
- data/test/quesadilla/emoji_test.rb +103 -0
- data/test/quesadilla/hashtags_test.rb +50 -0
- data/test/quesadilla/html_test.rb +21 -0
- data/test/quesadilla/markdown_test.rb +235 -0
- data/test/quesadilla/multi_test.rb +64 -0
- data/test/quesadilla_test.rb +9 -0
- data/test/support/extractor_macros.rb +5 -0
- data/test/test_helper.rb +18 -0
- metadata +109 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 17f8cfec03e4c86278585c52d2ae57995a984d3c
|
4
|
+
data.tar.gz: e145d4496a0bc034e278f6a2c4738d48006d7a8b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aaf8418d063286d7666e0b666b5badbcf63e2e54cceb225d35e69bc238fb3f0973544c547cbc806dde042e5d8f7ae37fa4accb1325c210ef5d9eb8a989f6330a
|
7
|
+
data.tar.gz: 39f070f60d71278bceeab4e939944ebc921609e4c6eb90b26f0e09ac693639b4dd80bd3a423e33dccd32f641fe6218213b665c3ebf43282e2a203134ef7454fb
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
## Submitting a Pull Request
|
2
|
+
|
3
|
+
1. [Fork the repository.][fork]
|
4
|
+
2. [Create a topic branch.][branch]
|
5
|
+
3. Add tests for your unimplemented feature or bug fix.
|
6
|
+
4. Run `bundle exec rake`. If your tests pass, return to step 3.
|
7
|
+
5. Implement your feature or bug fix.
|
8
|
+
6. Run `bundle exec rake`. If your tests fail, return to step 5.
|
9
|
+
7. Run `open coverage/index.html`. If your changes are not completely covered
|
10
|
+
by your tests, return to step 3.
|
11
|
+
8. Add documentation for your feature or bug fix.
|
12
|
+
9. Run `bundle exec rake doc`. If your changes are not 100% documented, go
|
13
|
+
back to step 8.
|
14
|
+
10. Add, commit, and push your changes.
|
15
|
+
11. [Submit a pull request.][pr]
|
16
|
+
|
17
|
+
[fork]: http://help.github.com/fork-a-repo/
|
18
|
+
[branch]: http://learn.github.com/p/branching.html
|
19
|
+
[pr]: http://help.github.com/send-pull-requests/
|
data/Gemfile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
source 'https://rubygems.org'

# Pull the gem's own dependencies from the gemspec
gemspec

# Rake is used by both the development and the test groups
gem 'rake', group: [:development, :test]

# Development-only gems
gem 'yard', group: :development
gem 'redcarpet', group: :development, platform: 'ruby'

# Test-only gems
gem 'minitest', group: :test
gem 'minitest-wscolor', group: :test
gem 'simplecov', group: :test, require: false
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Sam Soffes
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Install the tasks provided by Bundler's gem helper
require 'bundler'
Bundler::GemHelper.install_tasks

# Test task: runs every *_test.rb file below test/ and is the default task
require 'rake/testtask'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test')
  test_task.pattern = 'test/**/*_test.rb'
end
task default: :test

# Documentation task, defined only when YARD can be loaded
begin
  require 'yard'
  YARD::Rake::YardocTask.new(:doc) do |yardoc|
    yardoc.files = %w[Readme.markdown LICENSE lib/**/*.rb]
    yardoc.options = %w[--output-dir doc --markup markdown]
  end
rescue LoadError
  # YARD is not available: leave the doc task undefined
end
|
data/Readme.markdown
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Quesadilla
|
2
|
+
|
3
|
+
Entity-style text parsing. Quesadilla was extracted from [Cheddar](https://cheddarapp.com).
|
4
|
+
|
5
|
+
See the [Cheddar text guide](https://cheddarapp.com/text) for more information about how to type entities.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
``` ruby
|
12
|
+
gem 'quesadilla'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install quesadilla
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
To extract entities from text, simply call extract:
|
26
|
+
|
27
|
+
``` ruby
|
28
|
+
Quesadilla.extract('Some #awesome text')
|
29
|
+
# => {
|
30
|
+
# display_text: "Some #awesome text",
|
31
|
+
# display_html: "Some <a href=\"#hashtag-awesome\" class=\"tag\">#awesome</a> text",
|
32
|
+
# entities: [
|
33
|
+
# {
|
34
|
+
# type: "hashtag",
|
35
|
+
# text: "#awesome",
|
36
|
+
# display_text: "#awesome",
|
37
|
+
# indices: [5, 13],
|
38
|
+
# hashtag: "awesome",
|
39
|
+
# display_indices: [5, 13]
|
40
|
+
# }
|
41
|
+
# ]
|
42
|
+
# }
|
43
|
+
```
|
44
|
+
|
45
|
+
### Configuring
|
46
|
+
|
47
|
+
Quesadilla supports extracting various span-level Markdown features as well as automatically detecting links and GitHub-style named emoji. Here is the list of options you can pass when extracting:
|
48
|
+
|
49
|
+
Option | Description
|
50
|
+
----------------------------|-----------------------------------------------------------------
|
51
|
+
`:markdown` | All Markdown parsing
|
52
|
+
`:markdown_code` | Markdown code tags
|
53
|
+
`:markdown_links` | Markdown links (including `<http://soff.es>` style links)
|
54
|
+
`:markdown_triple_emphasis` | Markdown bold italic
|
55
|
+
`:markdown_double_emphasis` | Markdown bold
|
56
|
+
`:markdown_emphasis` | Markdown italic
|
57
|
+
`:markdown_strikethrough` | Markdown Extra strikethrough
|
58
|
+
`:hashtags` | Hashtags
|
59
|
+
`:autolinks` | Automatically detect links
|
60
|
+
`:emoji` | GitHub-style named emoji
|
61
|
+
`:html` | Generate HTML representations for entities and the entire string
|
62
|
+
|
63
|
+
Everything is enabled by default. If you don't want to extract Markdown, you should call the extractor like this:
|
64
|
+
|
65
|
+
``` ruby
|
66
|
+
Quesadilla.extract('Some text', markdown: false)
|
67
|
+
```
|
68
|
+
|
69
|
+
You can also just disable strikethrough and still extract the rest of the Markdown entities if you want:
|
70
|
+
|
71
|
+
``` ruby
|
72
|
+
Quesadilla.extract('Some text', markdown_strikethrough: false)
|
73
|
+
```
|
74
|
+
|
75
|
+
### Customizing HTML
|
76
|
+
|
77
|
+
If you want to change the generated HTML, you can create a custom renderer:
|
78
|
+
|
79
|
+
``` ruby
|
80
|
+
class CustomRenderer < Quesadilla::HTMLRenderer
|
81
|
+
def hashtag(display_text, hashtag)
|
82
|
+
%Q{<a href="http://example.com/tags/#{hashtag}" class="tag">#{display_text}</a>}
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
extraction = Quesadilla.extract('Some #awesome text', html_renderer: CustomRenderer)
|
87
|
+
extraction[:display_html] #=> 'Some <a href="http://example.com/tags/awesome" class="tag">#awesome</a> text'
|
88
|
+
```
|
89
|
+
|
90
|
+
Take a look at [Quesadilla::HTMLRenderer](lib/quesadilla/html_renderer.html) for more details on creating a custom renderer.
|
91
|
+
|
92
|
+
## Supported Ruby Versions
|
93
|
+
|
94
|
+
Quesadilla is tested under 1.9.3, 2.0.0, and JRuby (1.9 mode).
|
95
|
+
|
96
|
+
[![Build Status](https://travis-ci.org/soffes/quesadilla.png?branch=master)](https://travis-ci.org/soffes/quesadilla)
|
97
|
+
|
98
|
+
## Contributing
|
99
|
+
|
100
|
+
See the [contributing guide](Contributing.markdown).
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# String additions
class String
  # Truncate method modelled on the one from ActiveSupport.
  #
  # Strings that are not longer than +truncate_at+ are returned as an
  # unmodified copy. Longer strings are cut so that the kept text plus the
  # omission string is at most +truncate_at+ characters long (when the
  # omission itself is longer than +truncate_at+, only the omission is
  # returned). The +options+ hash passed by the caller is never modified.
  #
  # @param truncate_at [Fixnum] number of characters to truncate after
  # @param options [Hash] optional options hash
  # @option options [String] :separator truncate text only at an occurrence of this separator string
  # @option options [String] :omission string appended to indicate truncated text. Defaults to '...'
  # @return [String] truncated string
  def q_truncate(truncate_at, options = {})
    return dup unless length > truncate_at

    # Read the omission without writing the default back into the caller's hash
    omission = options[:omission] || '...'

    # Account for the omission string in the truncated length; never go
    # negative, because a negative end index would slice from the end instead
    limit = [truncate_at - omission.length, 0].max

    # Cut at the last separator at or before the limit when one is requested
    # and found, otherwise cut exactly at the limit
    separator = options[:separator]
    stop = (separator && rindex(separator, limit)) || limit

    # Return the truncated string plus the omission string
    self[0...stop] + omission
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Quesadilla
  class Extractor
    # Extract plain links.
    #
    # This module has no public methods.
    module Autolinks
      private

      require 'twitter-text'

      # Appends one link entity to @entities for every URL that twitter-text
      # finds in @working_text, then blanks the URL out of @working_text.
      #
      # The blanking uses the indices reported by the extractor instead of a
      # textual search, so an identical string that appears earlier in the
      # text (but was not extracted as a URL) is never blanked by mistake.
      def extract_autolinks
        Twitter::Extractor::extract_urls_with_indices(@working_text).each do |entity|
          url = entity[:url]
          start, stop = entity[:indices]

          @entities << {
            type: ENTITY_TYPE_LINK,
            text: url,
            display_text: display_url(url),
            url: quality_url(url),
            indices: entity[:indices]
          }

          # Replace exactly the extracted span with the same number of tokens.
          # NOTE(review): presumably REPLACE_TOKEN is a single character so
          # that later offsets stay valid - confirm against its definition.
          @working_text[start...stop] = REPLACE_TOKEN * (stop - start)
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Quesadilla
  class Extractor
    # Extract named emoji.
    #
    # This module has no public methods.
    module Emoji
      private

      require 'digest/sha1'
      require 'named_emoji'

      # Emoji colon-syntax regex
      EMOJI_COLON_REGEX = %r{:([a-zA-Z0-9_\-\+]+):}.freeze

      # Replaces every known `:name:` emoji code in @original_text (in place)
      # with the value NamedEmoji provides for it. Markdown code spans are
      # left untouched, and unknown names are left as typed.
      def replace_emoji
        codes = {}

        # Replace code spans with SHA1 placeholders so emoji inside them are
        # not converted
        i = 0
        while (match = @original_text.match(Markdown::CODE_REGEX))
          original = match[0]
          key = Digest::SHA1.hexdigest("#{original}-#{i}")
          codes[key] = original
          @original_text[match.begin(0)...match.end(0)] = key
          i += 1
        end

        # Replace emojis. The scan position always moves forward, so an
        # unknown name can no longer be matched again and again.
        emojis = NamedEmoji.emojis
        position = 0
        while (match = @original_text.match(EMOJI_COLON_REGEX, position))
          sym = match[1].downcase.to_sym
          if emojis.key?(sym)
            emoji = emojis[sym]
            @original_text[match.begin(0)...match.end(0)] = emoji
            position = match.begin(0) + emoji.length
          else
            # Skip only the opening colon: the closing colon of an unknown
            # name may be the opening colon of the next code
            position = match.begin(0) + 1
          end
        end

        # Unreplace codes. The block form inserts the code verbatim; a
        # replacement string would interpret backslash sequences in the code.
        codes.each do |key, value|
          @original_text.sub!(key) { value }
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Quesadilla
  class Extractor
    # Extract hashtags.
    #
    # This module has no public methods.
    module Hashtags
      private

      require 'twitter-text'

      # Appends one hashtag entity to @entities for every hashtag that
      # twitter-text finds in @working_text, then blanks the hashtag out of
      # @working_text.
      #
      # The blanking uses the indices reported by the extractor instead of
      # searching for "#" + hashtag, so it still works when the same text
      # occurs earlier or when the typed text differs from that string.
      def extract_hashtags
        Twitter::Extractor::extract_hashtags_with_indices(@working_text).each do |entity|
          tag = entity[:hashtag]
          entity_text = "##{tag}"
          start, stop = entity[:indices]

          @entities << {
            type: ENTITY_TYPE_HASHTAG,
            text: entity_text,
            display_text: entity_text,
            indices: entity[:indices],
            hashtag: tag.downcase
          }

          # Replace exactly the extracted span with the same number of tokens.
          # NOTE(review): presumably REPLACE_TOKEN is a single character so
          # that later offsets stay valid - confirm against its definition.
          @working_text[start...stop] = REPLACE_TOKEN * (stop - start)
        end
      end
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Quesadilla
  class Extractor
    # Convert entities and entire string to HTML.
    #
    # This module has no public methods.
    module HTML
      private

      # Characters that must be escaped in HTML. Each entry holds the raw
      # character (:pattern), its HTML entity (:text) and a one-character
      # private-use placeholder (:placeholder) that stands in for it while
      # entities are being inserted. '&' must stay first so that the
      # ampersands of the other entities are not escaped again.
      #
      # NOTE(review): the entity strings were restored from a listing in
      # which they had been decoded back to the raw characters; the numeric
      # forms for ' and / are the conventional ones - confirm against the
      # released gem.
      HTML_ESCAPE_MAP = [
        {
          pattern: '&',
          text: '&amp;',
          placeholder: "\uf050",
        },
        {
          pattern: '<',
          text: '&lt;',
          placeholder: "\uf051",
        },
        {
          pattern: '>',
          text: '&gt;',
          placeholder: "\uf052",
        },
        {
          pattern: '"',
          text: '&quot;',
          placeholder: "\uf053",
        },
        {
          pattern: '\'',
          text: '&#x27;',
          placeholder: "\uf054",
        },
        {
          pattern: '/',
          text: '&#x2F;',
          placeholder: "\uf055",
        }
      ].freeze

      # Builds the HTML for a display string.
      #
      # @param display_text [String] text to convert
      # @param entities [Array<Hash>, nil] entities to substitute into the text
      # @return [String] escaped text when there are no entities, otherwise
      #   the text with every entity replaced by its rendered HTML
      def display_html(display_text, entities)
        return html_escape(display_text) unless entities && entities.length > 0

        # Replace entities
        html = sub_entities(display_text, entities, true) do |entity|
          html_entity(entity)
        end

        # Turn the placeholders into real HTML entities
        html_un_pre_escape(html)
      end

      # Renders a single entity through @renderer, choosing the renderer
      # method from the entity type. Text handed to the renderer is
      # pre-escaped; entities of an unknown type fall back to their
      # pre-escaped source text.
      #
      # @param entity [Hash] entity to render
      # @return [String] HTML (still containing placeholders) for the entity
      def html_entity(entity)
        display_text = html_pre_escape(entity[:display_text])
        case entity[:type]
        when ENTITY_TYPE_EMPHASIS
          @renderer.emphasis(display_text)
        when ENTITY_TYPE_DOUBLE_EMPHASIS
          @renderer.double_emphasis(display_text)
        when ENTITY_TYPE_TRIPLE_EMPHASIS
          @renderer.triple_emphasis(display_text)
        when ENTITY_TYPE_STRIKETHROUGH
          @renderer.strikethrough(display_text)
        when ENTITY_TYPE_CODE
          @renderer.code(display_text)
        when ENTITY_TYPE_HASHTAG
          @renderer.hashtag(display_text, html_pre_escape(entity[:hashtag]))
        when ENTITY_TYPE_LINK
          @renderer.link(display_text, entity[:url], html_pre_escape(entity[:title]))
        else
          # Catchall
          html_pre_escape(entity[:text])
        end
      end

      # Pre-escape. Convert bad characters to high UTF-8 characters.
      # We do this dance so we don't throw off the indexes so the entities
      # get inserted correctly: every placeholder is exactly one character.
      #
      # @param string [String, nil] raw text
      # @return [String] text with placeholders, or '' for nil
      def html_pre_escape(string)
        return '' unless string
        HTML_ESCAPE_MAP.inject(string) do |result, escape|
          result.gsub(escape[:pattern], escape[:placeholder])
        end
      end

      # Convert bad characters (now, high UTF-8 characters) to HTML escaped ones
      #
      # @param string [String] text containing placeholders
      # @return [String] text with placeholders replaced by HTML entities
      def html_un_pre_escape(string)
        HTML_ESCAPE_MAP.inject(string) do |result, escape|
          result.gsub(escape[:placeholder], escape[:text])
        end
      end

      # Escapes raw text directly, using the same character table as the
      # placeholder-based path so both always agree.
      #
      # @param string [String, nil] raw text
      # @return [String] HTML-escaped text, or '' for nil
      def html_escape(string)
        return '' unless string
        HTML_ESCAPE_MAP.inject(string) do |result, escape|
          result.gsub(escape[:pattern], escape[:text])
        end
      end
    end
  end
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module Quesadilla
  class Extractor
    # Extract Markdown
    #
    # This module has no public methods.
    module Markdown
      private

      # Gruber's regex is recursive, but I can't figure out how to do it in Ruby without the `g` option.
      # Maybe I should use StringScanner instead. For now, I think it's fine. Everything appears to work
      # as expected.
      NESTED_BRACKETS_REGEX = %r{
        (?>
          [^\[\]]+
        )*
      }x.freeze

      # 2 = Text, 3 = URL, 6 = Title
      LINK_REGEX = %r{
        (
          \[
            (#{NESTED_BRACKETS_REGEX})
          \]
          \(
            [ \t]*
            <?(.*?)>?
            [ \t]*
            (
              (['"])
              (.*?)
              \5
            )?
          \)
        )
      }x.freeze

      # 1 = URL
      AUTOLINK_LINK_REGEX = /<((?:https?|ftp):[^'">\s]+)>/i.freeze

      # 1 = Email
      AUTOLINK_EMAIL_REGEX = %r{
        <
        (?:mailto:)?
        (
          [-.\w]+
          \@
          [-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+
        )
        >
      }xi.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_ITALIC_REGEX = %r{ (\*\*\*|___) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      BOLD_REGEX = %r{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      ITALIC_REGEX = %r{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      STRIKETHROUGH_REGEX = %r{ (~~) (?=\S) (.+?[~]*) (?<=\S) \1 }x.freeze

      # 1 = Delimiter, 2 = Text
      CODE_REGEX = %r{ (`+) (.+?) (?<!`) \1 (?!`) }x.freeze

      # Runs every Markdown extraction that is enabled in @options.
      # Code is extracted first, then links, then the emphasis variants from
      # the longest delimiter to the shortest, then strikethrough; each step
      # blanks what it matched so later steps cannot match inside it.
      def extract_markdown
        extract_markdown_code if @options[:markdown_code]

        if @options[:markdown_links]
          extract_markdown_autolink_links
          extract_markdown_autolink_email
          extract_markdown_links
        end

        extract_markdown_span(BOLD_ITALIC_REGEX, ENTITY_TYPE_TRIPLE_EMPHASIS) if @options[:markdown_triple_emphasis]
        extract_markdown_span(BOLD_REGEX, ENTITY_TYPE_DOUBLE_EMPHASIS) if @options[:markdown_double_emphasis]
        extract_markdown_span(ITALIC_REGEX, ENTITY_TYPE_EMPHASIS) if @options[:markdown_emphasis]
        extract_markdown_span(STRIKETHROUGH_REGEX, ENTITY_TYPE_STRIKETHROUGH) if @options[:markdown_strikethrough]
      end

      # Shared extraction loop: for every match of +regex+ in @working_text,
      # yields the match and its [start, stop] character indices, appends the
      # entity returned by the block to @entities, and blanks the matched
      # span with REPLACE_TOKEN.
      #
      # The span is taken from the match offsets rather than from a textual
      # search, so the recorded indices and the blanked region are always the
      # text that actually matched.
      def consume_markdown_matches(regex)
        # Match until there's no results
        while (match = @working_text.match(regex))
          start = match.begin(0)
          stop = match.end(0)

          # Add the entity
          @entities << yield(match, [start, stop])

          # Remove from the working text
          @working_text[start...stop] = REPLACE_TOKEN * (stop - start)
        end
      end

      # Extracts every span matching +regex+ as an entity of +type+, using
      # capture group 2 as the display text. An optional block receives
      # (entity, match) and must return the entity to store.
      def extract_markdown_span(regex, type)
        consume_markdown_matches(regex) do |match, indices|
          entity = {
            type: type,
            text: match[0],
            display_text: match[2],
            indices: indices
          }

          # Let block modify
          block_given? ? yield(entity, match) : entity
        end
      end

      # Extracts code spans; leading and trailing spaces and tabs are removed
      # from each line of the display text.
      def extract_markdown_code
        extract_markdown_span(CODE_REGEX, ENTITY_TYPE_CODE) do |entity, match|
          entity[:display_text] = match[2].gsub(/^[ \t]*/, '').gsub(/[ \t]*$/, '')
          entity
        end
      end

      # Extracts every span matching +regex+ as a link entity. The entity
      # starts without URL or display text; an optional block receives
      # (entity, match) and must return the entity to store.
      def extract_markdown_autolink(regex)
        consume_markdown_matches(regex) do |match, indices|
          entity = {
            type: ENTITY_TYPE_LINK,
            text: match[0],
            indices: indices
          }

          # Let block modify
          block_given? ? yield(entity, match) : entity
        end
      end

      # Extracts `<http://...>` style links.
      def extract_markdown_autolink_links
        extract_markdown_autolink AUTOLINK_LINK_REGEX do |entity, match|
          entity[:url] = match[1]
          entity[:display_text] = display_url(match[1])
          entity
        end
      end

      # Extracts `<user@example.com>` style links as mailto: URLs.
      def extract_markdown_autolink_email
        extract_markdown_autolink AUTOLINK_EMAIL_REGEX do |entity, match|
          email = match[1]
          entity[:url] = "mailto:#{email}"
          entity[:display_text] = email
          entity
        end
      end

      # Extracts `[text](url "title")` style links.
      def extract_markdown_links
        extract_markdown_span(LINK_REGEX, ENTITY_TYPE_LINK) do |entity, match|
          # Add the URL
          entity[:url] = match[3]

          # Add the title
          entity[:title] = match[6] if match[6]
          entity
        end
      end
    end
  end
end
|