goethe 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 169efb6b4b42556780aeabb970d696b7d966f324
4
+ data.tar.gz: 5e2305c4ea7fe484696fc4842295ff680c25d9f1
5
+ SHA512:
6
+ metadata.gz: b392b59a5b0b4b6cd4295fcc1fcabc0e65a1113c2fcfdcf0d7e0767cbb301df8c7db1bc2f75754c6a1a95a4f31ff9727189df3e04e643fb0dc22e113b5f007f8
7
+ data.tar.gz: 33772e2c75a50fbe2a5c36a06832c4304baf776b3caae853b87a929cd470854d2c6598ad4ad414661b43723e8c38625b98338cece0be743a9f8d965fbff48d1f
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
@@ -0,0 +1,26 @@
1
+ rspec:
2
+ script:
3
+ - http_proxy=http://10.8.15.225:8443 https_proxy=http://10.8.15.225:8443 bundle install
4
+ - bundle exec rspec spec
5
+ tags:
6
+ - ruby
7
+ - rspec
8
+
9
+ # generate-yard-doc:
10
+ # script:
11
+ # - http_proxy=http://10.8.15.225:8443 https_proxy=http://10.8.15.225:8443 bundle install --without development production
12
+ # - bundle exec yard
13
+ # - rsync -av doc/ deployer@intweb01.jianshu.io:/home/deployer/deploy/docs.jianshu.io/maleskine
14
+ # only:
15
+ # - master
16
+ # tags:
17
+ # - docs
18
+ # allow_failure: true
19
+
20
+ # karma:
21
+ # script:
22
+ # - cnpm install
23
+ # - npm run bs_test
24
+ # tags:
25
+ # - javascript
26
+ # - vue
@@ -0,0 +1 @@
1
+ goethe
@@ -0,0 +1 @@
1
+ ruby-2.4.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://gems.ruby-china.org"
2
+
3
+ # Specify your gem's dependencies in goethe.gemspec
4
+ gemspec
@@ -0,0 +1,37 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ goethe (0.6.0)
5
+ nokogiri (~> 1.8.1)
6
+
7
+ GEM
8
+ remote: https://gems.ruby-china.org/
9
+ specs:
10
+ diff-lcs (1.3)
11
+ mini_portile2 (2.3.0)
12
+ nokogiri (1.8.1)
13
+ mini_portile2 (~> 2.3.0)
14
+ rspec (3.7.0)
15
+ rspec-core (~> 3.7.0)
16
+ rspec-expectations (~> 3.7.0)
17
+ rspec-mocks (~> 3.7.0)
18
+ rspec-core (3.7.0)
19
+ rspec-support (~> 3.7.0)
20
+ rspec-expectations (3.7.0)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.7.0)
23
+ rspec-mocks (3.7.0)
24
+ diff-lcs (>= 1.2.0, < 2.0)
25
+ rspec-support (~> 3.7.0)
26
+ rspec-support (3.7.0)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ bundler (~> 1.3)
33
+ goethe!
34
+ rspec (~> 3.7.0)
35
+
36
+ BUNDLED WITH
37
+ 1.15.4
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 jjy
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ Goethe
2
+ ======
3
+
4
+ 歌德 - Text processing library used by Maleskine
5
+
6
+ ### 0.6.0
7
+
8
+ - 升级 gems
9
+ - 简化,去除现在 Maleskine 没有用到的方法
10
+
11
+ ### 0.5.0
12
+
13
+ - 给 `Goethe::Utils.remove_html_tags` 添加参数 `replacement` 用以指定替换 HTML Tag 的字符
14
+
15
+ ### 0.4.0
16
+
17
+ - 去除在 `link` 上添加的 `rel=nofollow`
18
+ - markdown render 时,带 `class=m-footnote` 的 `link` 不添加 `target=_blank`
19
+ - sanitize 白名单针对 `a` 元素添加 `id` 和 `class` 属性
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read while building the gem.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'goethe/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name          = "goethe"
  gem.version       = Goethe::VERSION
  gem.authors       = ["larryzhao"]
  gem.email         = ["thehiddendepth@gmail.com"]
  gem.summary       = "Goethe - Text processing library."
  gem.description   = "Goethe - Text processing library."
  gem.homepage      = ""
  gem.license       = "MIT"

  # Packaged files: everything tracked by git, split on the record separator.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "nokogiri", "~> 1.8.1"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rspec", "~> 3.7.0"
end
@@ -0,0 +1,3 @@
1
# Mirrors lib/goethe.rb. The gem ships goethe/regex, not goethe/markdown,
# so requiring the latter would raise LoadError.
require "goethe/version"
require "goethe/regex"
require "goethe/utils"
@@ -0,0 +1,3 @@
1
+ require "goethe/version"
2
+ require "goethe/regex"
3
+ require "goethe/utils"
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
module Goethe
  # Registry of the regular expressions used by Goethe's text helpers.
  # Look patterns up with `Goethe::Regex[:KEY]`.
  class Regex
    # Pattern table; filled in below and read through `Regex.[]`.
    REGEXEN = {}

    # Tag names recognised by the :HTML_TAGS pattern.
    HTML_TAGS = %w[a abbr address area article aside audio b base bdi bdo blockquote body br button canvas caption cite code col colgroup command data datagrid datalist dd del details dfn div dl font dt em embed eventsource fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 head header hgroup hr html i iframe img input ins kbd keygen label legend li link mark map menu meta meter nav noscript object ol optgroup option output p param pre progress q ruby rp rt s samp script section select small source span strong style sub summary sup table tbody td textarea tfoot th thead time title tr track u ul var video wbr center]
    # HTML entities that are stripped together with the tags.
    ADDITIONAL_HTML_TAG = %w[&nbsp; &copy;]

    # An opening or closing tag whose name starts with one of HTML_TAGS,
    # including its attributes (case-insensitive, may span lines).
    REGEXEN[:HTML_TAGS]           = /<\/?(#{HTML_TAGS.join('|')}).*?\/?>/im
    # NOTE: after interpolation the trailing `;?` binds only to the last
    # alternative, i.e. this matches `&nbsp;`, `&copy;` or `&copy;;`.
    REGEXEN[:ADDITIONAL_HTML_TAG] = /(#{ADDITIONAL_HTML_TAG.join('|')};?)/m
    REGEXEN[:MARKDOWN]            = { :HEADERS => /(^\s*#+\s*)|(^[-=]+$)/m,
                                      :EMPHASIS => /^(\*+|_+)(.*)(\1)$/,
                                      :BLOCKQUOTES => /^\s*>\s*/m,
                                      :LISTS => /^\s{0,3}(-|\*|\+|\d\.)\s+/,
                                      :HRULERS => /^(\*\s{0,3}\*\s{0,3}\*|-\s{0,3}-\s{0,3}-|_\s{0,3}_\s{0,3}_)[\s\*]*$/,
                                      :IMAGES => /!\[.*?\]\(.*?\)/m,
                                      :LINKS => /\[(.*?)\]\(.*?\)/m,
                                      # Lazy on purpose: a greedy `.*` would treat everything from
                                      # the first `<` to the last `>` in the text as one link.
                                      :QUICK_LINKS => /<(.*?)>/m,
                                      :COPYRIGHT => /&copy;/i
                                    }

    #
    # Looks up a registered pattern.
    #
    # @param [Symbol] key - :HTML_TAGS, :ADDITIONAL_HTML_TAG or :MARKDOWN
    #
    # @return [Regexp, Hash, nil] the pattern, the Markdown pattern hash for
    #   :MARKDOWN, or nil when the key is not registered
    #
    def self.[](key)
      REGEXEN[key]
    end
  end
end
@@ -0,0 +1,102 @@
1
+ require "nokogiri"
2
+
3
module Goethe
  # Stateless text helpers: URL auto-linking and HTML / Markdown stripping.
  class Utils
    class << self

      #
      # Escapes angle-bracketed tags, turns http/https/ftp URLs into `<a>`
      # tags and turns line breaks into `<br>` tags.
      #
      # NOTE: the URL character class contains `&` but not `;`, so a URL that
      # is immediately followed by an escaped tag absorbs the leading `&lt`.
      #
      # @param [String, nil] str
      #
      # @return [String] a new string (the argument is left untouched);
      #   "" when `str` is nil, like the other helpers in this class
      #
      def auto_link(str)
        return "" if str.nil?

        # Neutralise anything that looks like a tag. `/<[^>]*>/` matches the
        # same spans as the previous, more convoluted pattern; non-bang gsub
        # keeps the block's value a String in every case.
        escaped = str.gsub(/<[^>]*>/) do |tag|
          tag.gsub("<", "&lt;").gsub(">", "&gt;")
        end

        linked = escaped.gsub(/((https|http|ftp):\/\/)([a-zA-Z0-9.\-_%&=\/\#:\?]+)/i) do
          href = "#{Regexp.last_match(1)}#{Regexp.last_match(3)}"
          %Q{<a href="#{href}" rel="nofollow" target="_blank">#{href}</a>}
        end

        linked.gsub(/\r\n|\n/, "<br>")
      end

      #
      # Replaces every HTML tag and listed HTML entity in the text;
      # the replacement defaults to a single space.
      #
      # @param [String, nil] str
      # @param [String] replacement - text substituted for each tag/entity,
      #   a space by default
      #
      # @return [String] "" when `str` is nil
      #
      def remove_html_tags(str, replacement: " ")
        return "" if str.nil?

        str.gsub(Goethe::Regex[:HTML_TAGS], replacement)
           .gsub(Goethe::Regex[:ADDITIONAL_HTML_TAG], replacement)
      end

      #
      # Removes all Markdown control characters from the text.
      #
      # @param [String, nil] str
      #
      # @return [String] "" when `str` is nil
      #
      def remove_markdown_symbols(str)
        return "" if str.nil?

        markdown = Goethe::Regex[:MARKDOWN]

        # Plain deletions, in order: headers, blockquote markers, horizontal
        # rules, list bullets. (The rule step used to pass a block as well as
        # a replacement string; gsub ignores the block in that form.)
        stripped = %i[HEADERS BLOCKQUOTES HRULERS LISTS].reduce(str) do |text, key|
          text.gsub(markdown[key], "")
        end

        stripped
          .gsub(markdown[:EMPHASIS]) { Regexp.last_match(2) }          # keep the emphasised text
          .gsub(markdown[:IMAGES], "")                                 # drop images entirely
          .gsub(markdown[:LINKS]) { Regexp.last_match(1) }             # keep the link label
          .gsub(markdown[:COPYRIGHT], "")                              # drop the &copy; entity
          .gsub(markdown[:QUICK_LINKS]) { " #{Regexp.last_match(1)} " } # <quick_link> -> padded content
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Goethe
  # Gem version, read by goethe.gemspec. Frozen so the constant cannot be
  # mutated in place.
  VERSION = "0.6.0".freeze
end
@@ -0,0 +1 @@
1
+ require 'goethe'
@@ -0,0 +1,127 @@
1
+ require "spec_helper"
2
+
3
# Behaviour specs for the Goethe::Utils text helpers.
describe Goethe::Utils do
  describe ".remove_html_tags" do
    it "should replace html tags with space" do
      str = "<p>我爱你</p>"
      expect(described_class.remove_html_tags(str)).to eq(" 我爱你 ")
    end

    it "should replace html char with space" do
      str = "<p>hello&nbsp;</p>"
      expect(described_class.remove_html_tags(str)).to eq(" hello ")
    end

    it "should replace html elements with given character" do
      str = "<p>hello&nbsp;</p>"
      expect(described_class.remove_html_tags(str, :replacement => "**")).to eq("**hello****")
    end
  end

  describe ".remove_markdown_symbols" do
    it "should remove headers" do
      str = "#abc\n"
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\n")

      str = " #### abc\n"
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\n")

      # A run of `#` in the middle of a line is not a header marker.
      str = "cba #### abc\n"
      expect(described_class.remove_markdown_symbols(str)).to eq("cba #### abc\n")

      str = "abc\n==="
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\n")

      str = "abc\n---"
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\n")
    end

    context "Emphasis" do
      it "should remove 1 * emphasis" do
        str = "*abc*"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")
      end

      it "should remove 2 * emphasis" do
        str = "**abc**"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")
      end

      it "should remove 3 * emphasis" do
        str = "***abc***"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")
      end

      it "should remove _ emphasis" do
        str = "_abc_"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")

        str = "__abc__"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")

        str = "___abc___"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc")
      end

      it "should remove uneven emphasis" do
        # Only the balanced part of the markers is stripped.
        str = "***abc**"
        expect(described_class.remove_markdown_symbols(str)).to eq("*abc")

        str = "__abc___"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc_")
      end
    end

    it "should remove blockquotes" do
      str = "> abc\n"
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\n")
    end

    it "should remove lists" do
      str = "* abc\n* bbc"
      expect(described_class.remove_markdown_symbols(str)).to eq("abc\nbbc")
    end

    context "hrulers" do
      it "should remove hrulers" do
        str = "abc\n* * *\nbbc\n***\ncbc\n*****\ndbc\n- - -\nebc\n___"
        expect(described_class.remove_markdown_symbols(str)).to eq("abc\n\nbbc\n\ncbc\n\ndbc\n\nebc\n")
      end
    end

    context "links" do
      it "should remove links correctly" do
        str = "please go to google: [Google](http://www.google.com)"
        expect(described_class.remove_markdown_symbols(str)).to eq("please go to google: Google")
      end
    end

    context "images" do
      it "should remove images correctly" do
        str = "This is ![Alt text](/path/to/img.jpg 'Optional title') an image."
        expect(described_class.remove_markdown_symbols(str)).to eq("This is  an image.")
      end
    end

    context "copyright" do
      it "should remove the copyright sign" do
        str = "Here is our &copy; brand"
        expect(described_class.remove_markdown_symbols(str)).to eq("Here is our  brand")
      end
    end
  end

  describe ".auto_link" do
    it "should parse correctly" do
      str = "hellohttp://www.google.com/google"
      expect(described_class.auto_link(str)).to eq(%Q{hello<a href="http://www.google.com/google" rel="nofollow" target="_blank">http://www.google.com/google</a>})

      # The URL ends at the first character outside the allowed set (here a space).
      str = "hellohttp://www.google.com/ google"
      expect(described_class.auto_link(str)).to eq(%Q{hello<a href="http://www.google.com/" rel="nofollow" target="_blank">http://www.google.com/</a> google})
    end

    it "should allow params" do
      str = "hellohttp://www.google.com:8080/?search=google&keyword=google"
      expect(described_class.auto_link(str)).to eq(%Q{hello<a href="http://www.google.com:8080/?search=google&keyword=google" rel="nofollow" target="_blank">http://www.google.com:8080/?search=google&keyword=google</a>})
    end
  end
end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: goethe
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.0
5
+ platform: ruby
6
+ authors:
7
+ - larryzhao
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-10-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 3.7.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 3.7.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.8.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.1
55
+ description: Goethe - Text processing library.
56
+ email:
57
+ - thehiddendepth@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".gitlab-ci.yml"
64
+ - ".ruby-gemset"
65
+ - ".ruby-version"
66
+ - Gemfile
67
+ - Gemfile.lock
68
+ - LICENSE.txt
69
+ - README.md
70
+ - goethe.gemspec
71
+ - goethe.rb
72
+ - lib/goethe.rb
73
+ - lib/goethe/regex.rb
74
+ - lib/goethe/utils.rb
75
+ - lib/goethe/version.rb
76
+ - spec/spec_helper.rb
77
+ - spec/utils_spec.rb
78
+ homepage: ''
79
+ licenses:
80
+ - MIT
81
+ metadata: {}
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.6.14
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Goethe - Text processing library.
102
+ test_files:
103
+ - spec/spec_helper.rb
104
+ - spec/utils_spec.rb