me2text-ruby 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -1
- data/README.md +10 -4
- data/Rakefile +10 -0
- data/lib/me2text/me2text.rb +27 -5
- data/lib/me2text/string_ext.rb +2 -0
- data/lib/me2text/token.rb +12 -12
- data/lib/me2text/version.rb +3 -1
- data/lib/me2text.rb +2 -14
- data/me2text-ruby.gemspec +2 -2
- data/test/{test.rb → me2text_test.rb} +34 -18
- data/test/test_helper.rb +2 -7
- metadata +8 -7
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -94,12 +94,18 @@ TODO
|
|
94
94
|
----
|
95
95
|
|
96
96
|
* Ruby 1.9 지원
|
97
|
+
|
98
|
+
Contributors
|
99
|
+
------------
|
97
100
|
|
98
|
-
|
99
|
-
---------------------
|
101
|
+
Authors ordered by first contribution.
|
100
102
|
|
101
|
-
|
103
|
+
Heungseok Do <codian@gmail.com>
|
104
|
+
MinYoung Jung <kkungkkung@gmail.com>
|
102
105
|
|
103
|
-
|
106
|
+
License
|
107
|
+
-------
|
108
|
+
|
109
|
+
me2text-ruby is released under the MIT license:
|
104
110
|
|
105
111
|
* www.opensource.org/licenses/MIT
|
data/Rakefile
CHANGED
data/lib/me2text/me2text.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# encoding: utf-8
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'me2text/token'
|
4
4
|
|
@@ -71,6 +71,28 @@ module Me2Text
|
|
71
71
|
:limit => nil,
|
72
72
|
:link_handler => nil
|
73
73
|
}.merge(options)
|
74
|
+
|
75
|
+
if RUBY_VERSION < '1.9'
|
76
|
+
begin
|
77
|
+
text.dup.unpack('U*')
|
78
|
+
rescue ArgumentError
|
79
|
+
raise ArgumentError.new('me2text는 유효한 UTF-8 입력만 처리 가능합니다.')
|
80
|
+
end
|
81
|
+
else
|
82
|
+
is_utf8 = case text.encoding
|
83
|
+
when Encoding::UTF_8
|
84
|
+
text.dup.valid_encoding?
|
85
|
+
when Encoding::ASCII_8BIT, Encoding::US_ASCII
|
86
|
+
text.dup.force_encoding(Encoding::UTF_8).valid_encoding?
|
87
|
+
else
|
88
|
+
false
|
89
|
+
end
|
90
|
+
|
91
|
+
raise ArgumentError.new('me2text는 유효한 UTF-8 입력만 처리 가능합니다.') unless is_utf8
|
92
|
+
|
93
|
+
text = text.force_encoding(Encoding::UTF_8)
|
94
|
+
end
|
95
|
+
|
74
96
|
text = strip_linebreak(text) unless options[:allow_line_break]
|
75
97
|
|
76
98
|
text = Token.join_tokens(Token.tokenize(text), format, options)
|
@@ -87,22 +109,22 @@ module Me2Text
|
|
87
109
|
end
|
88
110
|
|
89
111
|
def doublequotize(text) #:nodoc:
|
90
|
-
text.gsub(/\"([^"]*)\"/) { |s| "“#{$1}”" }
|
112
|
+
text.gsub(/\"([^"]*)\"/u) { |s| "“#{$1}”" }
|
91
113
|
end
|
92
114
|
|
93
115
|
# 라인브레이크를 <br /> 태그로 대체한다.
|
94
116
|
def htmlize_linebreak(text)
|
95
|
-
text.gsub(/\r\n/, "<br />").gsub(/\n/, "<br />").gsub(/\r/, "<br />")
|
117
|
+
text.gsub(/\r\n/u, "<br />").gsub(/\n/u, "<br />").gsub(/\r/u, "<br />")
|
96
118
|
end
|
97
119
|
|
98
120
|
# 컨트롤 문자를 제거한다.
|
99
121
|
def strip_control_chars(text)
|
100
|
-
text.gsub(/[[:cntrl:]]/, "")
|
122
|
+
text.gsub(/[[:cntrl:]]/u, "")
|
101
123
|
end
|
102
124
|
|
103
125
|
# 라인브래이크를 공백으로 변환한다
|
104
126
|
def strip_linebreak(text)
|
105
|
-
text.gsub(/\s\r\n/, "").gsub(/\r\n/, " ").gsub(/[\r\n]/, " ")
|
127
|
+
text.gsub(/\s\r\n/u, "").gsub(/\r\n/u, " ").gsub(/[\r\n]/u, " ")
|
106
128
|
end
|
107
129
|
end
|
108
130
|
end
|
data/lib/me2text/string_ext.rb
CHANGED
data/lib/me2text/token.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# encoding: utf-8
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
module Me2Text
|
4
4
|
class Token
|
@@ -6,9 +6,9 @@ module Me2Text
|
|
6
6
|
dquota_s = '\\xe2\\x80\\x9c|\\xe2\\x80\\x9f|\\xe2\\x9d\\x9d|\\xe2\\x80\\xb6|\\xe2\\x80\\x9d|\\x22|\\xef\\xbc\\x82'
|
7
7
|
dquota_e = '\\xe2\\x80\\x9d|\\xe2\\x80\\x9e|\\xe2\\x9d\\x9e|\\xcb\\x9d|\\xe2\\x80\\xb3|\\xe2\\x80\\x9e|\\xe2\\x80\\x9f|\\x22|\\xef\\xbc\\x82'
|
8
8
|
dquota_ne = '\\xe2\\x80\\x9d\\xe2\\x80\\x9e\\xe2\\x9d\\x9e\\xcb\\x9d\\xe2\\x80\\xb3\\xe2\\x80\\x9e\\xe2\\x80\\x9f\\x22\\xef\\xbc\\x82'
|
9
|
-
/(?:#{dquota_s})([^#{dquota_ne}]*)(?:#{dquota_e}):(http[s]?:\/\/[^\s]*)(\s|$)/
|
9
|
+
/(?:#{dquota_s})([^#{dquota_ne}]*)(?:#{dquota_e}):(http[s]?:\/\/[^\s]*)(\s|$)/u
|
10
10
|
end
|
11
|
-
REGEX_URL = /(http[s]?:\/\/[^\s|^\'|^\"]*)([\'|\"|\s]|$)/
|
11
|
+
REGEX_URL = /(http[s]?:\/\/[^\s|^\'|^\"]*)([\'|\"|\s]|$)/u
|
12
12
|
ESCAPE_CHAR = "\xc2\xa0"
|
13
13
|
|
14
14
|
attr_accessor :text
|
@@ -71,7 +71,7 @@ module Me2Text
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def htmlize_chars(text, options = {})
|
74
|
-
html_result = text.to_s.gsub(
|
74
|
+
html_result = text.to_s.gsub(/&/u, "&amp;").gsub(/</u, "&lt;").gsub(/>/u, "&gt;")
|
75
75
|
html_result = textize(html_result, options)
|
76
76
|
html_result
|
77
77
|
end
|
@@ -82,18 +82,18 @@ module Me2Text
|
|
82
82
|
}.merge(options)
|
83
83
|
|
84
84
|
if options[:symbolize]
|
85
|
-
text = text.gsub(
|
86
|
-
gsub(/\(TM\)
|
87
|
-
gsub(/\(R\)
|
88
|
-
gsub(/\(C\)
|
89
|
-
gsub(
|
85
|
+
text = text.gsub(/\.\.\./u, "…").
|
86
|
+
gsub(/\(TM\)/u, "™").
|
87
|
+
gsub(/\(R\)/u, "®").
|
88
|
+
gsub(/\(C\)/u, "©").
|
89
|
+
gsub(/--/u, "—")
|
90
90
|
end
|
91
91
|
text
|
92
92
|
end
|
93
93
|
|
94
94
|
class << self
|
95
95
|
def tokenize(text, options = {})
|
96
|
-
tokenize_me2link(text.gsub(/\\\"
|
96
|
+
tokenize_me2link(text.gsub(/\\\"/u, ESCAPE_CHAR), options)
|
97
97
|
end
|
98
98
|
|
99
99
|
def tokenize_me2link(text, options = {})
|
@@ -295,7 +295,7 @@ module Me2Text
|
|
295
295
|
end
|
296
296
|
|
297
297
|
class Keyword < Token
|
298
|
-
KEYWORD_REGEX = /(\[([^\[\]]+)\])/
|
298
|
+
KEYWORD_REGEX = /(\[([^\[\]]+)\])/u
|
299
299
|
attr_accessor :link
|
300
300
|
|
301
301
|
def initialize(keyword, options)
|
@@ -303,7 +303,7 @@ module Me2Text
|
|
303
303
|
_keyword = keyword.to_s.strip.scan(KEYWORD_REGEX)
|
304
304
|
_keyword = _keyword.flatten[1]
|
305
305
|
|
306
|
-
raise "키워드가 없습니다." if _keyword.nil?
|
306
|
+
raise ArgumentError.new("키워드가 없습니다.") if _keyword.nil?
|
307
307
|
|
308
308
|
@text = _keyword.gsub(ESCAPE_CHAR, "\"")
|
309
309
|
end
|
data/lib/me2text/version.rb
CHANGED
data/lib/me2text.rb
CHANGED
@@ -1,20 +1,8 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
major, minor, patch = RUBY_VERSION.split('.')
|
4
|
-
if major.to_i == 1 && minor.to_i > 8
|
5
|
-
raise("me2text는 ruby 1.9 이상은 현재 지원하지 않습니다.")
|
6
|
-
else
|
7
|
-
# Ruby 1.8 $KCODE check.
|
8
|
-
unless $KCODE[0].chr =~ /u/i
|
9
|
-
raise("me2text를 사용하기 위해서는 $KCODE 변수를 'UTF8' 또는 'u'로 지정해야 합니다.")
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
1
|
+
# -*- encoding: utf-8 -*-
|
13
2
|
$:.push(File.expand_path("..", __FILE__))
|
14
3
|
|
15
4
|
module Me2Text
|
16
5
|
end
|
17
6
|
|
18
7
|
require 'me2text/version'
|
19
|
-
require 'me2text/me2text'
|
20
|
-
|
8
|
+
require 'me2text/me2text'
|
data/me2text-ruby.gemspec
CHANGED
@@ -5,12 +5,12 @@ require "me2text/version"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "me2text-ruby"
|
7
7
|
s.version = Me2Text::VERSION
|
8
|
-
s.authors = ["codian"]
|
8
|
+
s.authors = ["codian", 'kkung']
|
9
9
|
s.email = ["codian@gmail.com"]
|
10
10
|
s.homepage = "https://github.com/me2day/me2text-ruby"
|
11
11
|
s.summary = %q{me2text parser for ruby}
|
12
12
|
s.description = %q{me2text is text format for me2day posting.
|
13
|
-
me2text-ruby is ruby library to me2text to HTML or plain text}
|
13
|
+
me2text-ruby is ruby library to convert me2text to HTML or plain text}
|
14
14
|
s.rubyforge_project = "me2text-ruby"
|
15
15
|
s.files = Dir['{lib/**/*,test/**/*}'] +
|
16
16
|
%w(.gitignore me2text-ruby.gemspec Gemfile MIT-LICENSE Rakefile README.md)
|
@@ -1,8 +1,38 @@
|
|
1
|
-
# encoding: utf-8
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
require File.expand_path('../test_helper', __FILE__)
|
4
4
|
|
5
|
-
class Me2TextTest < Test::Unit::TestCase
|
5
|
+
class Me2TextTest < Test::Unit::TestCase
|
6
|
+
|
7
|
+
if RUBY_VERSION < '1.9'
|
8
|
+
def test_encoding
|
9
|
+
#'안녕?' in EUC_KR
|
10
|
+
invalid_text = "\xBE\xC8\xB3\xE7?"
|
11
|
+
|
12
|
+
assert_raise ArgumentError do
|
13
|
+
Me2Text.me2text(invalid_text)
|
14
|
+
end
|
15
|
+
|
16
|
+
assert_nothing_raised ArgumentError do
|
17
|
+
require 'iconv'
|
18
|
+
Me2Text.me2text(Iconv.conv('UTF-8', 'EUC-KR', invalid_text))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
else
|
22
|
+
def test_encoding
|
23
|
+
#'안녕?' in EUC_KR
|
24
|
+
invalid_text = "\xBE\xC8\xB3\xE7?"
|
25
|
+
|
26
|
+
assert_raise ArgumentError do
|
27
|
+
Me2Text.me2text(invalid_text)
|
28
|
+
end
|
29
|
+
|
30
|
+
assert_nothing_raised ArgumentError do
|
31
|
+
Me2Text.me2text(invalid_text.force_encoding(Encoding::EUC_KR).encode(Encoding::UTF_8))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
6
36
|
def test_to_html
|
7
37
|
cases = [
|
8
38
|
# 일반 텍스트
|
@@ -30,14 +60,7 @@ class Me2TextTest < Test::Unit::TestCase
|
|
30
60
|
]
|
31
61
|
|
32
62
|
cases.each_with_index do |test, index|
|
33
|
-
|
34
|
-
expect = test[1]
|
35
|
-
result = Me2Text.me2text(text, :html)
|
36
|
-
if expect != result
|
37
|
-
flunk "TEXT: #{text}\n" +
|
38
|
-
"EXPECT: #{expect}\n" +
|
39
|
-
"RESULT: #{result}\n"
|
40
|
-
end
|
63
|
+
assert_equal test[1], Me2Text.me2text(test[0], :html)
|
41
64
|
end
|
42
65
|
end
|
43
66
|
|
@@ -50,14 +73,7 @@ class Me2TextTest < Test::Unit::TestCase
|
|
50
73
|
]
|
51
74
|
|
52
75
|
cases.each_with_index do |test, index|
|
53
|
-
|
54
|
-
expect = test[1]
|
55
|
-
result = Me2Text.me2text(text, :html, :allow_line_break => true)
|
56
|
-
if expect != result
|
57
|
-
flunk "TEXT: #{text.inspect}\n" +
|
58
|
-
"EXPECT: #{expect}\n" +
|
59
|
-
"RESULT: #{result}\n"
|
60
|
-
end
|
76
|
+
assert_equal test[1], Me2Text.me2text(test[0], :html, :allow_line_break => true)
|
61
77
|
end
|
62
78
|
end
|
63
79
|
end
|
data/test/test_helper.rb
CHANGED
@@ -1,9 +1,4 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
major, minor, patch = RUBY_VERSION.split('.')
|
4
|
-
$KCODE = "U" if major.to_i == 1 && minor.to_i < 9
|
1
|
+
# -*- encoding: utf-8 -*-
|
5
2
|
|
6
3
|
require 'test/unit'
|
7
|
-
require File.expand_path('../../lib/me2text', __FILE__)
|
8
|
-
|
9
|
-
|
4
|
+
require File.expand_path('../../lib/me2text', __FILE__)
|
metadata
CHANGED
@@ -1,26 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: me2text-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
- 2
|
10
|
+
version: 1.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- codian
|
14
|
+
- kkung
|
14
15
|
autorequire:
|
15
16
|
bindir: bin
|
16
17
|
cert_chain: []
|
17
18
|
|
18
|
-
date: 2012-05-
|
19
|
+
date: 2012-05-22 00:00:00 Z
|
19
20
|
dependencies: []
|
20
21
|
|
21
22
|
description: |-
|
22
23
|
me2text is text format for me2day posting.
|
23
|
-
me2text-ruby is ruby library to me2text to HTML or plain text
|
24
|
+
me2text-ruby is ruby library to convert me2text to HTML or plain text
|
24
25
|
email:
|
25
26
|
- codian@gmail.com
|
26
27
|
executables: []
|
@@ -35,7 +36,7 @@ files:
|
|
35
36
|
- lib/me2text/token.rb
|
36
37
|
- lib/me2text/version.rb
|
37
38
|
- lib/me2text.rb
|
38
|
-
- test/test.rb
|
39
|
+
- test/me2text_test.rb
|
39
40
|
- test/test_helper.rb
|
40
41
|
- .gitignore
|
41
42
|
- me2text-ruby.gemspec
|
@@ -77,6 +78,6 @@ signing_key:
|
|
77
78
|
specification_version: 3
|
78
79
|
summary: me2text parser for ruby
|
79
80
|
test_files:
|
80
|
-
- test/test.rb
|
81
|
+
- test/me2text_test.rb
|
81
82
|
- test/test_helper.rb
|
82
83
|
has_rdoc:
|