twitter-text 1.4.11 → 1.4.12

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/.gitignore CHANGED
@@ -1,6 +1,40 @@
1
1
  *.gem
2
2
  *.rbc
3
+ *.sw[a-p]
4
+ *.tmproj
5
+ *.tmproject
6
+ *.un~
7
+ *~
8
+ .DS_Store
9
+ .Spotlight-V100
10
+ .Trashes
11
+ ._*
3
12
  .bundle
4
- pkg/*
13
+ .config
14
+ .directory
15
+ .elc
16
+ .emacs.desktop
17
+ .emacs.desktop.lock
18
+ .redcar
19
+ .yardoc
20
+ Desktop.ini
21
+ Gemfile.lock
22
+ Icon?
23
+ InstalledFiles
24
+ Session.vim
25
+ Thumbs.db
26
+ \#*\#
27
+ _yardoc
28
+ auto-save-list
5
29
  coverage
6
30
  doc
31
+ lib/bundler/man
32
+ pkg
33
+ pkg/*
34
+ rdoc
35
+ spec/reports
36
+ test/tmp
37
+ test/version_tmp
38
+ tmp
39
+ tmtags
40
+ tramp
data/Rakefile CHANGED
@@ -1,7 +1,9 @@
1
+ #!/usr/bin/env rake
1
2
  require 'bundler'
2
3
  Bundler::GemHelper.install_tasks
3
4
 
4
- task :default => ["spec", "test:conformance"]
5
+ task :default => ['spec', 'test:conformance']
6
+ task :test => :spec
5
7
 
6
8
  require 'rspec/core/rake_task'
7
9
  RSpec::Core::RakeTask.new(:spec)
@@ -48,9 +50,9 @@ namespace :test do
48
50
  end
49
51
  end
50
52
 
51
- require 'rake/rdoctask'
53
+ require 'rdoc/task'
52
54
  namespace :doc do
53
- Rake::RDocTask.new do |rd|
55
+ RDoc::Task.new do |rd|
54
56
  rd.main = "README.rdoc"
55
57
  rd.rdoc_dir = 'doc'
56
58
  rd.rdoc_files.include("README.rdoc", "lib/**/*.rb")
data/lib/extractor.rb CHANGED
@@ -158,11 +158,21 @@ module Twitter
158
158
  text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
159
159
  valid_url_match_data = $~
160
160
 
161
+ start_position = valid_url_match_data.char_begin(3)
162
+ end_position = valid_url_match_data.char_end(3)
163
+
164
+ # If protocol is missing, check against valid_ascii_domain
165
+ if !protocol
166
+ next unless domain =~ Twitter::Regex[:valid_ascii_domain]
167
+ if $~.char_begin(0)
168
+ start_position += $~.char_begin(0)
169
+ url.sub!(domain, $~.to_s())
170
+ end
171
+ end
172
+
161
173
  # Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
162
174
  # the short URLs without protocol and path, i.e., [domain].[ccTLD]
163
175
  unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
164
- start_position = valid_url_match_data.char_begin(3)
165
- end_position = valid_url_match_data.char_end(3)
166
176
  urls << {
167
177
  :url => url,
168
178
  :indices => [start_position, end_position]
data/lib/regex.rb CHANGED
@@ -39,9 +39,16 @@ module Twitter
39
39
  0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
40
40
  0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
41
41
  0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
42
- ].flatten.freeze
43
- SPACE_CHAR_CLASS_VALUE = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join(''))
44
- REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|'))
42
+ ].flatten.map{|c| [c].pack('U*')}.freeze
43
+ REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o
44
+
45
+ # Character not allowed in Tweets
46
+ INVALID_CHARACTERS = [
47
+ 0xFFFE, 0xFEFF, # BOM
48
+ 0xFFFF, # Special
49
+ 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
50
+ ].map{|cp| [cp].pack('U') }.freeze
51
+ REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o
45
52
 
46
53
  REGEXEN[:at_signs] = /[@@]/
47
54
  REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
@@ -97,7 +104,7 @@ module Twitter
97
104
  regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
98
105
  ].join('').freeze
99
106
 
100
- HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|[「」。、.,!?!?:;"'])/
107
+ HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|[「」。、.,!?!?:;"'])/o
101
108
 
102
109
  # A hashtag must contain latin characters, numbers and underscores, but not all numbers.
103
110
  HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
@@ -111,11 +118,11 @@ module Twitter
111
118
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
112
119
 
113
120
  # URL related hash regex collection
114
- REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@@\.]|^)/i
121
+ REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@@\.#{INVALID_CHARACTERS.join('')}]|^)/io
115
122
 
116
- DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
117
- REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
118
- REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
123
+ DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:][:cntrl:]#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
124
+ REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
125
+ REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
119
126
 
120
127
  REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
121
128
  REGEXEN[:valid_ccTLD] = %r{
@@ -134,26 +141,36 @@ module Twitter
134
141
  REGEXEN[:valid_domain] = /(?:
135
142
  #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
136
143
  (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
137
- )/ix
138
- REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
144
+ )/iox
145
+
146
+ # This is used in Extractor
147
+ REGEXEN[:valid_ascii_domain] = /
148
+ (?:(?:[[:alnum:]\-_]|#{REGEXEN[:latin_accents]})+\.)+
149
+ (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
150
+ /iox
151
+
152
+ # This is used in Extractor to filter out unwanted URLs.
153
+ REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/io
139
154
 
140
155
  REGEXEN[:valid_port_number] = /[0-9]+/
141
156
 
142
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
157
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\.\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/io
143
158
  # Allow URL paths to contain balanced parens
144
159
  # 1. Used in Wikipedia URLs like /Primer_(film)
145
160
  # 2. Used in IIS sessions like /S(dfd346)/
146
- REGEXEN[:wikipedia_disambiguation] = /(?:\(#{REGEXEN[:valid_general_url_path_chars]}+\))/i
147
- # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user
148
- REGEXEN[:valid_url_path_chars] = /(?:
149
- #{REGEXEN[:wikipedia_disambiguation]}|
150
- @#{REGEXEN[:valid_general_url_path_chars]}+\/|
151
- [\.,]#{REGEXEN[:valid_general_url_path_chars]}?|
152
- #{REGEXEN[:valid_general_url_path_chars]}+
153
- )/ix
161
+ REGEXEN[:valid_url_balanced_parens] = /\(#{REGEXEN[:valid_general_url_path_chars]}+\)/io
154
162
  # Valid end-of-path characters (so /foo. does not gobble the period).
155
163
  # 1. Allow =&# for empty URL parameters and other URL-join artifacts
156
- REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io
164
+ REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|(?:#{REGEXEN[:valid_url_balanced_parens]})/io
165
+ # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
166
+ REGEXEN[:valid_url_path] = /(?:
167
+ (?:
168
+ #{REGEXEN[:valid_general_url_path_chars]}*
169
+ (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
170
+ #{REGEXEN[:valid_url_path_ending_chars]}
171
+ )|(?:@#{REGEXEN[:valid_general_url_path_chars]}+\/)
172
+ )/iox
173
+
157
174
  REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i
158
175
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i
159
176
  REGEXEN[:valid_url] = %r{
@@ -163,13 +180,7 @@ module Twitter
163
180
  (https?:\/\/)? # $4 Protocol (optional)
164
181
  (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
165
182
  (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
166
- (/
167
- (?:
168
- #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char
169
- #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case
170
- #{REGEXEN[:valid_url_path_ending_chars]} # Just a # case
171
- )?
172
- )? # $7 URL Path and anchor
183
+ (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
173
184
  (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
174
185
  )
175
186
  )
data/lib/validation.rb CHANGED
@@ -2,13 +2,6 @@ module Twitter
2
2
  module Validation extend self
3
3
  MAX_LENGTH = 140
4
4
 
5
- # Character not allowed in Tweets
6
- INVALID_CHARACTERS = [
7
- 0xFFFE, 0xFEFF, # BOM
8
- 0xFFFF, # Special
9
- 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
10
- ].map{|cp| [cp].pack('U') }.freeze
11
-
12
5
  # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
13
6
  # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
14
7
  # string no matter which actual form was transmitted. For example:
@@ -38,7 +31,7 @@ module Twitter
38
31
  return :empty if !text || text.empty?
39
32
  begin
40
33
  return :too_long if tweet_length(text) > MAX_LENGTH
41
- return :invalid_characters if INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
34
+ return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
42
35
  rescue ArgumentError, ActiveSupport::Multibyte::EncodingError => e
43
36
  # non-Unicode value.
44
37
  return :invalid_characters
@@ -1,6 +1,5 @@
1
- #encoding: UTF-8
2
- # require File.dirname(__FILE__) + '/spec_helper'
3
- require 'spec_helper'
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
4
3
 
5
4
  class TestAutolink
6
5
  include Twitter::Autolink
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # encoding: utf-8
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  class TestExtractor
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # encoding: utf-8
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  class TestHitHighlighter
@@ -1,6 +1,5 @@
1
- # encoding: UTF-8
2
-
3
- require 'spec_helper'
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
4
3
 
5
4
  describe Twitter::Rewriter do
6
5
  def original_text; end
data/spec/test_urls.rb CHANGED
@@ -1,4 +1,5 @@
1
- #encoding: UTF-8
1
+ # encoding: utf-8
2
+
2
3
  module TestUrls
3
4
  VALID = [
4
5
  "http://google.com",
@@ -26,6 +27,7 @@ module TestUrls
26
27
  "http://a_b.c-d.com",
27
28
  "http://a-b.b.com",
28
29
  "http://twitter-dash.com",
30
+ "http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx",
29
31
  "www.foobar.com",
30
32
  "WWW.FOOBAR.COM",
31
33
  "www.foobar.co.jp",
@@ -45,7 +47,11 @@ module TestUrls
45
47
  "http://trailingdash-.com",
46
48
  "http://no_underscores.com",
47
49
  "http://test.c_o_m",
48
- "http://test.c-o-m"
50
+ "http://test.c-o-m",
51
+ "http://twitt#{[0x202A].pack('U')}er.com",
52
+ "http://twitt#{[0x202B].pack('U')}er.com",
53
+ "http://twitt#{[0x202C].pack('U')}er.com",
54
+ "http://twitt#{[0x202D].pack('U')}er.com",
55
+ "http://twitt#{[0x202E].pack('U')}er.com",
49
56
  ] unless defined?(TestUrls::INVALID)
50
-
51
57
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require File.dirname(__FILE__) + '/spec_helper'
2
3
 
3
4
  major, minor, patch = RUBY_VERSION.split('.')
data/spec/unicode_spec.rb CHANGED
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # encoding: utf-8
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  describe Twitter::Unicode do
@@ -28,4 +28,4 @@ describe Twitter::Unicode do
28
28
  lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError)
29
29
  end
30
30
 
31
- end
31
+ end
@@ -1,4 +1,4 @@
1
- #encoding: BINARY
1
+ # encoding: binary
2
2
  require File.dirname(__FILE__) + '/spec_helper'
3
3
 
4
4
  class TestValidation
@@ -40,4 +40,4 @@ describe Twitter::Validation do
40
40
  TestValidation.new.tweet_invalid?(char * 141).should == :too_long
41
41
  end
42
42
 
43
- end
43
+ end
data/twitter-text.gemspec CHANGED
@@ -1,6 +1,8 @@
1
- spec = Gem::Specification.new do |s|
1
+ # encoding: utf-8
2
+
3
+ Gem::Specification.new do |s|
2
4
  s.name = "twitter-text"
3
- s.version = "1.4.11"
5
+ s.version = "1.4.12"
4
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
5
7
  "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
6
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
@@ -14,6 +16,7 @@ spec = Gem::Specification.new do |s|
14
16
 
15
17
  s.add_development_dependency "nokogiri"
16
18
  s.add_development_dependency "rake"
19
+ s.add_development_dependency "rdoc"
17
20
  s.add_development_dependency "rspec"
18
21
  s.add_development_dependency "simplecov"
19
22
  s.add_runtime_dependency "activesupport"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 4
9
- - 11
10
- version: 1.4.11
9
+ - 12
10
+ version: 1.4.12
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Sanford
@@ -22,7 +22,7 @@ autorequire:
22
22
  bindir: bin
23
23
  cert_chain: []
24
24
 
25
- date: 2011-09-26 00:00:00 -07:00
25
+ date: 2011-10-04 00:00:00 -07:00
26
26
  default_executable:
27
27
  dependencies:
28
28
  - !ruby/object:Gem::Dependency
@@ -54,7 +54,7 @@ dependencies:
54
54
  type: :development
55
55
  version_requirements: *id002
56
56
  - !ruby/object:Gem::Dependency
57
- name: rspec
57
+ name: rdoc
58
58
  prerelease: false
59
59
  requirement: &id003 !ruby/object:Gem::Requirement
60
60
  none: false
@@ -68,7 +68,7 @@ dependencies:
68
68
  type: :development
69
69
  version_requirements: *id003
70
70
  - !ruby/object:Gem::Dependency
71
- name: simplecov
71
+ name: rspec
72
72
  prerelease: false
73
73
  requirement: &id004 !ruby/object:Gem::Requirement
74
74
  none: false
@@ -82,7 +82,7 @@ dependencies:
82
82
  type: :development
83
83
  version_requirements: *id004
84
84
  - !ruby/object:Gem::Dependency
85
- name: activesupport
85
+ name: simplecov
86
86
  prerelease: false
87
87
  requirement: &id005 !ruby/object:Gem::Requirement
88
88
  none: false
@@ -93,8 +93,22 @@ dependencies:
93
93
  segments:
94
94
  - 0
95
95
  version: "0"
96
- type: :runtime
96
+ type: :development
97
97
  version_requirements: *id005
98
+ - !ruby/object:Gem::Dependency
99
+ name: activesupport
100
+ prerelease: false
101
+ requirement: &id006 !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 3
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ type: :runtime
111
+ version_requirements: *id006
98
112
  description: A gem that provides text handling for Twitter
99
113
  email:
100
114
  - matt@twitter.com
@@ -112,11 +126,11 @@ extensions: []
112
126
  extra_rdoc_files: []
113
127
 
114
128
  files:
129
+ - .gemtest
115
130
  - .gitignore
116
131
  - .gitmodules
117
132
  - .rspec
118
133
  - Gemfile
119
- - Gemfile.lock
120
134
  - LICENSE
121
135
  - README.rdoc
122
136
  - Rakefile
data/Gemfile.lock DELETED
@@ -1,40 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- twitter-text (1.4.11)
5
- activesupport
6
-
7
- GEM
8
- remote: http://rubygems.org/
9
- specs:
10
- activesupport (3.1.0)
11
- multi_json (~> 1.0)
12
- diff-lcs (1.1.2)
13
- multi_json (1.0.4)
14
- nokogiri (1.4.4)
15
- nokogiri (1.4.4-java)
16
- weakling (>= 0.0.3)
17
- rake (0.8.7)
18
- rspec (2.3.0)
19
- rspec-core (~> 2.3.0)
20
- rspec-expectations (~> 2.3.0)
21
- rspec-mocks (~> 2.3.0)
22
- rspec-core (2.3.1)
23
- rspec-expectations (2.3.0)
24
- diff-lcs (~> 1.1.2)
25
- rspec-mocks (2.3.0)
26
- simplecov (0.3.7)
27
- simplecov-html (>= 0.3.7)
28
- simplecov-html (0.3.9)
29
- weakling (0.0.4-java)
30
-
31
- PLATFORMS
32
- java
33
- ruby
34
-
35
- DEPENDENCIES
36
- nokogiri
37
- rake
38
- rspec
39
- simplecov
40
- twitter-text!