twitter-text 1.13.4 → 1.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +122 -0
- data/lib/assets/tld_lib.yml +1393 -1061
- data/lib/twitter-text/regex.rb +40 -6
- data/test/conformance_test.rb +4 -0
- data/twitter-text.gemspec +1 -1
- metadata +4 -4
- data/README.rdoc +0 -106
data/lib/twitter-text/regex.rb
CHANGED
@@ -103,12 +103,46 @@ module Twitter
|
|
103
103
|
SPACE_CHARS = " \t\n\x0B\f\r"
|
104
104
|
CTRL_CHARS = "\x00-\x1F\x7F"
|
105
105
|
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
106
|
+
# Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{L}\p{M}
|
107
|
+
HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
|
108
|
+
"\u037f\u0528-\u052f\u08a0-\u08b2\u08e4-\u08ff\u0978\u0980\u0c00\u0c34\u0c81\u0d01\u0ede\u0edf" +
|
109
|
+
"\u10c7\u10cd\u10fd-\u10ff\u16f1-\u16f8\u17b4\u17b5\u191d\u191e\u1ab0-\u1abe\u1bab-\u1bad\u1bba-" +
|
110
|
+
"\u1bbf\u1cf3-\u1cf6\u1cf8\u1cf9\u1de7-\u1df5\u2cf2\u2cf3\u2d27\u2d2d\u2d66\u2d67\u9fcc\ua674-" +
|
111
|
+
"\ua67b\ua698-\ua69d\ua69f\ua792-\ua79f\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua7f9\ua9e0-\ua9ef\ua9fa-" +
|
112
|
+
"\ua9fe\uaa7c-\uaa7f\uaae0-\uaaef\uaaf2-\uaaf6\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uf870-\uf87f" +
|
113
|
+
"\uf882\uf884-\uf89f\uf8b8\uf8c1-\uf8d6\ufa2e\ufa2f\ufe27-\ufe2d\u{102e0}\u{1031f}\u{10350}-\u{1037a}" +
|
114
|
+
"\u{10500}-\u{10527}\u{10530}-\u{10563}\u{10600}-\u{10736}\u{10740}-\u{10755}\u{10760}-\u{10767}" +
|
115
|
+
"\u{10860}-\u{10876}\u{10880}-\u{1089e}\u{10980}-\u{109b7}\u{109be}\u{109bf}\u{10a80}-\u{10a9c}" +
|
116
|
+
"\u{10ac0}-\u{10ac7}\u{10ac9}-\u{10ae6}\u{10b80}-\u{10b91}\u{1107f}\u{110d0}-\u{110e8}\u{11100}-" +
|
117
|
+
"\u{11134}\u{11150}-\u{11173}\u{11176}\u{11180}-\u{111c4}\u{111da}\u{11200}-\u{11211}\u{11213}-" +
|
118
|
+
"\u{11237}\u{112b0}-\u{112ea}\u{11301}-\u{11303}\u{11305}-\u{1130c}\u{1130f}\u{11310}\u{11313}-" +
|
119
|
+
"\u{11328}\u{1132a}-\u{11330}\u{11332}\u{11333}\u{11335}-\u{11339}\u{1133c}-\u{11344}\u{11347}" +
|
120
|
+
"\u{11348}\u{1134b}-\u{1134d}\u{11357}\u{1135d}-\u{11363}\u{11366}-\u{1136c}\u{11370}-\u{11374}" +
|
121
|
+
"\u{11480}-\u{114c5}\u{114c7}\u{11580}-\u{115b5}\u{115b8}-\u{115c0}\u{11600}-\u{11640}\u{11644}" +
|
122
|
+
"\u{11680}-\u{116b7}\u{118a0}-\u{118df}\u{118ff}\u{11ac0}-\u{11af8}\u{1236f}-\u{12398}\u{16a40}-" +
|
123
|
+
"\u{16a5e}\u{16ad0}-\u{16aed}\u{16af0}-\u{16af4}\u{16b00}-\u{16b36}\u{16b40}-\u{16b43}\u{16b63}-" +
|
124
|
+
"\u{16b77}\u{16b7d}-\u{16b8f}\u{16f00}-\u{16f44}\u{16f50}-\u{16f7e}\u{16f8f}-\u{16f9f}\u{1bc00}-" +
|
125
|
+
"\u{1bc6a}\u{1bc70}-\u{1bc7c}\u{1bc80}-\u{1bc88}\u{1bc90}-\u{1bc99}\u{1bc9d}\u{1bc9e}\u{1e800}-" +
|
126
|
+
"\u{1e8c4}\u{1e8d0}-\u{1e8d6}\u{1ee00}-\u{1ee03}\u{1ee05}-\u{1ee1f}\u{1ee21}\u{1ee22}\u{1ee24}" +
|
127
|
+
"\u{1ee27}\u{1ee29}-\u{1ee32}\u{1ee34}-\u{1ee37}\u{1ee39}\u{1ee3b}\u{1ee42}\u{1ee47}\u{1ee49}" +
|
128
|
+
"\u{1ee4b}\u{1ee4d}-\u{1ee4f}\u{1ee51}\u{1ee52}\u{1ee54}\u{1ee57}\u{1ee59}\u{1ee5b}\u{1ee5d}\u{1ee5f}" +
|
129
|
+
"\u{1ee61}\u{1ee62}\u{1ee64}\u{1ee67}-\u{1ee6a}\u{1ee6c}-\u{1ee72}\u{1ee74}-\u{1ee77}\u{1ee79}-" +
|
130
|
+
"\u{1ee7c}\u{1ee7e}\u{1ee80}-\u{1ee89}\u{1ee8b}-\u{1ee9b}\u{1eea1}-\u{1eea3}\u{1eea5}-\u{1eea9}" +
|
131
|
+
"\u{1eeab}-\u{1eebb}"
|
132
|
+
|
133
|
+
# Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{Nd}
|
134
|
+
HASHTAG_NUMERALS = "\\p{Nd}" +
|
135
|
+
"\u0de6-\u0def\ua9f0-\ua9f9\u{110f0}-\u{110f9}\u{11136}-\u{1113f}\u{111d0}-\u{111d9}\u{112f0}-" +
|
136
|
+
"\u{112f9}\u{114d0}-\u{114d9}\u{11650}-\u{11659}\u{116c0}-\u{116c9}\u{118e0}-\u{118e9}\u{16a60}-" +
|
137
|
+
"\u{16a69}\u{16b50}-\u{16b59}"
|
138
|
+
|
139
|
+
HASHTAG_SPECIAL_CHARS = "_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7"
|
140
|
+
|
141
|
+
HASHTAG_LETTERS_NUMERALS = "#{HASHTAG_LETTERS_AND_MARKS}#{HASHTAG_NUMERALS}#{HASHTAG_SPECIAL_CHARS}"
|
142
|
+
HASHTAG_LETTERS_NUMERALS_SET = "[#{HASHTAG_LETTERS_NUMERALS}]"
|
143
|
+
HASHTAG_LETTERS_SET = "[#{HASHTAG_LETTERS_AND_MARKS}]"
|
144
|
+
|
145
|
+
HASHTAG = /(\A|[^&#{HASHTAG_LETTERS_NUMERALS}])(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_LETTERS_NUMERALS_SET}*#{HASHTAG_LETTERS_SET}#{HASHTAG_LETTERS_NUMERALS_SET}*)/io
|
112
146
|
|
113
147
|
REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
|
114
148
|
# Used in Extractor for final filtering
|
data/test/conformance_test.rb
CHANGED
@@ -124,6 +124,10 @@ class ConformanceTest < Test::Unit::TestCase
|
|
124
124
|
assert_equal expected, extract_hashtags(text), description
|
125
125
|
end
|
126
126
|
|
127
|
+
def_conformance_test("extract.yml", :hashtags_from_astral) do
|
128
|
+
assert_equal expected, extract_hashtags(text), description
|
129
|
+
end
|
130
|
+
|
127
131
|
def_conformance_test("extract.yml", :hashtags_with_indices) do
|
128
132
|
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
|
129
133
|
assert_equal e, extract_hashtags_with_indices(text), description
|
data/twitter-text.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.13.4"
|
5
|
+
s.version = "1.14.0"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
7
|
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.4
|
4
|
+
version: 1.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Sanford
|
@@ -16,7 +16,7 @@ authors:
|
|
16
16
|
autorequire:
|
17
17
|
bindir: bin
|
18
18
|
cert_chain: []
|
19
|
-
date: 2016-
|
19
|
+
date: 2016-07-08 00:00:00.000000000 Z
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: multi_json
|
@@ -137,7 +137,7 @@ files:
|
|
137
137
|
- ".rspec"
|
138
138
|
- Gemfile
|
139
139
|
- LICENSE
|
140
|
-
- README.rdoc
|
140
|
+
- README.md
|
141
141
|
- Rakefile
|
142
142
|
- lib/assets/tld_lib.yml
|
143
143
|
- lib/twitter-text.rb
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubyforge_project:
|
187
|
-
rubygems_version: 2.4.
|
187
|
+
rubygems_version: 2.4.5.1
|
188
188
|
signing_key:
|
189
189
|
specification_version: 4
|
190
190
|
summary: Twitter text handling library
|
data/README.rdoc
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
{rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
|
2
|
-
|
3
|
-
== twitter-text
|
4
|
-
|
5
|
-
A gem that provides text processing routines for Twitter Tweets. The major
|
6
|
-
reason for this is to unify the various auto-linking and extraction of
|
7
|
-
usernames, lists, hashtags and URLs.
|
8
|
-
|
9
|
-
== Extraction Examples
|
10
|
-
|
11
|
-
# Extraction
|
12
|
-
class MyClass
|
13
|
-
include Twitter::Extractor
|
14
|
-
usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
|
15
|
-
# usernames = ["twitter", "jack"]
|
16
|
-
end
|
17
|
-
|
18
|
-
# Extraction with a block argument
|
19
|
-
class MyClass
|
20
|
-
include Twitter::Extractor
|
21
|
-
extract_reply_screen_name("@twitter are you hiring?") do |username|
|
22
|
-
# username = "twitter"
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
== Auto-linking Examples
|
27
|
-
|
28
|
-
# Auto-link
|
29
|
-
class MyClass
|
30
|
-
include Twitter::Autolink
|
31
|
-
|
32
|
-
html = auto_link("link @user, please #request")
|
33
|
-
end
|
34
|
-
|
35
|
-
# For Ruby on Rails you want to add this to app/helpers/application_helper.rb
|
36
|
-
module ApplicationHelper
|
37
|
-
include Twitter::Autolink
|
38
|
-
end
|
39
|
-
|
40
|
-
# Now the auto_link function is available in every view. So in index.html.erb:
|
41
|
-
<%= auto_link("link @user, please #request") %>
|
42
|
-
|
43
|
-
=== Usernames
|
44
|
-
|
45
|
-
Username extraction and linking matches all valid Twitter usernames but does
|
46
|
-
not verify that the username is a valid Twitter account.
|
47
|
-
|
48
|
-
=== Lists
|
49
|
-
|
50
|
-
Auto-link and extract list names when they are written in @user/list-name
|
51
|
-
format.
|
52
|
-
|
53
|
-
=== Hashtags
|
54
|
-
|
55
|
-
Auto-link and extract hashtags, where a hashtag can contain most letters or
|
56
|
-
numbers but cannot be solely numbers and cannot contain punctuation.
|
57
|
-
|
58
|
-
=== URLs
|
59
|
-
|
60
|
-
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
|
61
|
-
a space to separate normal text from URLs making it difficult to identify where
|
62
|
-
the URL ends and the text starts.
|
63
|
-
|
64
|
-
For this reason twitter-text currently does not support extracting or auto-linking
|
65
|
-
of URLs immediately followed by non-Latin characters.
|
66
|
-
|
67
|
-
Example: "http://twitter.com/は素晴らしい" .
|
68
|
-
The normal text is "は素晴らしい" and is not part of the URL even though
|
69
|
-
it isn't space separated.
|
70
|
-
|
71
|
-
=== International
|
72
|
-
|
73
|
-
Special care has been taken to be sure that auto-linking and extraction work
|
74
|
-
in Tweets of all languages. This means that languages without spaces between
|
75
|
-
words should work equally well.
|
76
|
-
|
77
|
-
=== Hit Highlighting
|
78
|
-
|
79
|
-
Use to provide emphasis around the "hits" returned from the Search API, built
|
80
|
-
to work against text that has been auto-linked already.
|
81
|
-
|
82
|
-
=== Thanks
|
83
|
-
|
84
|
-
Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
|
85
|
-
|
86
|
-
* At Twitter …
|
87
|
-
* Matt Sanford - http://github.com/mzsanford
|
88
|
-
* Raffi Krikorian - http://github.com/r
|
89
|
-
* Ben Cherry - http://github.com/bcherry
|
90
|
-
* Patrick Ewing - http://github.com/hoverbird
|
91
|
-
* Jeff Smick - http://github.com/sprsquish
|
92
|
-
* Kenneth Kufluk - https://github.com/kennethkufluk
|
93
|
-
* Keita Fujii - https://github.com/keitaf
|
94
|
-
* Yoshimasa Niwa - https://github.com/niw
|
95
|
-
|
96
|
-
* Patches from the community …
|
97
|
-
* Jean-Philippe Bougie - http://github.com/jpbougie
|
98
|
-
* Erik Michaels-Ober - https://github.com/sferik
|
99
|
-
|
100
|
-
* Anyone who has filed an issue. It helps. Really.
|
101
|
-
|
102
|
-
=== Copyright and License
|
103
|
-
|
104
|
-
Copyright 2011 Twitter, Inc.
|
105
|
-
|
106
|
-
Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
|