twitter-text 1.13.4 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +122 -0
- data/lib/assets/tld_lib.yml +1393 -1061
- data/lib/twitter-text/regex.rb +40 -6
- data/test/conformance_test.rb +4 -0
- data/twitter-text.gemspec +1 -1
- metadata +4 -4
- data/README.rdoc +0 -106
data/lib/twitter-text/regex.rb
CHANGED
@@ -103,12 +103,46 @@ module Twitter
|
|
103
103
|
SPACE_CHARS = " \t\n\x0B\f\r"
|
104
104
|
CTRL_CHARS = "\x00-\x1F\x7F"
|
105
105
|
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
106
|
+
# Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{L}\p{M}
|
107
|
+
HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
|
108
|
+
"\u037f\u0528-\u052f\u08a0-\u08b2\u08e4-\u08ff\u0978\u0980\u0c00\u0c34\u0c81\u0d01\u0ede\u0edf" +
|
109
|
+
"\u10c7\u10cd\u10fd-\u10ff\u16f1-\u16f8\u17b4\u17b5\u191d\u191e\u1ab0-\u1abe\u1bab-\u1bad\u1bba-" +
|
110
|
+
"\u1bbf\u1cf3-\u1cf6\u1cf8\u1cf9\u1de7-\u1df5\u2cf2\u2cf3\u2d27\u2d2d\u2d66\u2d67\u9fcc\ua674-" +
|
111
|
+
"\ua67b\ua698-\ua69d\ua69f\ua792-\ua79f\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua7f9\ua9e0-\ua9ef\ua9fa-" +
|
112
|
+
"\ua9fe\uaa7c-\uaa7f\uaae0-\uaaef\uaaf2-\uaaf6\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uf870-\uf87f" +
|
113
|
+
"\uf882\uf884-\uf89f\uf8b8\uf8c1-\uf8d6\ufa2e\ufa2f\ufe27-\ufe2d\u{102e0}\u{1031f}\u{10350}-\u{1037a}" +
|
114
|
+
"\u{10500}-\u{10527}\u{10530}-\u{10563}\u{10600}-\u{10736}\u{10740}-\u{10755}\u{10760}-\u{10767}" +
|
115
|
+
"\u{10860}-\u{10876}\u{10880}-\u{1089e}\u{10980}-\u{109b7}\u{109be}\u{109bf}\u{10a80}-\u{10a9c}" +
|
116
|
+
"\u{10ac0}-\u{10ac7}\u{10ac9}-\u{10ae6}\u{10b80}-\u{10b91}\u{1107f}\u{110d0}-\u{110e8}\u{11100}-" +
|
117
|
+
"\u{11134}\u{11150}-\u{11173}\u{11176}\u{11180}-\u{111c4}\u{111da}\u{11200}-\u{11211}\u{11213}-" +
|
118
|
+
"\u{11237}\u{112b0}-\u{112ea}\u{11301}-\u{11303}\u{11305}-\u{1130c}\u{1130f}\u{11310}\u{11313}-" +
|
119
|
+
"\u{11328}\u{1132a}-\u{11330}\u{11332}\u{11333}\u{11335}-\u{11339}\u{1133c}-\u{11344}\u{11347}" +
|
120
|
+
"\u{11348}\u{1134b}-\u{1134d}\u{11357}\u{1135d}-\u{11363}\u{11366}-\u{1136c}\u{11370}-\u{11374}" +
|
121
|
+
"\u{11480}-\u{114c5}\u{114c7}\u{11580}-\u{115b5}\u{115b8}-\u{115c0}\u{11600}-\u{11640}\u{11644}" +
|
122
|
+
"\u{11680}-\u{116b7}\u{118a0}-\u{118df}\u{118ff}\u{11ac0}-\u{11af8}\u{1236f}-\u{12398}\u{16a40}-" +
|
123
|
+
"\u{16a5e}\u{16ad0}-\u{16aed}\u{16af0}-\u{16af4}\u{16b00}-\u{16b36}\u{16b40}-\u{16b43}\u{16b63}-" +
|
124
|
+
"\u{16b77}\u{16b7d}-\u{16b8f}\u{16f00}-\u{16f44}\u{16f50}-\u{16f7e}\u{16f8f}-\u{16f9f}\u{1bc00}-" +
|
125
|
+
"\u{1bc6a}\u{1bc70}-\u{1bc7c}\u{1bc80}-\u{1bc88}\u{1bc90}-\u{1bc99}\u{1bc9d}\u{1bc9e}\u{1e800}-" +
|
126
|
+
"\u{1e8c4}\u{1e8d0}-\u{1e8d6}\u{1ee00}-\u{1ee03}\u{1ee05}-\u{1ee1f}\u{1ee21}\u{1ee22}\u{1ee24}" +
|
127
|
+
"\u{1ee27}\u{1ee29}-\u{1ee32}\u{1ee34}-\u{1ee37}\u{1ee39}\u{1ee3b}\u{1ee42}\u{1ee47}\u{1ee49}" +
|
128
|
+
"\u{1ee4b}\u{1ee4d}-\u{1ee4f}\u{1ee51}\u{1ee52}\u{1ee54}\u{1ee57}\u{1ee59}\u{1ee5b}\u{1ee5d}\u{1ee5f}" +
|
129
|
+
"\u{1ee61}\u{1ee62}\u{1ee64}\u{1ee67}-\u{1ee6a}\u{1ee6c}-\u{1ee72}\u{1ee74}-\u{1ee77}\u{1ee79}-" +
|
130
|
+
"\u{1ee7c}\u{1ee7e}\u{1ee80}-\u{1ee89}\u{1ee8b}-\u{1ee9b}\u{1eea1}-\u{1eea3}\u{1eea5}-\u{1eea9}" +
|
131
|
+
"\u{1eeab}-\u{1eebb}"
|
132
|
+
|
133
|
+
# Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{Nd}
|
134
|
+
HASHTAG_NUMERALS = "\\p{Nd}" +
|
135
|
+
"\u0de6-\u0def\ua9f0-\ua9f9\u{110f0}-\u{110f9}\u{11136}-\u{1113f}\u{111d0}-\u{111d9}\u{112f0}-" +
|
136
|
+
"\u{112f9}\u{114d0}-\u{114d9}\u{11650}-\u{11659}\u{116c0}-\u{116c9}\u{118e0}-\u{118e9}\u{16a60}-" +
|
137
|
+
"\u{16a69}\u{16b50}-\u{16b59}"
|
138
|
+
|
139
|
+
HASHTAG_SPECIAL_CHARS = "_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7"
|
140
|
+
|
141
|
+
HASHTAG_LETTERS_NUMERALS = "#{HASHTAG_LETTERS_AND_MARKS}#{HASHTAG_NUMERALS}#{HASHTAG_SPECIAL_CHARS}"
|
142
|
+
HASHTAG_LETTERS_NUMERALS_SET = "[#{HASHTAG_LETTERS_NUMERALS}]"
|
143
|
+
HASHTAG_LETTERS_SET = "[#{HASHTAG_LETTERS_AND_MARKS}]"
|
144
|
+
|
145
|
+
HASHTAG = /(\A|[^&#{HASHTAG_LETTERS_NUMERALS}])(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_LETTERS_NUMERALS_SET}*#{HASHTAG_LETTERS_SET}#{HASHTAG_LETTERS_NUMERALS_SET}*)/io
|
112
146
|
|
113
147
|
REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
|
114
148
|
# Used in Extractor for final filtering
|
data/test/conformance_test.rb
CHANGED
@@ -124,6 +124,10 @@ class ConformanceTest < Test::Unit::TestCase
|
|
124
124
|
assert_equal expected, extract_hashtags(text), description
|
125
125
|
end
|
126
126
|
|
127
|
+
def_conformance_test("extract.yml", :hashtags_from_astral) do
|
128
|
+
assert_equal expected, extract_hashtags(text), description
|
129
|
+
end
|
130
|
+
|
127
131
|
def_conformance_test("extract.yml", :hashtags_with_indices) do
|
128
132
|
e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
|
129
133
|
assert_equal e, extract_hashtags_with_indices(text), description
|
data/twitter-text.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.13.4"
|
5
|
+
s.version = "1.14.0"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
7
|
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.13.4
|
4
|
+
version: 1.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Sanford
|
@@ -16,7 +16,7 @@ authors:
|
|
16
16
|
autorequire:
|
17
17
|
bindir: bin
|
18
18
|
cert_chain: []
|
19
|
-
date: 2016-
|
19
|
+
date: 2016-07-08 00:00:00.000000000 Z
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: multi_json
|
@@ -137,7 +137,7 @@ files:
|
|
137
137
|
- ".rspec"
|
138
138
|
- Gemfile
|
139
139
|
- LICENSE
|
140
|
-
- README.rdoc
|
140
|
+
- README.md
|
141
141
|
- Rakefile
|
142
142
|
- lib/assets/tld_lib.yml
|
143
143
|
- lib/twitter-text.rb
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubyforge_project:
|
187
|
-
rubygems_version: 2.4.
|
187
|
+
rubygems_version: 2.4.5.1
|
188
188
|
signing_key:
|
189
189
|
specification_version: 4
|
190
190
|
summary: Twitter text handling library
|
data/README.rdoc
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
{rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
|
2
|
-
|
3
|
-
== twitter-text
|
4
|
-
|
5
|
-
A gem that provides text processing routines for Twitter Tweets. The major
|
6
|
-
reason for this is to unify the various auto-linking and extraction of
|
7
|
-
usernames, lists, hashtags and URLs.
|
8
|
-
|
9
|
-
== Extraction Examples
|
10
|
-
|
11
|
-
# Extraction
|
12
|
-
class MyClass
|
13
|
-
include Twitter::Extractor
|
14
|
-
usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
|
15
|
-
# usernames = ["twitter", "jack"]
|
16
|
-
end
|
17
|
-
|
18
|
-
# Extraction with a block argument
|
19
|
-
class MyClass
|
20
|
-
include Twitter::Extractor
|
21
|
-
extract_reply_screen_name("@twitter are you hiring?").do |username|
|
22
|
-
# username = "twitter"
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
== Auto-linking Examples
|
27
|
-
|
28
|
-
# Auto-link
|
29
|
-
class MyClass
|
30
|
-
include Twitter::Autolink
|
31
|
-
|
32
|
-
html = auto_link("link @user, please #request")
|
33
|
-
end
|
34
|
-
|
35
|
-
# For Ruby on Rails you want to add this to app/helpers/application_helper.rb
|
36
|
-
module ApplicationHelper
|
37
|
-
include Twitter::Autolink
|
38
|
-
end
|
39
|
-
|
40
|
-
# Now the auto_link function is available in every view. So in index.html.erb:
|
41
|
-
<%= auto_link("link @user, please #request") %>
|
42
|
-
|
43
|
-
=== Usernames
|
44
|
-
|
45
|
-
Username extraction and linking matches all valid Twitter usernames but does
|
46
|
-
not verify that the username is a valid Twitter account.
|
47
|
-
|
48
|
-
=== Lists
|
49
|
-
|
50
|
-
Auto-link and extract list names when they are written in @user/list-name
|
51
|
-
format.
|
52
|
-
|
53
|
-
=== Hashtags
|
54
|
-
|
55
|
-
Auto-link and extract hashtags, where a hashtag can contain most letters or
|
56
|
-
numbers but cannot be solely numbers and cannot contain punctuation.
|
57
|
-
|
58
|
-
=== URLs
|
59
|
-
|
60
|
-
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
|
61
|
-
a space to separate normal text from URLs making it difficult to identify where
|
62
|
-
the URL ends and the text starts.
|
63
|
-
|
64
|
-
For this reason twitter-text currently does not support extracting or auto-linking
|
65
|
-
of URLs immediately followed by non-Latin characters.
|
66
|
-
|
67
|
-
Example: "http://twitter.com/は素晴らしい" .
|
68
|
-
The normal text is "は素晴らしい" and is not part of the URL even though
|
69
|
-
it isn't space separated.
|
70
|
-
|
71
|
-
=== International
|
72
|
-
|
73
|
-
Special care has been taken to be sure that auto-linking and extraction work
|
74
|
-
in Tweets of all languages. This means that languages without spaces between
|
75
|
-
words should work equally well.
|
76
|
-
|
77
|
-
=== Hit Highlighting
|
78
|
-
|
79
|
-
Use to provide emphasis around the "hits" returned from the Search API, built
|
80
|
-
to work against text that has been auto-linked already.
|
81
|
-
|
82
|
-
=== Thanks
|
83
|
-
|
84
|
-
Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
|
85
|
-
|
86
|
-
* At Twitter …
|
87
|
-
* Matt Sanford - http://github.com/mzsanford
|
88
|
-
* Raffi Krikorian - http://github.com/r
|
89
|
-
* Ben Cherry - http://github.com/bcherry
|
90
|
-
* Patrick Ewing - http://github.com/hoverbird
|
91
|
-
* Jeff Smick - http://github.com/sprsquish
|
92
|
-
* Kenneth Kufluk - https://github.com/kennethkufluk
|
93
|
-
* Keita Fujii - https://github.com/keitaf
|
94
|
-
* Yoshimasa Niwa - https://github.com/niw
|
95
|
-
|
96
|
-
* Patches from the community …
|
97
|
-
* Jean-Philippe Bougie - http://github.com/jpbougie
|
98
|
-
* Erik Michaels-Ober - https://github.com/sferik
|
99
|
-
|
100
|
-
* Anyone who has filed an issue. It helps. Really.
|
101
|
-
|
102
|
-
=== Copyright and License
|
103
|
-
|
104
|
-
Copyright 2011 Twitter, Inc.
|
105
|
-
|
106
|
-
Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
|