twitter-text 1.13.4 → 1.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -103,12 +103,46 @@ module Twitter
103
103
  SPACE_CHARS = " \t\n\x0B\f\r"
104
104
  CTRL_CHARS = "\x00-\x1F\x7F"
105
105
 
106
- # A hashtag must contain at least one unicode letter or mark, as well as numbers, underscores, and select special characters.
107
- HASHTAG_ALPHA = /[\p{L}\p{M}]/
108
- HASHTAG_ALPHANUMERIC = /[\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
109
- HASHTAG_BOUNDARY = /\A|\z|[^&\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
110
-
111
- HASHTAG = /(#{HASHTAG_BOUNDARY})(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io
106
+ # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{L}\p{M}
107
+ HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
108
+ "\u037f\u0528-\u052f\u08a0-\u08b2\u08e4-\u08ff\u0978\u0980\u0c00\u0c34\u0c81\u0d01\u0ede\u0edf" +
109
+ "\u10c7\u10cd\u10fd-\u10ff\u16f1-\u16f8\u17b4\u17b5\u191d\u191e\u1ab0-\u1abe\u1bab-\u1bad\u1bba-" +
110
+ "\u1bbf\u1cf3-\u1cf6\u1cf8\u1cf9\u1de7-\u1df5\u2cf2\u2cf3\u2d27\u2d2d\u2d66\u2d67\u9fcc\ua674-" +
111
+ "\ua67b\ua698-\ua69d\ua69f\ua792-\ua79f\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua7f9\ua9e0-\ua9ef\ua9fa-" +
112
+ "\ua9fe\uaa7c-\uaa7f\uaae0-\uaaef\uaaf2-\uaaf6\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uf870-\uf87f" +
113
+ "\uf882\uf884-\uf89f\uf8b8\uf8c1-\uf8d6\ufa2e\ufa2f\ufe27-\ufe2d\u{102e0}\u{1031f}\u{10350}-\u{1037a}" +
114
+ "\u{10500}-\u{10527}\u{10530}-\u{10563}\u{10600}-\u{10736}\u{10740}-\u{10755}\u{10760}-\u{10767}" +
115
+ "\u{10860}-\u{10876}\u{10880}-\u{1089e}\u{10980}-\u{109b7}\u{109be}\u{109bf}\u{10a80}-\u{10a9c}" +
116
+ "\u{10ac0}-\u{10ac7}\u{10ac9}-\u{10ae6}\u{10b80}-\u{10b91}\u{1107f}\u{110d0}-\u{110e8}\u{11100}-" +
117
+ "\u{11134}\u{11150}-\u{11173}\u{11176}\u{11180}-\u{111c4}\u{111da}\u{11200}-\u{11211}\u{11213}-" +
118
+ "\u{11237}\u{112b0}-\u{112ea}\u{11301}-\u{11303}\u{11305}-\u{1130c}\u{1130f}\u{11310}\u{11313}-" +
119
+ "\u{11328}\u{1132a}-\u{11330}\u{11332}\u{11333}\u{11335}-\u{11339}\u{1133c}-\u{11344}\u{11347}" +
120
+ "\u{11348}\u{1134b}-\u{1134d}\u{11357}\u{1135d}-\u{11363}\u{11366}-\u{1136c}\u{11370}-\u{11374}" +
121
+ "\u{11480}-\u{114c5}\u{114c7}\u{11580}-\u{115b5}\u{115b8}-\u{115c0}\u{11600}-\u{11640}\u{11644}" +
122
+ "\u{11680}-\u{116b7}\u{118a0}-\u{118df}\u{118ff}\u{11ac0}-\u{11af8}\u{1236f}-\u{12398}\u{16a40}-" +
123
+ "\u{16a5e}\u{16ad0}-\u{16aed}\u{16af0}-\u{16af4}\u{16b00}-\u{16b36}\u{16b40}-\u{16b43}\u{16b63}-" +
124
+ "\u{16b77}\u{16b7d}-\u{16b8f}\u{16f00}-\u{16f44}\u{16f50}-\u{16f7e}\u{16f8f}-\u{16f9f}\u{1bc00}-" +
125
+ "\u{1bc6a}\u{1bc70}-\u{1bc7c}\u{1bc80}-\u{1bc88}\u{1bc90}-\u{1bc99}\u{1bc9d}\u{1bc9e}\u{1e800}-" +
126
+ "\u{1e8c4}\u{1e8d0}-\u{1e8d6}\u{1ee00}-\u{1ee03}\u{1ee05}-\u{1ee1f}\u{1ee21}\u{1ee22}\u{1ee24}" +
127
+ "\u{1ee27}\u{1ee29}-\u{1ee32}\u{1ee34}-\u{1ee37}\u{1ee39}\u{1ee3b}\u{1ee42}\u{1ee47}\u{1ee49}" +
128
+ "\u{1ee4b}\u{1ee4d}-\u{1ee4f}\u{1ee51}\u{1ee52}\u{1ee54}\u{1ee57}\u{1ee59}\u{1ee5b}\u{1ee5d}\u{1ee5f}" +
129
+ "\u{1ee61}\u{1ee62}\u{1ee64}\u{1ee67}-\u{1ee6a}\u{1ee6c}-\u{1ee72}\u{1ee74}-\u{1ee77}\u{1ee79}-" +
130
+ "\u{1ee7c}\u{1ee7e}\u{1ee80}-\u{1ee89}\u{1ee8b}-\u{1ee9b}\u{1eea1}-\u{1eea3}\u{1eea5}-\u{1eea9}" +
131
+ "\u{1eeab}-\u{1eebb}"
132
+
133
+ # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{Nd}
134
+ HASHTAG_NUMERALS = "\\p{Nd}" +
135
+ "\u0de6-\u0def\ua9f0-\ua9f9\u{110f0}-\u{110f9}\u{11136}-\u{1113f}\u{111d0}-\u{111d9}\u{112f0}-" +
136
+ "\u{112f9}\u{114d0}-\u{114d9}\u{11650}-\u{11659}\u{116c0}-\u{116c9}\u{118e0}-\u{118e9}\u{16a60}-" +
137
+ "\u{16a69}\u{16b50}-\u{16b59}"
138
+
139
+ HASHTAG_SPECIAL_CHARS = "_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7"
140
+
141
+ HASHTAG_LETTERS_NUMERALS = "#{HASHTAG_LETTERS_AND_MARKS}#{HASHTAG_NUMERALS}#{HASHTAG_SPECIAL_CHARS}"
142
+ HASHTAG_LETTERS_NUMERALS_SET = "[#{HASHTAG_LETTERS_NUMERALS}]"
143
+ HASHTAG_LETTERS_SET = "[#{HASHTAG_LETTERS_AND_MARKS}]"
144
+
145
+ HASHTAG = /(\A|[^&#{HASHTAG_LETTERS_NUMERALS}])(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_LETTERS_NUMERALS_SET}*#{HASHTAG_LETTERS_SET}#{HASHTAG_LETTERS_NUMERALS_SET}*)/io
112
146
 
113
147
  REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
114
148
  # Used in Extractor for final filtering
@@ -124,6 +124,10 @@ class ConformanceTest < Test::Unit::TestCase
124
124
  assert_equal expected, extract_hashtags(text), description
125
125
  end
126
126
 
127
+ def_conformance_test("extract.yml", :hashtags_from_astral) do
128
+ assert_equal expected, extract_hashtags(text), description
129
+ end
130
+
127
131
  def_conformance_test("extract.yml", :hashtags_with_indices) do
128
132
  e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
129
133
  assert_equal e, extract_hashtags_with_indices(text), description
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "twitter-text"
5
- s.version = "1.13.4"
5
+ s.version = "1.14.0"
6
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
7
7
  "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
8
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.4
4
+ version: 1.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Sanford
@@ -16,7 +16,7 @@ authors:
16
16
  autorequire:
17
17
  bindir: bin
18
18
  cert_chain: []
19
- date: 2016-03-03 00:00:00.000000000 Z
19
+ date: 2016-07-08 00:00:00.000000000 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: multi_json
@@ -137,7 +137,7 @@ files:
137
137
  - ".rspec"
138
138
  - Gemfile
139
139
  - LICENSE
140
- - README.rdoc
140
+ - README.md
141
141
  - Rakefile
142
142
  - lib/assets/tld_lib.yml
143
143
  - lib/twitter-text.rb
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  requirements: []
186
186
  rubyforge_project:
187
- rubygems_version: 2.4.3
187
+ rubygems_version: 2.4.5.1
188
188
  signing_key:
189
189
  specification_version: 4
190
190
  summary: Twitter text handling library
@@ -1,106 +0,0 @@
1
- {rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
2
-
3
- == twitter-text
4
-
5
- A gem that provides text processing routines for Twitter Tweets. Its main
6
- purpose is to unify the auto-linking and extraction of
7
- usernames, lists, hashtags and URLs.
8
-
9
- == Extraction Examples
10
-
11
- # Extraction
12
- class MyClass
13
- include Twitter::Extractor
14
- usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
15
- # usernames = ["twitter", "jack"]
16
- end
17
-
18
- # Extraction with a block argument
19
- class MyClass
20
- include Twitter::Extractor
21
- extract_reply_screen_name("@twitter are you hiring?") do |username|
22
- # username = "twitter"
23
- end
24
- end
25
-
26
- == Auto-linking Examples
27
-
28
- # Auto-link
29
- class MyClass
30
- include Twitter::Autolink
31
-
32
- html = auto_link("link @user, please #request")
33
- end
34
-
35
- # For Ruby on Rails you want to add this to app/helpers/application_helper.rb
36
- module ApplicationHelper
37
- include Twitter::Autolink
38
- end
39
-
40
- # Now the auto_link function is available in every view. So in index.html.erb:
41
- <%= auto_link("link @user, please #request") %>
42
-
43
- === Usernames
44
-
45
- Username extraction and linking matches all valid Twitter usernames but does
46
- not verify that the username is a valid Twitter account.
47
-
48
- === Lists
49
-
50
- Auto-link and extract list names when they are written in @user/list-name
51
- format.
52
-
53
- === Hashtags
54
-
55
- Auto-link and extract hashtags, where a hashtag can contain most letters or
56
- numbers but cannot be solely numbers and cannot contain punctuation.
57
-
58
- === URLs
59
-
60
- Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
61
- a space to separate normal text from URLs making it difficult to identify where
62
- the URL ends and the text starts.
63
-
64
- For this reason twitter-text currently does not support extracting or auto-linking
65
- of URLs immediately followed by non-Latin characters.
66
-
67
- Example: "http://twitter.com/は素晴らしい" .
68
- The normal text is "は素晴らしい" and is not part of the URL even though
69
- it isn't space separated.
70
-
71
- === International
72
-
73
- Special care has been taken to be sure that auto-linking and extraction work
74
- in Tweets of all languages. This means that languages without spaces between
75
- words should work equally well.
76
-
77
- === Hit Highlighting
78
-
79
- Used to provide emphasis around the "hits" returned from the Search API, built
80
- to work against text that has been auto-linked already.
81
-
82
- === Thanks
83
-
84
- Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
85
-
86
- * At Twitter …
87
- * Matt Sanford - http://github.com/mzsanford
88
- * Raffi Krikorian - http://github.com/r
89
- * Ben Cherry - http://github.com/bcherry
90
- * Patrick Ewing - http://github.com/hoverbird
91
- * Jeff Smick - http://github.com/sprsquish
92
- * Kenneth Kufluk - https://github.com/kennethkufluk
93
- * Keita Fujii - https://github.com/keitaf
94
- * Yoshimasa Niwa - https://github.com/niw
95
-
96
- * Patches from the community …
97
- * Jean-Philippe Bougie - http://github.com/jpbougie
98
- * Erik Michaels-Ober - https://github.com/sferik
99
-
100
- * Anyone who has filed an issue. It helps. Really.
101
-
102
- === Copyright and License
103
-
104
- Copyright 2011 Twitter, Inc.
105
-
106
- Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0