twitter-text 1.13.4 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,12 +103,46 @@ module Twitter
103
103
  SPACE_CHARS = " \t\n\x0B\f\r"
104
104
  CTRL_CHARS = "\x00-\x1F\x7F"
105
105
 
106
- # A hashtag must contain at least one Unicode letter or mark; it may also contain numbers, underscores, and select special characters.
107
- HASHTAG_ALPHA = /[\p{L}\p{M}]/
108
- HASHTAG_ALPHANUMERIC = /[\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
109
- HASHTAG_BOUNDARY = /\A|\z|[^&\p{L}\p{M}\p{Nd}_\u200c\u200d\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7]/
110
-
111
- HASHTAG = /(#{HASHTAG_BOUNDARY})(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io
106
+ # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{L}\p{M}
107
+ HASHTAG_LETTERS_AND_MARKS = "\\p{L}\\p{M}" +
108
+ "\u037f\u0528-\u052f\u08a0-\u08b2\u08e4-\u08ff\u0978\u0980\u0c00\u0c34\u0c81\u0d01\u0ede\u0edf" +
109
+ "\u10c7\u10cd\u10fd-\u10ff\u16f1-\u16f8\u17b4\u17b5\u191d\u191e\u1ab0-\u1abe\u1bab-\u1bad\u1bba-" +
110
+ "\u1bbf\u1cf3-\u1cf6\u1cf8\u1cf9\u1de7-\u1df5\u2cf2\u2cf3\u2d27\u2d2d\u2d66\u2d67\u9fcc\ua674-" +
111
+ "\ua67b\ua698-\ua69d\ua69f\ua792-\ua79f\ua7aa-\ua7ad\ua7b0\ua7b1\ua7f7-\ua7f9\ua9e0-\ua9ef\ua9fa-" +
112
+ "\ua9fe\uaa7c-\uaa7f\uaae0-\uaaef\uaaf2-\uaaf6\uab30-\uab5a\uab5c-\uab5f\uab64\uab65\uf870-\uf87f" +
113
+ "\uf882\uf884-\uf89f\uf8b8\uf8c1-\uf8d6\ufa2e\ufa2f\ufe27-\ufe2d\u{102e0}\u{1031f}\u{10350}-\u{1037a}" +
114
+ "\u{10500}-\u{10527}\u{10530}-\u{10563}\u{10600}-\u{10736}\u{10740}-\u{10755}\u{10760}-\u{10767}" +
115
+ "\u{10860}-\u{10876}\u{10880}-\u{1089e}\u{10980}-\u{109b7}\u{109be}\u{109bf}\u{10a80}-\u{10a9c}" +
116
+ "\u{10ac0}-\u{10ac7}\u{10ac9}-\u{10ae6}\u{10b80}-\u{10b91}\u{1107f}\u{110d0}-\u{110e8}\u{11100}-" +
117
+ "\u{11134}\u{11150}-\u{11173}\u{11176}\u{11180}-\u{111c4}\u{111da}\u{11200}-\u{11211}\u{11213}-" +
118
+ "\u{11237}\u{112b0}-\u{112ea}\u{11301}-\u{11303}\u{11305}-\u{1130c}\u{1130f}\u{11310}\u{11313}-" +
119
+ "\u{11328}\u{1132a}-\u{11330}\u{11332}\u{11333}\u{11335}-\u{11339}\u{1133c}-\u{11344}\u{11347}" +
120
+ "\u{11348}\u{1134b}-\u{1134d}\u{11357}\u{1135d}-\u{11363}\u{11366}-\u{1136c}\u{11370}-\u{11374}" +
121
+ "\u{11480}-\u{114c5}\u{114c7}\u{11580}-\u{115b5}\u{115b8}-\u{115c0}\u{11600}-\u{11640}\u{11644}" +
122
+ "\u{11680}-\u{116b7}\u{118a0}-\u{118df}\u{118ff}\u{11ac0}-\u{11af8}\u{1236f}-\u{12398}\u{16a40}-" +
123
+ "\u{16a5e}\u{16ad0}-\u{16aed}\u{16af0}-\u{16af4}\u{16b00}-\u{16b36}\u{16b40}-\u{16b43}\u{16b63}-" +
124
+ "\u{16b77}\u{16b7d}-\u{16b8f}\u{16f00}-\u{16f44}\u{16f50}-\u{16f7e}\u{16f8f}-\u{16f9f}\u{1bc00}-" +
125
+ "\u{1bc6a}\u{1bc70}-\u{1bc7c}\u{1bc80}-\u{1bc88}\u{1bc90}-\u{1bc99}\u{1bc9d}\u{1bc9e}\u{1e800}-" +
126
+ "\u{1e8c4}\u{1e8d0}-\u{1e8d6}\u{1ee00}-\u{1ee03}\u{1ee05}-\u{1ee1f}\u{1ee21}\u{1ee22}\u{1ee24}" +
127
+ "\u{1ee27}\u{1ee29}-\u{1ee32}\u{1ee34}-\u{1ee37}\u{1ee39}\u{1ee3b}\u{1ee42}\u{1ee47}\u{1ee49}" +
128
+ "\u{1ee4b}\u{1ee4d}-\u{1ee4f}\u{1ee51}\u{1ee52}\u{1ee54}\u{1ee57}\u{1ee59}\u{1ee5b}\u{1ee5d}\u{1ee5f}" +
129
+ "\u{1ee61}\u{1ee62}\u{1ee64}\u{1ee67}-\u{1ee6a}\u{1ee6c}-\u{1ee72}\u{1ee74}-\u{1ee77}\u{1ee79}-" +
130
+ "\u{1ee7c}\u{1ee7e}\u{1ee80}-\u{1ee89}\u{1ee8b}-\u{1ee9b}\u{1eea1}-\u{1eea3}\u{1eea5}-\u{1eea9}" +
131
+ "\u{1eeab}-\u{1eebb}"
132
+
133
+ # Generated from unicode_regex/unicode_regex_groups.scala, more inclusive than Ruby's \p{Nd}
134
+ HASHTAG_NUMERALS = "\\p{Nd}" +
135
+ "\u0de6-\u0def\ua9f0-\ua9f9\u{110f0}-\u{110f9}\u{11136}-\u{1113f}\u{111d0}-\u{111d9}\u{112f0}-" +
136
+ "\u{112f9}\u{114d0}-\u{114d9}\u{11650}-\u{11659}\u{116c0}-\u{116c9}\u{118e0}-\u{118e9}\u{16a60}-" +
137
+ "\u{16a69}\u{16b50}-\u{16b59}"
138
+
139
+ HASHTAG_SPECIAL_CHARS = "_\u200c\u200d\ua67e\u05be\u05f3\u05f4\uff5e\u301c\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u00b7"
140
+
141
+ HASHTAG_LETTERS_NUMERALS = "#{HASHTAG_LETTERS_AND_MARKS}#{HASHTAG_NUMERALS}#{HASHTAG_SPECIAL_CHARS}"
142
+ HASHTAG_LETTERS_NUMERALS_SET = "[#{HASHTAG_LETTERS_NUMERALS}]"
143
+ HASHTAG_LETTERS_SET = "[#{HASHTAG_LETTERS_AND_MARKS}]"
144
+
145
+ HASHTAG = /(\A|[^&#{HASHTAG_LETTERS_NUMERALS}])(#|＃)(?!\ufe0f|\u20e3)(#{HASHTAG_LETTERS_NUMERALS_SET}*#{HASHTAG_LETTERS_SET}#{HASHTAG_LETTERS_NUMERALS_SET}*)/io
112
146
 
113
147
  REGEXEN[:valid_hashtag] = /#{HASHTAG}/io
114
148
  # Used in Extractor for final filtering
@@ -124,6 +124,10 @@ class ConformanceTest < Test::Unit::TestCase
124
124
  assert_equal expected, extract_hashtags(text), description
125
125
  end
126
126
 
127
+ def_conformance_test("extract.yml", :hashtags_from_astral) do
128
+ assert_equal expected, extract_hashtags(text), description
129
+ end
130
+
127
131
  def_conformance_test("extract.yml", :hashtags_with_indices) do
128
132
  e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
129
133
  assert_equal e, extract_hashtags_with_indices(text), description
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "twitter-text"
5
- s.version = "1.13.4"
5
+ s.version = "1.14.0"
6
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
7
7
  "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
8
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.13.4
4
+ version: 1.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Sanford
@@ -16,7 +16,7 @@ authors:
16
16
  autorequire:
17
17
  bindir: bin
18
18
  cert_chain: []
19
- date: 2016-03-03 00:00:00.000000000 Z
19
+ date: 2016-07-08 00:00:00.000000000 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: multi_json
@@ -137,7 +137,7 @@ files:
137
137
  - ".rspec"
138
138
  - Gemfile
139
139
  - LICENSE
140
- - README.rdoc
140
+ - README.md
141
141
  - Rakefile
142
142
  - lib/assets/tld_lib.yml
143
143
  - lib/twitter-text.rb
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  requirements: []
186
186
  rubyforge_project:
187
- rubygems_version: 2.4.3
187
+ rubygems_version: 2.4.5.1
188
188
  signing_key:
189
189
  specification_version: 4
190
190
  summary: Twitter text handling library
@@ -1,106 +0,0 @@
1
- {rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
2
-
3
- == twitter-text
4
-
5
- A gem that provides text processing routines for Twitter Tweets. The major
6
- reason for this is to unify the various auto-linking and extraction of
7
- usernames, lists, hashtags and URLs.
8
-
9
- == Extraction Examples
10
-
11
- # Extraction
12
- class MyClass
13
- include Twitter::Extractor
14
- usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack")
15
- # usernames = ["twitter", "jack"]
16
- end
17
-
18
- # Extraction with a block argument
19
- class MyClass
20
- include Twitter::Extractor
21
- extract_reply_screen_name("@twitter are you hiring?") do |username|
22
- # username = "twitter"
23
- end
24
- end
25
-
26
- == Auto-linking Examples
27
-
28
- # Auto-link
29
- class MyClass
30
- include Twitter::Autolink
31
-
32
- html = auto_link("link @user, please #request")
33
- end
34
-
35
- # For Ruby on Rails you want to add this to app/helpers/application_helper.rb
36
- module ApplicationHelper
37
- include Twitter::Autolink
38
- end
39
-
40
- # Now the auto_link function is available in every view. So in index.html.erb:
41
- <%= auto_link("link @user, please #request") %>
42
-
43
- === Usernames
44
-
45
- Username extraction and linking matches all valid Twitter usernames but does
46
- not verify that the username is a valid Twitter account.
47
-
48
- === Lists
49
-
50
- Auto-link and extract list names when they are written in @user/list-name
51
- format.
52
-
53
- === Hashtags
54
-
55
- Auto-link and extract hashtags, where a hashtag can contain most letters or
56
- numbers but cannot be solely numbers and cannot contain punctuation.
57
-
58
- === URLs
59
-
60
- Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
61
- a space to separate normal text from URLs making it difficult to identify where
62
- the URL ends and the text starts.
63
-
64
- For this reason twitter-text currently does not support extracting or auto-linking
65
- of URLs immediately followed by non-Latin characters.
66
-
67
- Example: "http://twitter.com/は素晴らしい" .
68
- The normal text is "は素晴らしい" and is not part of the URL even though
69
- it isn't space separated.
70
-
71
- === International
72
-
73
- Special care has been taken to be sure that auto-linking and extraction work
74
- in Tweets of all languages. This means that languages without spaces between
75
- words should work equally well.
76
-
77
- === Hit Highlighting
78
-
79
- Use to provide emphasis around the "hits" returned from the Search API, built
80
- to work against text that has been auto-linked already.
81
-
82
- === Thanks
83
-
84
- Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
85
-
86
- * At Twitter …
87
- * Matt Sanford - http://github.com/mzsanford
88
- * Raffi Krikorian - http://github.com/r
89
- * Ben Cherry - http://github.com/bcherry
90
- * Patrick Ewing - http://github.com/hoverbird
91
- * Jeff Smick - http://github.com/sprsquish
92
- * Kenneth Kufluk - https://github.com/kennethkufluk
93
- * Keita Fujii - https://github.com/keitaf
94
- * Yoshimasa Niwa - https://github.com/niw
95
-
96
- * Patches from the community …
97
- * Jean-Philippe Bougie - http://github.com/jpbougie
98
- * Erik Michaels-Ober - https://github.com/sferik
99
-
100
- * Anyone who has filed an issue. It helps. Really.
101
-
102
- === Copyright and License
103
-
104
- Copyright 2011 Twitter, Inc.
105
-
106
- Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0