twitter-text 2.1.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 3f7622cf10e3345995a426a4e371acef2e6eaa7344c430d9c5f944ba9f822d98
4
- data.tar.gz: 1c70f9348e8a801f1df6eaeac98f98c7c2cca685354cc06181ac081acdcfd304
2
+ SHA1:
3
+ metadata.gz: 5ddb91693d013ffff5407a5ba5a6f08985221611
4
+ data.tar.gz: 85e60ba0ce0ad24a92d570204408f8cd78fd62df
5
5
  SHA512:
6
- metadata.gz: 5ab02e8044a6d8fd1c25dc70a53d5b981b37c3b5c1458d193c090e401a4822349c99463946827101d27ee4156bcf98597be54990026145c820dedecda1d9b675
7
- data.tar.gz: b3163606bd143c4d13efbc1aab07cea05ad03d2e26f319c053662a758a44f990d132a3579fa5a0cc8f0023cdfdcf3584fb1bfe23d63c3636b276c02efdb38049
6
+ metadata.gz: 259ad390fb78ea8e090ab17b2843b0c7a6b15ac2baf837278ae12449d36ea65cd37403cf7451b5c4288b8bb743469ae6da316d0150269b5eb6bf3d233d78dcfb
7
+ data.tar.gz: 5ad46e00e0d79a31bfac2a491dc690b93ea727743b900e665d76dfdc4f4b1f07e829b69525e5461b0760a73cfd3eefb28042bf880fe759f603e996a948179f76
@@ -1,7 +1,17 @@
1
1
  # Changelog
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
- ## [Unreleased]
4
+ ## [3.0.0]
5
+ ### Added
6
+ - New v3.json config file with emojiParsingEnabled config option. When
7
+ true, twitter-text will parse and discount emoji supported by the
8
+ twemoji library (see https://github.com/twitter/twemoji). The length
9
+ of these emoji will be the default weight (200 or two characters) even
10
+ if they contain multiple code points combined by zero-width
11
+ joiners. This means that emoji with skin tone and gender modifiers no
12
+ longer count as more characters than those without such modifiers.
13
+ ### Changed
14
+ - Updates known gTLDs to recognize recent additions by IANA (#261)
5
15
 
6
16
  ## [2.1] - 2017-12-20
7
17
  ### Added
data/README.md CHANGED
@@ -188,6 +188,6 @@ Have a bug? Please create an issue here on GitHub!
188
188
 
189
189
  ## License
190
190
 
191
- Copyright 2012-2017 Twitter, Inc and other contributors
191
+ Copyright 2012-2018 Twitter, Inc and other contributors
192
192
 
193
193
  Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
data/Rakefile CHANGED
@@ -24,7 +24,7 @@ namespace :test do
24
24
  namespace :conformance do
25
25
  desc "Run conformance test suite"
26
26
  task :run => ['prebuild'] do
27
- ruby '-rubygems', "test/conformance_test.rb"
27
+ ruby "test/conformance_test.rb"
28
28
  end
29
29
  end
30
30
 
@@ -13,6 +13,7 @@ The configuration format is a JSON string. The JSON can have the following prope
13
13
  * `maxWeightedTweetLength` (required, integer, min value 0)
14
14
  * `scale` (required, integer, min value 1)
15
15
  * `defaultWeight` (required, integer, min value 0)
16
+ * `emojiParsingEnabled` (optional, boolean)
16
17
  * `transformedURLLength` (integer, min value 0)
17
18
  * `ranges` (array of range items)
18
19
 
@@ -48,6 +49,13 @@ The Tweet length is the (`weighted length` / `scale`).
48
49
  The default weight applied to all code points. This is overridden in
49
50
  one or more range items.
50
51
 
52
+ ### emojiParsingEnabled
53
+
54
+ When set to true, the weighted Tweet length counts each emoji as a
55
+ single code point (with a default weight of 200), including longer
56
+ grapheme clusters combined by zero-width joiners. When set to false,
57
+ Tweet length is calculated by weighing individual Unicode code points.
58
+
51
59
  ### transformedURLLength
52
60
 
53
61
  The length counted for URLs against the total weight of the Tweet. In
@@ -0,0 +1,30 @@
1
+ {
2
+ "version": 3,
3
+ "maxWeightedTweetLength": 280,
4
+ "scale": 100,
5
+ "defaultWeight": 200,
6
+ "emojiParsingEnabled": true,
7
+ "transformedURLLength": 23,
8
+ "ranges": [
9
+ {
10
+ "start": 0,
11
+ "end": 4351,
12
+ "weight": 100
13
+ },
14
+ {
15
+ "start": 8192,
16
+ "end": 8205,
17
+ "weight": 100
18
+ },
19
+ {
20
+ "start": 8208,
21
+ "end": 8223,
22
+ "weight": 100
23
+ },
24
+ {
25
+ "start": 8242,
26
+ "end": 8247,
27
+ "weight": 100
28
+ }
29
+ ]
30
+ }
@@ -343,6 +343,7 @@ generic:
343
343
  - 新闻
344
344
  - 政府
345
345
  - 政务
346
+ - 招聘
346
347
  - 手表
347
348
  - 手机
348
349
  - 我爱你
@@ -598,6 +599,7 @@ generic:
598
599
  - srl
599
600
  - spreadbetting
600
601
  - spot
602
+ - sport
601
603
  - spiegel
602
604
  - space
603
605
  - soy
@@ -942,6 +944,7 @@ generic:
942
944
  - locker
943
945
  - loans
944
946
  - loan
947
+ - llc
945
948
  - lixil
946
949
  - living
947
950
  - live
@@ -1047,6 +1050,7 @@ generic:
1047
1050
  - info
1048
1051
  - infiniti
1049
1052
  - industries
1053
+ - inc
1050
1054
  - immobilien
1051
1055
  - immo
1052
1056
  - imdb
@@ -1364,6 +1368,7 @@ generic:
1364
1368
  - cheap
1365
1369
  - chat
1366
1370
  - chase
1371
+ - charity
1367
1372
  - channel
1368
1373
  - chanel
1369
1374
  - cfd
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  major, minor, _patch = RUBY_VERSION.split('.')
2
6
 
3
7
  $RUBY_1_9 = if major.to_i == 1 && minor.to_i < 9
@@ -10,6 +14,7 @@ end
10
14
 
11
15
  %w(
12
16
  deprecation
17
+ emoji_regex
13
18
  regex
14
19
  rewriter
15
20
  autolink
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
 
3
7
  require 'set'
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: UTF-8
2
6
 
3
7
  module Twitter
@@ -6,7 +10,10 @@ module Twitter
6
10
  require 'json'
7
11
 
8
12
  PARSER_VERSION_CLASSIC = "v1"
9
- PARSER_VERSION_DEFAULT = "v2"
13
+ PARSER_VERSION_WEIGHTED = "v2"
14
+ PARSER_VERSION_EMOJI_PARSING = "v3"
15
+
16
+ PARSER_VERSION_DEFAULT = PARSER_VERSION_WEIGHTED
10
17
 
11
18
  class << self
12
19
  attr_accessor :default_configuration
@@ -14,6 +21,7 @@ module Twitter
14
21
 
15
22
  attr_reader :version, :max_weighted_tweet_length, :scale
16
23
  attr_reader :default_weight, :transformed_url_length, :ranges
24
+ attr_reader :emoji_parsing_enabled
17
25
 
18
26
  CONFIG_V1 = File.join(
19
27
  File.expand_path('../../../config', __FILE__), # project root
@@ -22,7 +30,12 @@ module Twitter
22
30
 
23
31
  CONFIG_V2 = File.join(
24
32
  File.expand_path('../../../config', __FILE__), # project root
25
- "#{PARSER_VERSION_DEFAULT}.json"
33
+ "#{PARSER_VERSION_WEIGHTED}.json"
34
+ )
35
+
36
+ CONFIG_V3 = File.join(
37
+ File.expand_path('../../../config', __FILE__), # project root
38
+ "#{PARSER_VERSION_EMOJI_PARSING}.json"
26
39
  )
27
40
 
28
41
  def self.parse_string(string, options = {})
@@ -45,10 +58,11 @@ module Twitter
45
58
  @scale = config[:scale]
46
59
  @default_weight = config[:defaultWeight]
47
60
  @transformed_url_length = config[:transformedURLLength]
61
+ @emoji_parsing_enabled = config[:emojiParsingEnabled]
48
62
  @ranges = config[:ranges].map { |range| Twitter::TwitterText::WeightedRange.new(range) } if config.key?(:ranges) && config[:ranges].is_a?(Array)
49
63
  end
50
64
 
51
- self.default_configuration = self.configuration_from_file(CONFIG_V2)
65
+ self.default_configuration = self.configuration_from_file(CONFIG_V3)
52
66
  end
53
67
  end
54
68
  end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  module Twitter
2
6
  module TwitterText
3
7
  module Deprecation
@@ -0,0 +1,27 @@
1
+ #
2
+ # emoji_regex.rb
3
+ #
4
+ # Copyright © 2018 Twitter. All rights reserved.
5
+ #
6
+ # DO NOT MODIFY THIS FILE -- it is generated for twitter-text automatically
7
+
8
+ # encoding: utf-8
9
+
10
+ module Twitter
11
+ module TwitterText
12
+ class Regex
13
+ class Emoji
14
+ REGEXEN = {} # :nodoc:
15
+
16
+ # This regex pattern matches a single emoji
17
+ REGEXEN[:valid_emoji] = %r{
18
+ [\u{01f468}\u{01f469}][\u{01f3fb}-\u{01f3ff}]?\u200d(?:\u2695\ufe0f|\u2696\ufe0f|\u2708\ufe0f|[\u{01f33e}\u{01f373}\u{01f393}\u{01f3a4}\u{01f3a8}\u{01f3eb}\u{01f3ed}\u{01f4bb}\u{01f4bc}\u{01f527}\u{01f52c}\u{01f680}\u{01f692}\u{01f9b0}-\u{01f9b3}])|[\u26f9\u{01f3cb}\u{01f3cc}\u{01f574}\u{01f575}](?:[\ufe0f\u{01f3fb}-\u{01f3ff}]\u200d[\u2640\u2642]\ufe0f)|[\u{01f3c3}\u{01f3c4}\u{01f3ca}\u{01f46e}\u{01f471}\u{01f473}\u{01f477}\u{01f481}\u{01f482}\u{01f486}\u{01f487}\u{01f645}-\u{01f647}\u{01f64b}\u{01f64d}\u{01f64e}\u{01f6a3}\u{01f6b4}-\u{01f6b6}\u{01f926}\u{01f935}\u{01f937}-\u{01f939}\u{01f93d}\u{01f93e}\u{01f9b8}\u{01f9b9}\u{01f9d6}-\u{01f9dd}][\u{01f3fb}-\u{01f3ff}]?\u200d[\u2640\u2642]\ufe0f|(?:\u{01f468}\u200d\u2764\ufe0f\u200d\u{01f48b}\u200d\u{01f468}|\u{01f469}\u200d\u2764\ufe0f\u200d\u{01f48b}\u200d[\u{01f468}\u{01f469}]|\u{01f468}\u200d\u{01f468}\u200d\u{01f466}\u200d\u{01f466}|\u{01f468}\u200d\u{01f468}\u200d\u{01f467}\u200d[\u{01f466}\u{01f467}]|\u{01f468}\u200d\u{01f469}\u200d\u{01f466}\u200d\u{01f466}|\u{01f468}\u200d\u{01f469}\u200d\u{01f467}\u200d[\u{01f466}\u{01f467}]|\u{01f469}\u200d\u{01f469}\u200d\u{01f466}\u200d\u{01f466}|\u{01f469}\u200d\u{01f469}\u200d\u{01f467}\u200d[\u{01f466}\u{01f467}]|\u{01f468}\u200d\u2764\ufe0f\u200d\u{01f468}|\u{01f469}\u200d\u2764\ufe0f\u200d[\u{01f468}\u{01f469}]|\u{01f468}\u200d\u{01f466}\u200d\u{01f466}|\u{01f468}\u200d\u{01f467}\u200d[\u{01f466}\u{01f467}]|\u{01f468}\u200d\u{01f468}\u200d[\u{01f466}\u{01f467}]|\u{01f468}\u200d\u{01f469}\u200d[\u{01f466}\u{01f467}]|\u{01f469}\u200d\u{01f466}\u200d\u{01f466}|\u{01f469}\u200d\u{01f467}\u200d[\u{01f466}\u{01f467}]|\u{01f469}\u200d\u{01f469}\u200d[\u{01f466}\u{01f467}]|\u{01f3f3}\ufe0f\u200d\u{01f308}|\u{01f3f4}\u200d\u2620\ufe0f|\u{01f46f}\u200d\u2640\ufe0f|\u{01f46f}\u200d\u2642\ufe0f|\u{01f93c}\u200d\u2640\ufe0f|\u{01f93c}\u200d\u2642\ufe0f|\u{01f9de}\u200d\u2640\ufe0f|\u{01f9de}\u200d\u2642\ufe0f|\u{01f9df}\u200d\u2640\ufe0f|\u{01f9df}\u200d\u2642\ufe0f|\u{01f441}
\u200d\u{01f5e8}|\u{01f468}\u200d[\u{01f466}\u{01f467}]|\u{01f469}\u200d[\u{01f466}\u{01f467}])|[#*0-9]\ufe0f?\u20e3|(?:[©®\u2122\u265f]\ufe0f)|[\u203c\u2049\u2139\u2194-\u2199\u21a9\u21aa\u231a\u231b\u2328\u23cf\u23ed-\u23ef\u23f1\u23f2\u23f8-\u23fa\u24c2\u25aa\u25ab\u25b6\u25c0\u25fb-\u25fe\u2600-\u2604\u260e\u2611\u2614\u2615\u2618\u2620\u2622\u2623\u2626\u262a\u262e\u262f\u2638-\u263a\u2640\u2642\u2648-\u2653\u2660\u2663\u2665\u2666\u2668\u267b\u267f\u2692-\u2697\u2699\u269b\u269c\u26a0\u26a1\u26aa\u26ab\u26b0\u26b1\u26bd\u26be\u26c4\u26c5\u26c8\u26cf\u26d1\u26d3\u26d4\u26e9\u26ea\u26f0-\u26f5\u26f8\u26fa\u26fd\u2702\u2708\u2709\u270f\u2712\u2714\u2716\u271d\u2721\u2733\u2734\u2744\u2747\u2757\u2763\u2764\u27a1\u2934\u2935\u2b05-\u2b07\u2b1b\u2b1c\u2b50\u2b55\u3030\u303d\u3297\u3299\u{01f004}\u{01f170}\u{01f171}\u{01f17e}\u{01f17f}\u{01f202}\u{01f21a}\u{01f22f}\u{01f237}\u{01f321}\u{01f324}-\u{01f32c}\u{01f336}\u{01f37d}\u{01f396}\u{01f397}\u{01f399}-\u{01f39b}\u{01f39e}\u{01f39f}\u{01f3cd}\u{01f3ce}\u{01f3d4}-\u{01f3df}\u{01f3f3}\u{01f3f5}\u{01f3f7}\u{01f43f}\u{01f441}\u{01f4fd}\u{01f549}\u{01f54a}\u{01f56f}\u{01f570}\u{01f573}\u{01f576}-\u{01f579}\u{01f587}\u{01f58a}-\u{01f58d}\u{01f5a5}\u{01f5a8}\u{01f5b1}\u{01f5b2}\u{01f5bc}\u{01f5c2}-\u{01f5c4}\u{01f5d1}-\u{01f5d3}\u{01f5dc}-\u{01f5de}\u{01f5e1}\u{01f5e3}\u{01f5e8}\u{01f5ef}\u{01f5f3}\u{01f5fa}\u{01f6cb}\u{01f6cd}-\u{01f6cf}\u{01f6e0}-\u{01f6e5}\u{01f6e9}\u{01f6f0}\u{01f6f3}](?:\ufe0f|(?!\ufe0e))|(?:[\u261d\u26f7\u26f9\u270c\u270d\u{01f3cb}\u{01f3cc}\u{01f574}\u{01f575}\u{01f590}](?:\ufe0f|(?!\ufe0e))|[\u270a\u270b\u{01f385}\u{01f3c2}-\u{01f3c4}\u{01f3c7}\u{01f3ca}\u{01f442}\u{01f443}\u{01f446}-\u{01f450}\u{01f466}-\u{01f469}\u{01f46e}\u{01f470}-\u{01f478}\u{01f47c}\u{01f481}-\u{01f483}\u{01f485}-\u{01f487}\u{01f4aa}\u{01f57a}\u{01f595}\u{01f596}\u{01f645}-\u{01f647}\u{01f64b}-\u{01f64f}\u{01f6a3}\u{01f6b4}-\u{01f6b6}\u{01f6c0}\u{01f6cc}\u{01f918}-\u{01f91c}\u{01f91e}\u{01f91f}\u{01f926}\u{01f930}-\u{01f939
}\u{01f93d}\u{01f93e}\u{01f9b5}\u{01f9b6}\u{01f9b8}\u{01f9b9}\u{01f9d1}-\u{01f9dd}])[\u{01f3fb}-\u{01f3ff}]?|(?:\u{01f3f4}\u{0e0067}\u{0e0062}\u{0e0065}\u{0e006e}\u{0e0067}\u{0e007f}|\u{01f3f4}\u{0e0067}\u{0e0062}\u{0e0073}\u{0e0063}\u{0e0074}\u{0e007f}|\u{01f3f4}\u{0e0067}\u{0e0062}\u{0e0077}\u{0e006c}\u{0e0073}\u{0e007f}|\u{01f1e6}[\u{01f1e8}-\u{01f1ec}\u{01f1ee}\u{01f1f1}\u{01f1f2}\u{01f1f4}\u{01f1f6}-\u{01f1fa}\u{01f1fc}\u{01f1fd}\u{01f1ff}]|\u{01f1e7}[\u{01f1e6}\u{01f1e7}\u{01f1e9}-\u{01f1ef}\u{01f1f1}-\u{01f1f4}\u{01f1f6}-\u{01f1f9}\u{01f1fb}\u{01f1fc}\u{01f1fe}\u{01f1ff}]|\u{01f1e8}[\u{01f1e6}\u{01f1e8}\u{01f1e9}\u{01f1eb}-\u{01f1ee}\u{01f1f0}-\u{01f1f5}\u{01f1f7}\u{01f1fa}-\u{01f1ff}]|\u{01f1e9}[\u{01f1ea}\u{01f1ec}\u{01f1ef}\u{01f1f0}\u{01f1f2}\u{01f1f4}\u{01f1ff}]|\u{01f1ea}[\u{01f1e6}\u{01f1e8}\u{01f1ea}\u{01f1ec}\u{01f1ed}\u{01f1f7}-\u{01f1fa}]|\u{01f1eb}[\u{01f1ee}-\u{01f1f0}\u{01f1f2}\u{01f1f4}\u{01f1f7}]|\u{01f1ec}[\u{01f1e6}\u{01f1e7}\u{01f1e9}-\u{01f1ee}\u{01f1f1}-\u{01f1f3}\u{01f1f5}-\u{01f1fa}\u{01f1fc}\u{01f1fe}]|\u{01f1ed}[\u{01f1f0}\u{01f1f2}\u{01f1f3}\u{01f1f7}\u{01f1f9}\u{01f1fa}]|\u{01f1ee}[\u{01f1e8}-\u{01f1ea}\u{01f1f1}-\u{01f1f4}\u{01f1f6}-\u{01f1f9}]|\u{01f1ef}[\u{01f1ea}\u{01f1f2}\u{01f1f4}\u{01f1f5}]|\u{01f1f0}[\u{01f1ea}\u{01f1ec}-\u{01f1ee}\u{01f1f2}\u{01f1f3}\u{01f1f5}\u{01f1f7}\u{01f1fc}\u{01f1fe}\u{01f1ff}]|\u{01f1f1}[\u{01f1e6}-\u{01f1e8}\u{01f1ee}\u{01f1f0}\u{01f1f7}-\u{01f1fb}\u{01f1fe}]|\u{01f1f2}[\u{01f1e6}\u{01f1e8}-\u{01f1ed}\u{01f1f0}-\u{01f1ff}]|\u{01f1f3}[\u{01f1e6}\u{01f1e8}\u{01f1ea}-\u{01f1ec}\u{01f1ee}\u{01f1f1}\u{01f1f4}\u{01f1f5}\u{01f1f7}\u{01f1fa}\u{01f1ff}]|\u{01f1f4}\u{01f1f2}|\u{01f1f5}[\u{01f1e6}\u{01f1ea}-\u{01f1ed}\u{01f1f0}-\u{01f1f3}\u{01f1f7}-\u{01f1f9}\u{01f1fc}\u{01f1fe}]|\u{01f1f6}\u{01f1e6}|\u{01f1f7}[\u{01f1ea}\u{01f1f4}\u{01f1f8}\u{01f1fa}\u{01f1fc}]|\u{01f1f8}[\u{01f1e6}-\u{01f1ea}\u{01f1ec}-\u{01f1f4}\u{01f1f7}-\u{01f1f9}\u{01f1fb}\u{01f1fd}-\u{01f1ff}]|\u{01f1f9}[\u{01f1e6}\u{01f1e8}\u{01f1e9}\u
{01f1eb}-\u{01f1ed}\u{01f1ef}-\u{01f1f4}\u{01f1f7}\u{01f1f9}\u{01f1fb}\u{01f1fc}\u{01f1ff}]|\u{01f1fa}[\u{01f1e6}\u{01f1ec}\u{01f1f2}\u{01f1f3}\u{01f1f8}\u{01f1fe}\u{01f1ff}]|\u{01f1fb}[\u{01f1e6}\u{01f1e8}\u{01f1ea}\u{01f1ec}\u{01f1ee}\u{01f1f3}\u{01f1fa}]|\u{01f1fc}[\u{01f1eb}\u{01f1f8}]|\u{01f1fd}\u{01f1f0}|\u{01f1fe}[\u{01f1ea}\u{01f1f9}]|\u{01f1ff}[\u{01f1e6}\u{01f1f2}\u{01f1fc}]|[\u23e9-\u23ec\u23f0\u23f3\u267e\u26ce\u2705\u2728\u274c\u274e\u2753-\u2755\u2795-\u2797\u27b0\u27bf\ue50a\u{01f0cf}\u{01f18e}\u{01f191}-\u{01f19a}\u{01f1e6}-\u{01f1ff}\u{01f201}\u{01f232}-\u{01f236}\u{01f238}-\u{01f23a}\u{01f250}\u{01f251}\u{01f300}-\u{01f320}\u{01f32d}-\u{01f335}\u{01f337}-\u{01f37c}\u{01f37e}-\u{01f384}\u{01f386}-\u{01f393}\u{01f3a0}-\u{01f3c1}\u{01f3c5}\u{01f3c6}\u{01f3c8}\u{01f3c9}\u{01f3cf}-\u{01f3d3}\u{01f3e0}-\u{01f3f0}\u{01f3f4}\u{01f3f8}-\u{01f43e}\u{01f440}\u{01f444}\u{01f445}\u{01f451}-\u{01f465}\u{01f46a}-\u{01f46d}\u{01f46f}\u{01f479}-\u{01f47b}\u{01f47d}-\u{01f480}\u{01f484}\u{01f488}-\u{01f4a9}\u{01f4ab}-\u{01f4fc}\u{01f4ff}-\u{01f53d}\u{01f54b}-\u{01f54e}\u{01f550}-\u{01f567}\u{01f5a4}\u{01f5fb}-\u{01f644}\u{01f648}-\u{01f64a}\u{01f680}-\u{01f6a2}\u{01f6a4}-\u{01f6b3}\u{01f6b7}-\u{01f6bf}\u{01f6c1}-\u{01f6c5}\u{01f6d0}-\u{01f6d2}\u{01f6eb}\u{01f6ec}\u{01f6f4}-\u{01f6f9}\u{01f910}-\u{01f917}\u{01f91d}\u{01f920}-\u{01f925}\u{01f927}-\u{01f92f}\u{01f93a}\u{01f93c}\u{01f940}-\u{01f945}\u{01f947}-\u{01f970}\u{01f973}-\u{01f976}\u{01f97a}\u{01f97c}-\u{01f9a2}\u{01f9b4}\u{01f9b7}\u{01f9c0}-\u{01f9c2}\u{01f9d0}\u{01f9de}-\u{01f9ff}])|\ufe0f
19
+ }iox
20
+
21
+ def self.[](key)
22
+ REGEXEN[key]
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require 'idn'
3
7
 
@@ -5,7 +9,7 @@ class String
5
9
  # Helper function to count the character length by first converting to an
6
10
  # array. This is needed because with unicode strings, the return value
7
11
  # of length may be incorrect
8
- def char_length
12
+ def codepoint_length
9
13
  if respond_to? :codepoints
10
14
  length
11
15
  else
@@ -13,25 +17,25 @@ class String
13
17
  end
14
18
  end
15
19
 
16
- # Helper function to convert this string into an array of unicode characters.
17
- def to_char_a
18
- @to_char_a ||= if chars.kind_of?(Enumerable)
20
+ # Helper function to convert this string into an array of unicode code points.
21
+ def to_codepoint_a
22
+ @to_codepoint_a ||= if chars.kind_of?(Enumerable)
19
23
  chars.to_a
20
24
  else
21
- char_array = []
22
- 0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') }
23
- char_array
25
+ codepoint_array = []
26
+ 0.upto(codepoint_length - 1) { |i| codepoint_array << [chars.slice(i)].pack('U') }
27
+ codepoint_array
24
28
  end
25
29
  end
26
30
  end
27
31
 
28
- # Helper functions to return character offsets instead of byte offsets.
32
+ # Helper functions to return code point offsets instead of byte offsets.
29
33
  class MatchData
30
34
  def char_begin(n)
31
35
  if string.respond_to? :codepoints
32
36
  self.begin(n)
33
37
  else
34
- string[0, self.begin(n)].char_length
38
+ string[0, self.begin(n)].codepoint_length
35
39
  end
36
40
  end
37
41
 
@@ -39,7 +43,7 @@ class MatchData
39
43
  if string.respond_to? :codepoints
40
44
  self.end(n)
41
45
  else
42
- string[0, self.end(n)].char_length
46
+ string[0, self.end(n)].codepoint_length
43
47
  end
44
48
  end
45
49
  end
@@ -77,11 +81,14 @@ module Twitter
77
81
  #
78
82
  # If a block is given then it will be called for each entity.
79
83
  def extract_entities_with_indices(text, options = {}, &block)
84
+ config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
85
+
80
86
  # extract all entities
81
87
  entities = extract_urls_with_indices(text, options) +
82
88
  extract_hashtags_with_indices(text, :check_url_overlap => false) +
83
89
  extract_mentions_or_lists_with_indices(text) +
84
90
  extract_cashtags_with_indices(text)
91
+ entities += extract_emoji_with_indices(text) if config.emoji_parsing_enabled
85
92
 
86
93
  return [] if entities.empty?
87
94
 
@@ -218,11 +225,7 @@ module Twitter
218
225
  :indices => [start_position + $~.char_begin(0),
219
226
  start_position + $~.char_end(0)]
220
227
  }
221
- if path ||
222
- ascii_domain =~ Twitter::TwitterText::Regex[:valid_special_short_domain] ||
223
- ascii_domain !~ Twitter::TwitterText::Regex[:invalid_short_domain]
224
- urls << last_url
225
- end
228
+ urls << last_url
226
229
  end
227
230
 
228
231
  # no ASCII-only domain found. Skip the entire URL
@@ -239,7 +242,7 @@ module Twitter
239
242
  if url =~ Twitter::TwitterText::Regex[:valid_tco_url]
240
243
  next if $1 && $1.length > MAX_TCO_SLUG_LENGTH
241
244
  url = $&
242
- end_position = start_position + url.char_length
245
+ end_position = start_position + url.codepoint_length
243
246
  end
244
247
 
245
248
  next unless is_valid_domain(url.length, domain, protocol)
@@ -341,6 +344,31 @@ module Twitter
341
344
  tags
342
345
  end
343
346
 
347
+ def extract_emoji_with_indices(text) # :yields: emoji, start, end
348
+ emoji = []
349
+ text.scan(Twitter::TwitterText::Regex[:valid_emoji]) do |emoji_text|
350
+ match_data = $~
351
+ start_position = match_data.char_begin(0)
352
+ end_position = match_data.char_end(0)
353
+ emoji << {
354
+ :emoji => emoji_text,
355
+ :indices => [start_position, end_position]
356
+ }
357
+ end
358
+ emoji
359
+ end
360
+
361
+ def is_valid_emoji(text)
362
+ begin
363
+ raise ArgumentError.new("invalid empty emoji") unless text
364
+ entities = extract_emoji_with_indices(text)
365
+ entities.count == 1 && entities[0][:emoji] == text
366
+ rescue Exception
367
+ # On error don't consider this a valid emoji.
368
+ return false
369
+ end
370
+ end
371
+
344
372
  def is_valid_domain(url_length, domain, protocol)
345
373
  begin
346
374
  raise ArgumentError.new("invalid empty domain") unless domain
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  module Twitter
2
6
  module TwitterText
3
7
  module HashHelper
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  module Twitter
2
6
  module TwitterText
3
7
  # Module for doing "hit highlighting" on tweets that have been auto-linked already.
@@ -26,7 +30,7 @@ module Twitter
26
30
 
27
31
  result = []
28
32
  chunk_index, chunk = 0, chunks[0]
29
- chunk_chars = chunk.to_s.to_char_a
33
+ chunk_chars = chunk.to_s.to_codepoint_a
30
34
  prev_chunks_len = 0
31
35
  chunk_cursor = 0
32
36
  start_in_chunk = false
@@ -50,7 +54,7 @@ module Twitter
50
54
  chunk_cursor = 0
51
55
  chunk_index += 2
52
56
  chunk = chunks[chunk_index]
53
- chunk_chars = chunk.to_s.to_char_a
57
+ chunk_chars = chunk.to_s.to_codepoint_a
54
58
  start_in_chunk = false
55
59
  end
56
60
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
 
3
7
  module Twitter
@@ -53,11 +57,26 @@ module Twitter
53
57
  ].flatten.map{|c| [c].pack('U*')}.freeze
54
58
  REGEXEN[:spaces] = /[#{UNICODE_SPACES.join('')}]/o
55
59
 
60
+ DIRECTIONAL_CHARACTERS = [
61
+ 0x061C, # ARABIC LETTER MARK (ALM)
62
+ 0x200E, # LEFT-TO-RIGHT MARK (LRM)
63
+ 0x200F, # RIGHT-TO-LEFT MARK (RLM)
64
+ 0x202A, # LEFT-TO-RIGHT EMBEDDING (LRE)
65
+ 0x202B, # RIGHT-TO-LEFT EMBEDDING (RLE)
66
+ 0x202C, # POP DIRECTIONAL FORMATTING (PDF)
67
+ 0x202D, # LEFT-TO-RIGHT OVERRIDE (LRO)
68
+ 0x202E, # RIGHT-TO-LEFT OVERRIDE (RLO)
69
+ 0x2066, # LEFT-TO-RIGHT ISOLATE (LRI)
70
+ 0x2067, # RIGHT-TO-LEFT ISOLATE (RLI)
71
+ 0x2068, # FIRST STRONG ISOLATE (FSI)
72
+ 0x2069, # POP DIRECTIONAL ISOLATE (PDI)
73
+ ].map{|cp| [cp].pack('U')}.freeze
74
+ REGEXEN[:directional_characters] = /[#{DIRECTIONAL_CHARACTERS.join('')}]/o
75
+
56
76
  # Characters not allowed in Tweets
57
77
  INVALID_CHARACTERS = [
58
78
  0xFFFE, 0xFEFF, # BOM
59
79
  0xFFFF, # Special
60
- 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
61
80
  ].map{|cp| [cp].pack('U') }.freeze
62
81
  REGEXEN[:invalid_control_characters] = /[#{INVALID_CHARACTERS.join('')}]/o
63
82
 
@@ -157,14 +176,16 @@ module Twitter
157
176
  ([a-z0-9_]{1,20}) # $3: Screen name
158
177
  (\/[a-z][a-zA-Z0-9_\-]{0,24})? # $4: List (optional)
159
178
  /iox
160
- REGEXEN[:valid_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-z0-9_]{1,20})/io
179
+ REGEXEN[:valid_reply] = /^(?:[#{UNICODE_SPACES}#{DIRECTIONAL_CHARACTERS}])*#{REGEXEN[:at_signs]}([a-z0-9_]{1,20})/io
161
180
  # Used in Extractor for final filtering
162
181
  REGEXEN[:end_mention_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/io
163
182
 
164
183
  # URL related hash regex collection
165
- REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@@$###{INVALID_CHARACTERS.join('')}]|^)/io
184
+ REGEXEN[:valid_url_preceding_chars] = /(?:[^A-Z0-9@@$###{INVALID_CHARACTERS.join('')}]|[#{DIRECTIONAL_CHARACTERS.join('')}]|^)/io
166
185
  REGEXEN[:invalid_url_without_protocol_preceding_chars] = /[-_.\/]$/
167
- DOMAIN_VALID_CHARS = "[^#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
186
+
187
+ DOMAIN_VALID_CHARS = "[^#{DIRECTIONAL_CHARACTERS.join('')}#{PUNCTUATION_CHARS}#{SPACE_CHARS}#{CTRL_CHARS}#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
188
+ # "[a-z0-9#{LATIN_ACCENTS}]"
168
189
  REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
169
190
  REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
170
191
 
@@ -183,13 +204,6 @@ module Twitter
183
204
  }ix
184
205
  REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/i
185
206
 
186
- REGEXEN[:valid_special_cctld] = %r{
187
- (?:
188
- (?:co|tv)
189
- (?=[^0-9a-z@]|$)
190
- )
191
- }ix
192
-
193
207
  REGEXEN[:valid_domain] = /(?:
194
208
  #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
195
209
  (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
@@ -204,10 +218,6 @@ module Twitter
204
218
  # This is used in Extractor for stricter t.co URL extraction
205
219
  REGEXEN[:valid_tco_url] = /^https?:\/\/t\.co\/([a-z0-9]+)/i
206
220
 
207
- # This is used in Extractor to filter out unwanted URLs.
208
- REGEXEN[:invalid_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}\Z/io
209
- REGEXEN[:valid_special_short_domain] = /\A#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_special_cctld]}\Z/io
210
-
211
221
  REGEXEN[:valid_port_number] = /[0-9]+/
212
222
 
213
223
  REGEXEN[:valid_general_url_path_chars] = /[a-z\p{Cyrillic}0-9!\*';:=\+\,\.\$\/%#\[\]\p{Pd}_~&\|@#{LATIN_ACCENTS}]/io
@@ -258,7 +268,7 @@ module Twitter
258
268
  }iox
259
269
 
260
270
  REGEXEN[:cashtag] = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i
261
- REGEXEN[:valid_cashtag] = /(^|#{REGEXEN[:spaces]})(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
271
+ REGEXEN[:valid_cashtag] = /(^|[#{UNICODE_SPACES}#{DIRECTIONAL_CHARACTERS}])(\$)(#{REGEXEN[:cashtag]})(?=$|\s|[#{PUNCTUATION_CHARS}])/i
262
272
 
263
273
  # These URL validation pattern strings are based on the ABNF from RFC 3986
264
274
  REGEXEN[:validate_url_unreserved] = /[a-z\p{Cyrillic}0-9\p{Pd}._~]/i
@@ -339,6 +349,8 @@ module Twitter
339
349
  REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
340
350
  REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i
341
351
 
352
+ REGEXEN[:valid_emoji] = Twitter::TwitterText::Regex::Emoji[:valid_emoji]
353
+
342
354
  # Modified version of RFC 3986 Appendix B
343
355
  REGEXEN[:validate_url_unencoded] = %r{
344
356
  \A # Full URL
@@ -1,9 +1,13 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  module Twitter
2
6
  module TwitterText
3
7
  # A module that provides base methods to rewrite usernames, lists, hashtags and URLs.
4
8
  module Rewriter extend self
5
9
  def rewrite_entities(text, entities)
6
- chars = text.to_s.to_char_a
10
+ codepoints = text.to_s.to_codepoint_a
7
11
 
8
12
  # sort by start index
9
13
  entities = entities.sort_by do |entity|
@@ -14,11 +18,11 @@ module Twitter
14
18
  result = []
15
19
  last_index = entities.inject(0) do |index, entity|
16
20
  indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
17
- result << chars[index...indices.first]
18
- result << yield(entity, chars)
21
+ result << codepoints[index...indices.first]
22
+ result << yield(entity, codepoints)
19
23
  indices.last
20
24
  end
21
- result << chars[last_index..-1]
25
+ result << codepoints[last_index..-1]
22
26
 
23
27
  result.flatten.join
24
28
  end
@@ -35,8 +39,8 @@ module Twitter
35
39
 
36
40
  def rewrite_usernames_or_lists(text)
37
41
  entities = Extractor.extract_mentions_or_lists_with_indices(text)
38
- rewrite_entities(text, entities) do |entity, chars|
39
- at = chars[entity[:indices].first]
42
+ rewrite_entities(text, entities) do |entity, codepoints|
43
+ at = codepoints[entity[:indices].first]
40
44
  list_slug = entity[:list_slug]
41
45
  list_slug = nil if list_slug.empty?
42
46
  yield(at, entity[:screen_name], list_slug)
@@ -46,8 +50,8 @@ module Twitter
46
50
 
47
51
  def rewrite_hashtags(text)
48
52
  entities = Extractor.extract_hashtags_with_indices(text)
49
- rewrite_entities(text, entities) do |entity, chars|
50
- hash = chars[entity[:indices].first]
53
+ rewrite_entities(text, entities) do |entity, codepoints|
54
+ hash = codepoints[entity[:indices].first]
51
55
  yield(hash, entity[:hashtag])
52
56
  end
53
57
  end
@@ -55,7 +59,7 @@ module Twitter
55
59
 
56
60
  def rewrite_urls(text)
57
61
  entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
58
- rewrite_entities(text, entities) do |entity, chars|
62
+ rewrite_entities(text, entities) do |entity, codepoints|
59
63
  yield(entity[:url])
60
64
  end
61
65
  end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  module Twitter
2
6
  module TwitterText
3
7
  # This module lazily defines constants of the form Uxxxx for all Unicode
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  require 'unf'
2
6
 
3
7
  module Twitter
@@ -34,8 +38,7 @@ module Twitter
34
38
  options = DEFAULT_TCO_URL_LENGTHS.merge(options)
35
39
  config = options[:config] || Twitter::TwitterText::Configuration.default_configuration
36
40
  normalized_text = text.to_nfc
37
- normalized_text_length = normalized_text.char_length
38
- unless (normalized_text_length > 0)
41
+ unless (normalized_text.length > 0)
39
42
  ParseResults.empty()
40
43
  end
41
44
 
@@ -46,6 +49,7 @@ module Twitter
46
49
  ranges = config.ranges
47
50
 
48
51
  url_entities = Twitter::TwitterText::Extractor.extract_urls_with_indices(normalized_text)
52
+ emoji_entities = config.emoji_parsing_enabled ? Twitter::TwitterText::Extractor.extract_emoji_with_indices(normalized_text) : []
49
53
 
50
54
  has_invalid_chars = false
51
55
  weighted_count = 0
@@ -53,24 +57,42 @@ module Twitter
53
57
  display_offset = 0
54
58
  valid_offset = 0
55
59
 
56
- while offset < normalized_text_length
60
+ while offset < normalized_text.codepoint_length
57
61
  # Reset the default char weight each pass through the loop
58
62
  char_weight = config.default_weight
63
+ entity_length = 0
64
+
59
65
  url_entities.each do |url_entity|
60
66
  if url_entity[:indices].first == offset
61
- url_length = url_entity[:indices].last - url_entity[:indices].first
67
+ entity_length = url_entity[:indices].last - url_entity[:indices].first
62
68
  weighted_count += transformed_url_length
63
- offset += url_length
64
- display_offset += url_length
69
+ offset += entity_length
70
+ display_offset += entity_length
71
+ if weighted_count <= scaled_max_weighted_tweet_length
72
+ valid_offset += entity_length
73
+ end
74
+ # Finding a match breaks the loop
75
+ break
76
+ end
77
+ end
78
+
79
+ emoji_entities.each do |emoji_entity|
80
+ if emoji_entity[:indices].first == offset
81
+ entity_length = emoji_entity[:indices].last - emoji_entity[:indices].first
82
+ weighted_count += char_weight # the default weight
83
+ offset += entity_length
84
+ display_offset += entity_length
65
85
  if weighted_count <= scaled_max_weighted_tweet_length
66
- valid_offset += url_length
86
+ valid_offset += entity_length
67
87
  end
68
- # Finding a match breaks the loop; order of ranges matters.
88
+ # Finding a match breaks the loop
69
89
  break
70
90
  end
71
91
  end
72
92
 
73
- if offset < normalized_text_length
93
+ next if entity_length > 0
94
+
95
+ if offset < normalized_text.codepoint_length
74
96
  code_point = normalized_text[offset]
75
97
 
76
98
  ranges.each do |range|
@@ -82,17 +104,19 @@ module Twitter
82
104
 
83
105
  weighted_count += char_weight
84
106
 
85
- has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
86
- char_count = code_point.char_length
87
- offset += char_count
88
- display_offset += char_count
107
+ has_invalid_chars = contains_invalid?(code_point) unless has_invalid_chars
108
+ codepoint_length = code_point.codepoint_length
109
+ offset += codepoint_length
110
+ display_offset += codepoint_length
111
+ # index += codepoint_length
89
112
 
90
113
  if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
91
- valid_offset += char_count
114
+ valid_offset += codepoint_length
92
115
  end
93
116
  end
94
117
  end
95
- normalized_text_offset = text.char_length - normalized_text.char_length
118
+
119
+ normalized_text_offset = text.codepoint_length - normalized_text.codepoint_length
96
120
  scaled_weighted_length = weighted_count / scale
97
121
  is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
98
122
  permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: UTF-8
2
6
 
3
7
  module Twitter
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -18,6 +22,7 @@ describe Twitter::TwitterText::Configuration do
18
22
  it "should define version constants" do
19
23
  expect(Twitter::TwitterText::Configuration.const_defined?(:CONFIG_V1)).to be true
20
24
  expect(Twitter::TwitterText::Configuration.const_defined?(:CONFIG_V2)).to be true
25
+ expect(Twitter::TwitterText::Configuration.const_defined?(:CONFIG_V3)).to be true
21
26
  end
22
27
 
23
28
  it "should define a default configuration" do
@@ -87,5 +92,45 @@ describe Twitter::TwitterText::Configuration do
87
92
  expect(weighted_range.weight).to be_kind_of(Integer)
88
93
  end
89
94
  end
95
+
96
+ context "with v3 configuration" do
97
+ before do
98
+ @config = Twitter::TwitterText::Configuration.configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V3)
99
+ end
100
+
101
+ it "should have a version" do
102
+ expect(@config.version).to eq(3)
103
+ end
104
+
105
+ it "should have a max_weighted_tweet_length" do
106
+ expect(@config.max_weighted_tweet_length).to eq(280)
107
+ end
108
+
109
+ it "should have a scale" do
110
+ expect(@config.scale).to eq(100)
111
+ end
112
+
113
+ it "should have a default_weight" do
114
+ expect(@config.default_weight).to eq(200)
115
+ end
116
+
117
+ it "should have a transformed_url_length" do
118
+ expect(@config.transformed_url_length).to eq(23)
119
+ end
120
+
121
+ it "should have a configured range" do
122
+ expect(@config.ranges).to be_kind_of(Array)
123
+ expect(@config.ranges.count).to be > 0
124
+ expect(@config.ranges[0]).to be_kind_of(Twitter::TwitterText::WeightedRange)
125
+ weighted_range = @config.ranges[0]
126
+ expect(weighted_range.start).to be_kind_of(Integer)
127
+ expect(weighted_range.end).to be_kind_of(Integer)
128
+ expect(weighted_range.weight).to be_kind_of(Integer)
129
+ end
130
+
131
+ it "should support discounting emoji" do
132
+ expect(@config.emoji_parsing_enabled).to be true
133
+ end
134
+ end
90
135
  end
91
136
  end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -35,4 +39,38 @@ describe "Twitter::TwitterText::Regex regular expressions" do
35
39
  end
36
40
 
37
41
  end
42
+
43
+ describe "matching Unicode 10.0 emoji" do
44
+ it "should match new emoji" do
45
+ input = "Unicode 10.0; grinning face with one large and one small eye: 🤪; woman with headscarf: 🧕; (fitzpatrick) woman with headscarf + medium-dark skin tone: 🧕🏾; flag (England): 🏴󠁧󠁢󠁥󠁮󠁧󠁿"
46
+ expected = ["🤪", "🧕", "🧕🏾", "🏴󠁧󠁢󠁥󠁮󠁧󠁿"]
47
+ entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)
48
+ entities.each_with_index do |entity, i|
49
+ expect(entity[:emoji]).to be_kind_of(String)
50
+ expect(entity[:indices]).to be_kind_of(Array)
51
+ entity[:indices].each do |position|
52
+ expect(position).to be_kind_of(Integer)
53
+ end
54
+ expect(entity[:emoji]).to be == expected[i]
55
+ expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
56
+ end
57
+ end
58
+ end
59
+
60
+ describe "matching Unicode 9.0 emoji" do
61
+ it "should match new emoji" do
62
+ input = "Unicode 9.0; face with cowboy hat: 🤠; woman dancing: 💃, woman dancing + medium-dark skin tone: 💃🏾"
63
+ expected = ["🤠", "💃", "💃🏾"]
64
+ entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)
65
+ entities.each_with_index do |entity, i|
66
+ expect(entity[:emoji]).to be_kind_of(String)
67
+ expect(entity[:indices]).to be_kind_of(Array)
68
+ entity[:indices].each do |position|
69
+ expect(position).to be_kind_of(Integer)
70
+ end
71
+ expect(entity[:emoji]).to be == expected[i]
72
+ expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
73
+ end
74
+ end
75
+ end
38
76
  end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  $TESTING=true
2
6
 
3
7
  # Ruby 1.8 encoding check
@@ -20,6 +24,15 @@ require File.expand_path('../test_urls', __FILE__)
20
24
 
21
25
  RSpec.configure do |config|
22
26
  config.include TestUrls
27
+
28
+ config.filter_run_excluding :ruby => lambda { |version|
29
+ case version.to_s
30
+ when /^> (.*)/
31
+ !(RUBY_VERSION.to_s > $1)
32
+ else
33
+ !(RUBY_VERSION.to_s =~ /^#{version.to_s}/)
34
+ end
35
+ }
23
36
  end
24
37
 
25
38
  RSpec::Matchers.define :match_autolink_expression do
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
 
3
7
  module TestUrls
@@ -53,11 +57,11 @@ module TestUrls
53
57
  "http://no_underscores.com",
54
58
  "http://test.c_o_m",
55
59
  "http://test.c-o-m",
56
- "http://twitt#{[0x202A].pack('U')}er.com",
57
- "http://twitt#{[0x202B].pack('U')}er.com",
58
- "http://twitt#{[0x202C].pack('U')}er.com",
59
- "http://twitt#{[0x202D].pack('U')}er.com",
60
- "http://twitt#{[0x202E].pack('U')}er.com",
60
+ "http://twitt#{[0x202A].pack('U')}.com",
61
+ "http://twitt#{[0x202B].pack('U')}.com",
62
+ "http://twitt#{[0x202C].pack('U')}.com",
63
+ "http://twitt#{[0x202D].pack('U')}.com",
64
+ "http://twitt#{[0x202E].pack('U')}.com",
61
65
  "https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomes
uperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.com/
foo https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsome
superlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.com
/foo https://somesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsom
esuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurlsomesuperlongurl.co
m/foo"
62
66
  ] unless defined?(TestUrls::INVALID)
63
67
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
  require File.dirname(__FILE__) + '/spec_helper'
3
7
 
@@ -16,9 +20,9 @@ describe Twitter::TwitterText::Validation do
16
20
  expect(TestValidation.new.tweet_invalid?("Bom:#{Twitter::TwitterText::Unicode::UFFFF}")).to be == :invalid_characters
17
21
  end
18
22
 
19
- it "should disallow direction change characters" do
23
+ it "should allow direction change characters" do
20
24
  [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
21
- expect(TestValidation.new.tweet_invalid?("Invalid:#{char}")).to eq(:invalid_characters)
25
+ expect(TestValidation.new.tweet_invalid?("Invalid:#{char}")).to be false
22
26
  end
23
27
  end
24
28
 
@@ -64,4 +68,20 @@ describe Twitter::TwitterText::Validation do
64
68
  expect(results[:valid_range_end]).to eq(0)
65
69
  end
66
70
  end
71
+
72
+ context "when parsing tweet text" do
73
+ it "should properly parse ZWJ and ZWNJ when grapheme clusters are enabled", ruby: ">= 2.5.0" do
74
+ # Grapheme clustering of Devanagari script differs based on platform implementation
75
+ text = "ZWJ: क्ष -> क्\u200Dष; ZWNJ: क्ष -> क्\u200Cष"
76
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V3)
77
+ results = Twitter::TwitterText::Validation::parse_tweet(text, config: config)
78
+ expect(results[:weighted_length]).to eq(29)
79
+ expect(results[:permillage]).to eq(103)
80
+ expect(results[:valid]).to be true
81
+ expect(results[:display_range_start]).to eq(0)
82
+ expect(results[:display_range_end]).to eq(34)
83
+ expect(results[:valid_range_start]).to eq(0)
84
+ expect(results[:valid_range_end]).to eq(34)
85
+ end
86
+ end
67
87
  end
@@ -1,3 +1,7 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  require 'multi_json'
2
6
  require 'nokogiri'
3
7
  require 'test/unit'
@@ -130,6 +134,11 @@ class ConformanceTest < Test::Unit::TestCase
130
134
  assert_equal e, extract_urls_with_indices(text), description
131
135
  end
132
136
 
137
+ def_conformance_test("extract.yml", :urls_with_directional_markers) do
138
+ e = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} }
139
+ assert_equal e, extract_urls_with_indices(text), description
140
+ end
141
+
133
142
  def_conformance_test("extract.yml", :hashtags) do
134
143
  assert_equal expected, extract_hashtags(text), description
135
144
  end
@@ -215,11 +224,19 @@ class ConformanceTest < Test::Unit::TestCase
215
224
  assert_equal expected, valid_hashtag?(text), description
216
225
  end
217
226
 
218
- def_conformance_test("validate.yml", :lengths) do
219
- assert_equal expected, tweet_length(text), description
227
+ def_conformance_test("validate.yml", :WeightedTweetsCounterTest) do
228
+ # Force v2 configuration, basic weighted code point support
229
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V2)
230
+ assert_equal_parse_results expected, parse_tweet(text, config: config), description
220
231
  end
221
232
 
222
- def_conformance_test("validate.yml", :WeightedTweetsCounterTest) do
233
+ def_conformance_test("validate.yml", :WeightedTweetsWithDiscountedEmojiCounterTest) do
234
+ # Force v3 configuration, which supports discounting grapheme clusters that are emoji
235
+ config = Twitter::TwitterText::Configuration::configuration_from_file(Twitter::TwitterText::Configuration::CONFIG_V3)
236
+ assert_equal_parse_results expected, parse_tweet(text, config: config), description
237
+ end
238
+
239
+ def_conformance_test("validate.yml", :UnicodeDirectionalMarkerCounterTest) do
223
240
  assert_equal_parse_results expected, parse_tweet(text), description
224
241
  end
225
242
  end
@@ -1,8 +1,12 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
1
5
  # encoding: utf-8
2
6
 
3
7
  Gem::Specification.new do |s|
4
8
  s.name = "twitter-text"
5
- s.version = "2.1.0"
9
+ s.version = "3.0.0"
6
10
  s.authors = ["David LaMacchia", "Sudheer Guntupalli", "Kaushik Lakshmikanth", "Jose Antonio Marquez Russo", "Lee Adams",
7
11
  "Yoshimasa Niwa"]
8
12
  s.email = ["opensource@twitter.com"]
@@ -14,7 +18,6 @@ Gem::Specification.new do |s|
14
18
  s.has_rdoc = true
15
19
  s.summary = "Twitter text handling library"
16
20
 
17
- s.add_development_dependency "pry"
18
21
  s.add_development_dependency "test-unit"
19
22
  s.add_development_dependency "multi_json", "~> 1.3"
20
23
  s.add_development_dependency "nokogiri", "~> 1.8.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David LaMacchia
@@ -13,22 +13,8 @@ authors:
13
13
  autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2017-12-21 00:00:00.000000000 Z
16
+ date: 2018-10-14 00:00:00.000000000 Z
17
17
  dependencies:
18
- - !ruby/object:Gem::Dependency
19
- name: pry
20
- requirement: !ruby/object:Gem::Requirement
21
- requirements:
22
- - - ">="
23
- - !ruby/object:Gem::Version
24
- version: '0'
25
- type: :development
26
- prerelease: false
27
- version_requirements: !ruby/object:Gem::Requirement
28
- requirements:
29
- - - ">="
30
- - !ruby/object:Gem::Version
31
- version: '0'
32
18
  - !ruby/object:Gem::Dependency
33
19
  name: test-unit
34
20
  requirement: !ruby/object:Gem::Requirement
@@ -174,11 +160,13 @@ files:
174
160
  - config/README.md
175
161
  - config/v1.json
176
162
  - config/v2.json
163
+ - config/v3.json
177
164
  - lib/assets/tld_lib.yml
178
165
  - lib/twitter-text.rb
179
166
  - lib/twitter-text/autolink.rb
180
167
  - lib/twitter-text/configuration.rb
181
168
  - lib/twitter-text/deprecation.rb
169
+ - lib/twitter-text/emoji_regex.rb
182
170
  - lib/twitter-text/extractor.rb
183
171
  - lib/twitter-text/hash_helper.rb
184
172
  - lib/twitter-text/hit_highlighter.rb
@@ -222,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
210
  version: '0'
223
211
  requirements: []
224
212
  rubyforge_project:
225
- rubygems_version: 2.7.0
213
+ rubygems_version: 2.5.1
226
214
  signing_key:
227
215
  specification_version: 4
228
216
  summary: Twitter text handling library