twitter-text 1.10.0 → 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9bf8f0458f83224b2f818d0167deece43aa85875
4
- data.tar.gz: dadc8f874379ed383cf7b66172b5eb65ec130906
3
+ metadata.gz: f435b95a47697def8e995b0d2dbdbbbde2b7e568
4
+ data.tar.gz: 10d1c8186c09520f2dfcbb9de8a3d336fdac7b04
5
5
  SHA512:
6
- metadata.gz: efadacd1f014d0b422dc513c548e3c7cf22df17a21c42c382160a54883045530355b4766d21bfd038d9dd20ef4fcf9c33767874e2e708a357d9c63ab8dfbd46b
7
- data.tar.gz: 43d0537e6b2e0411a3929b89ce5977c91ec786ed7e409fa7b0b0ed16beb8fff425b7c012e1f2073bc279af3c1b9814b0d51b6bf10c5f05b59dbefeec2696e2ec
6
+ metadata.gz: 0d4584b362d0bf89d5173d44949f4fa1187c8d82dd69c61fc4f6038b8fd0f7badbbb78551f04b63c63aa5074de3539784f7291891231a7e120592bd1dd0edfca
7
+ data.tar.gz: 065adc1b81bf6452b162a94f23cc91748b5cf1938ef6dc54211825a3c08139304deb3911f1c2484d81980fe3125a6190bef83c9f7f5a61f4ed271b911ee3829a
data/README.rdoc CHANGED
@@ -1,4 +1,6 @@
1
- == twitter-text {<img src="https://secure.travis-ci.org/twitter/twitter-text-rb.png?branch=master" />}[http://travis-ci.org/twitter/twitter-text-rb] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/twitter/twitter-text-rb]
1
+ {rdoc-image:https://img.shields.io/gem/v/twitter-text.svg}[https://rubygems.org/gems/twitter-text]
2
+
3
+ == twitter-text
2
4
 
3
5
  A gem that provides text processing routines for Twitter Tweets. The major
4
6
  reason for this is to unify the various auto-linking and extraction of
@@ -77,14 +79,6 @@ words should work equally well.
77
79
  Use to provide emphasis around the "hits" returned from the Search API, built
78
80
  to work against text that has been auto-linked already.
79
81
 
80
- === Conformance
81
-
82
- To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run:
83
-
84
- git submodule add git@github.com:twitter/twitter-text-conformance.git test/twitter-text-conformance/
85
- git submodule init
86
- git submodule update
87
-
88
82
  === Thanks
89
83
 
90
84
  Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of:
data/Rakefile CHANGED
@@ -5,40 +5,12 @@ Bundler::GemHelper.install_tasks
5
5
  task :default => ['spec', 'test:conformance']
6
6
  task :test => :spec
7
7
 
8
+ require 'rubygems'
8
9
  require 'rspec/core/rake_task'
9
10
  RSpec::Core::RakeTask.new(:spec)
10
11
 
11
- def conformance_version(dir)
12
- require 'digest'
13
- Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
14
- end
15
-
16
12
  namespace :test do
17
13
  namespace :conformance do
18
- desc "Update conformance testing data"
19
- task :update do
20
- puts "Updating conformance data ... "
21
- system("git submodule init") || raise("Failed to init submodule")
22
- system("git submodule update") || raise("Failed to update submodule")
23
- puts "Updating conformance data ... DONE"
24
- end
25
-
26
- desc "Change conformance test data to the lastest version"
27
- task :latest => ['conformance:update'] do
28
- current_dir = File.dirname(__FILE__)
29
- submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
30
- version_before = conformance_version(submodule_dir)
31
- system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version")
32
- system("cd #{current_dir}")
33
- if conformance_version(submodule_dir) != version_before
34
- system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files")
35
- system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data")
36
- puts "Upgraded conformance suite."
37
- else
38
- puts "No conformance suite changes."
39
- end
40
- end
41
-
42
14
  desc "Run conformance test suite"
43
15
  task :run do
44
16
  ruby '-rubygems', "test/conformance_test.rb"
@@ -46,7 +18,7 @@ namespace :test do
46
18
  end
47
19
 
48
20
  desc "Run conformance test suite"
49
- task :conformance => ['conformance:update', 'conformance:run'] do
21
+ task :conformance => ['conformance:run'] do
50
22
  end
51
23
  end
52
24
 
@@ -262,6 +262,7 @@ country:
262
262
  - "срб"
263
263
  - "укр"
264
264
  - "қаз"
265
+ - "հայ"
265
266
  - "الاردن"
266
267
  - "الجزائر"
267
268
  - "السعودية"
@@ -304,11 +305,14 @@ generic:
304
305
  - accountants
305
306
  - active
306
307
  - actor
308
+ - adult
307
309
  - aero
308
310
  - agency
309
311
  - airforce
310
312
  - allfinanz
311
313
  - alsace
314
+ - android
315
+ - aquarelle
312
316
  - archi
313
317
  - army
314
318
  - arpa
@@ -332,6 +336,7 @@ generic:
332
336
  - biz
333
337
  - black
334
338
  - blackfriday
339
+ - bloomberg
335
340
  - blue
336
341
  - bmw
337
342
  - bnpparibas
@@ -356,6 +361,7 @@ generic:
356
361
  - care
357
362
  - career
358
363
  - careers
364
+ - cartier
359
365
  - casa
360
366
  - cash
361
367
  - cat
@@ -376,6 +382,7 @@ generic:
376
382
  - clinic
377
383
  - clothing
378
384
  - club
385
+ - coach
379
386
  - codes
380
387
  - coffee
381
388
  - college
@@ -394,6 +401,7 @@ generic:
394
401
  - country
395
402
  - credit
396
403
  - creditcard
404
+ - cricket
397
405
  - crs
398
406
  - cruises
399
407
  - cuisinella
@@ -404,6 +412,7 @@ generic:
404
412
  - day
405
413
  - deals
406
414
  - degree
415
+ - delivery
407
416
  - democrat
408
417
  - dental
409
418
  - dentist
@@ -423,22 +432,27 @@ generic:
423
432
  - education
424
433
  - email
425
434
  - emerck
435
+ - energy
426
436
  - engineer
427
437
  - engineering
428
438
  - enterprises
429
439
  - equipment
430
440
  - esq
431
441
  - estate
442
+ - eurovision
432
443
  - eus
433
444
  - events
445
+ - everbank
434
446
  - exchange
435
447
  - expert
436
448
  - exposed
437
449
  - fail
438
450
  - farm
451
+ - fashion
439
452
  - feedback
440
453
  - finance
441
454
  - financial
455
+ - firmdale
442
456
  - fish
443
457
  - fishing
444
458
  - fitness
@@ -505,6 +519,7 @@ generic:
505
519
  - int
506
520
  - international
507
521
  - investments
522
+ - irish
508
523
  - jetzt
509
524
  - jobs
510
525
  - joburg
@@ -518,8 +533,11 @@ generic:
518
533
  - kred
519
534
  - lacaixa
520
535
  - land
536
+ - latrobe
521
537
  - lawyer
538
+ - lds
522
539
  - lease
540
+ - legal
523
541
  - lgbt
524
542
  - life
525
543
  - lighting
@@ -532,6 +550,7 @@ generic:
532
550
  - ltda
533
551
  - luxe
534
552
  - luxury
553
+ - madrid
535
554
  - maison
536
555
  - management
537
556
  - mango
@@ -541,6 +560,7 @@ generic:
541
560
  - meet
542
561
  - melbourne
543
562
  - meme
563
+ - memorial
544
564
  - menu
545
565
  - miami
546
566
  - mil
@@ -549,6 +569,8 @@ generic:
549
569
  - moda
550
570
  - moe
551
571
  - monash
572
+ - money
573
+ - mormon
552
574
  - mortgage
553
575
  - moscow
554
576
  - motorcycles
@@ -579,6 +601,7 @@ generic:
579
601
  - paris
580
602
  - partners
581
603
  - parts
604
+ - party
582
605
  - pharmacy
583
606
  - photo
584
607
  - photography
@@ -592,6 +615,7 @@ generic:
592
615
  - plumbing
593
616
  - pohl
594
617
  - poker
618
+ - porn
595
619
  - post
596
620
  - praxi
597
621
  - press
@@ -610,6 +634,7 @@ generic:
610
634
  - rehab
611
635
  - reise
612
636
  - reisen
637
+ - reit
613
638
  - ren
614
639
  - rentals
615
640
  - repair
@@ -627,11 +652,13 @@ generic:
627
652
  - ruhr
628
653
  - ryukyu
629
654
  - saarland
655
+ - samsung
630
656
  - sarl
631
657
  - sca
632
658
  - scb
633
659
  - schmidt
634
660
  - schule
661
+ - science
635
662
  - scot
636
663
  - services
637
664
  - sexy
@@ -652,6 +679,7 @@ generic:
652
679
  - surf
653
680
  - surgery
654
681
  - suzuki
682
+ - sydney
655
683
  - systems
656
684
  - taipei
657
685
  - tatar
@@ -671,6 +699,7 @@ generic:
671
699
  - trade
672
700
  - training
673
701
  - travel
702
+ - trust
674
703
  - tui
675
704
  - university
676
705
  - uno
@@ -728,14 +757,17 @@ generic:
728
757
  - "موقع"
729
758
  - "संगठन"
730
759
  - "みんな"
760
+ - "グーグル"
731
761
  - "世界"
732
762
  - "中信"
733
763
  - "中文网"
734
764
  - "企业"
735
765
  - "佛山"
766
+ - "八卦"
736
767
  - "公司"
737
768
  - "公益"
738
769
  - "商城"
770
+ - "商店"
739
771
  - "商标"
740
772
  - "在线"
741
773
  - "广东"
@@ -747,6 +779,9 @@ generic:
747
779
  - "移动"
748
780
  - "组织机构"
749
781
  - "网址"
782
+ - "网店"
750
783
  - "网络"
784
+ - "谷歌"
751
785
  - "集团"
752
786
  - "삼성"
787
+ - onion
@@ -29,7 +29,7 @@ module Twitter
29
29
  TLDS = YAML.load_file(
30
30
  File.join(
31
31
  File.expand_path('../../..', __FILE__), # project root
32
- 'test', 'twitter-text-conformance', 'tld_lib.yml'
32
+ 'lib', 'assets', 'tld_lib.yml'
33
33
  )
34
34
  )
35
35
 
@@ -90,6 +90,7 @@ module Twitter
90
90
  regex_range(0x0300, 0x036f),
91
91
  regex_range(0x1e00, 0x1eff)
92
92
  ].join('').freeze
93
+ REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
93
94
 
94
95
  RTL_CHARACTERS = [
95
96
  regex_range(0x0600,0x06FF),
@@ -98,78 +99,14 @@ module Twitter
98
99
  regex_range(0xFE70,0xFEFF)
99
100
  ].join('').freeze
100
101
 
101
-
102
- NON_LATIN_HASHTAG_CHARS = [
103
- # Cyrillic (Russian, Ukrainian, etc.)
104
- regex_range(0x0400, 0x04ff), # Cyrillic
105
- regex_range(0x0500, 0x0527), # Cyrillic Supplement
106
- regex_range(0x2de0, 0x2dff), # Cyrillic Extended A
107
- regex_range(0xa640, 0xa69f), # Cyrillic Extended B
108
- regex_range(0x0591, 0x05bf), # Hebrew
109
- regex_range(0x05c1, 0x05c2),
110
- regex_range(0x05c4, 0x05c5),
111
- regex_range(0x05c7),
112
- regex_range(0x05d0, 0x05ea),
113
- regex_range(0x05f0, 0x05f4),
114
- regex_range(0xfb12, 0xfb28), # Hebrew Presentation Forms
115
- regex_range(0xfb2a, 0xfb36),
116
- regex_range(0xfb38, 0xfb3c),
117
- regex_range(0xfb3e),
118
- regex_range(0xfb40, 0xfb41),
119
- regex_range(0xfb43, 0xfb44),
120
- regex_range(0xfb46, 0xfb4f),
121
- regex_range(0x0610, 0x061a), # Arabic
122
- regex_range(0x0620, 0x065f),
123
- regex_range(0x066e, 0x06d3),
124
- regex_range(0x06d5, 0x06dc),
125
- regex_range(0x06de, 0x06e8),
126
- regex_range(0x06ea, 0x06ef),
127
- regex_range(0x06fa, 0x06fc),
128
- regex_range(0x06ff),
129
- regex_range(0x0750, 0x077f), # Arabic Supplement
130
- regex_range(0x08a0), # Arabic Extended A
131
- regex_range(0x08a2, 0x08ac),
132
- regex_range(0x08e4, 0x08fe),
133
- regex_range(0xfb50, 0xfbb1), # Arabic Pres. Forms A
134
- regex_range(0xfbd3, 0xfd3d),
135
- regex_range(0xfd50, 0xfd8f),
136
- regex_range(0xfd92, 0xfdc7),
137
- regex_range(0xfdf0, 0xfdfb),
138
- regex_range(0xfe70, 0xfe74), # Arabic Pres. Forms B
139
- regex_range(0xfe76, 0xfefc),
140
- regex_range(0x200c, 0x200c), # Zero-Width Non-Joiner
141
- regex_range(0x0e01, 0x0e3a), # Thai
142
- regex_range(0x0e40, 0x0e4e), # Hangul (Korean)
143
- regex_range(0x1100, 0x11ff), # Hangul Jamo
144
- regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo
145
- regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A
146
- regex_range(0xAC00, 0xD7AF), # Hangul Syllables
147
- regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
148
- regex_range(0xFFA1, 0xFFDC) # Half-width Hangul
149
- ].join('').freeze
150
- REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
151
-
152
- CJ_HASHTAG_CHARACTERS = [
153
- regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width)
154
- regex_range(0xFF66, 0xFF9F), # Katakana (half-width)
155
- regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width)
156
- regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana
157
- regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A)
158
- regex_range(0x4E00, 0x9FFF), # Kanji (Unified)
159
- regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B)
160
- regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C)
161
- regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D)
162
- regex_range(0x2F800, 0x2FA1F), regex_range(0x3003), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement)
163
- ].join('').freeze
164
-
165
102
  PUNCTUATION_CHARS = '!"#$%&\'()*+,-./:;<=>?@\[\]^_\`{|}~'
166
103
  SPACE_CHARS = " \t\n\x0B\f\r"
167
104
  CTRL_CHARS = "\x00-\x1F\x7F"
168
105
 
169
- # A hashtag must contain latin characters, numbers and underscores, but not all numbers.
170
- HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
171
- HASHTAG_ALPHANUMERIC = /[a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
172
- HASHTAG_BOUNDARY = /\A|\z|[^&a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/o
106
+ # A hashtag must contain at least one unicode letter or mark, as well as numbers, underscores, and select special characters.
107
+ HASHTAG_ALPHA = /[\p{L}\p{M}]/
108
+ HASHTAG_ALPHANUMERIC = /[\p{L}\p{M}\p{Nd}_\u200c\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u0f0d]/
109
+ HASHTAG_BOUNDARY = /\A|\z|[^&\p{L}\p{M}\p{Nd}_\u200c\u0482\ua673\ua67e\u05be\u05f3\u05f4\u309b\u309c\u30a0\u30fb\u3003\u0f0b\u0f0c\u0f0d]/
173
110
 
174
111
  HASHTAG = /(#{HASHTAG_BOUNDARY})(#|#)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)/io
175
112
 
@@ -62,7 +62,7 @@ class ConformanceTest < Test::Unit::TestCase
62
62
  element.attribute_nodes.map{|attr| [attr.name, attr.value]}.sort
63
63
  end
64
64
 
65
- CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../twitter-text-conformance", __FILE__)
65
+ CONFORMANCE_DIR = ENV['CONFORMANCE_DIR'] || File.expand_path("../../../conformance", __FILE__)
66
66
 
67
67
  def self.def_conformance_test(file, test_type, &block)
68
68
  yaml = YAML.load_file(File.join(CONFORMANCE_DIR, file))
data/twitter-text.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "twitter-text"
5
- s.version = "1.10.0"
5
+ s.version = "1.11.0"
6
6
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
7
7
  "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii", "James Koval"]
8
8
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
23
23
  s.add_development_dependency "simplecov", "~> 0.8.0"
24
24
  s.add_runtime_dependency "unf", "~> 0.1.0"
25
25
 
26
- s.files = `git ls-files`.split("\n") + ['test/twitter-text-conformance/tld_lib.yml']
26
+ s.files = `git ls-files`.split("\n") + ['lib/assets/tld_lib.yml']
27
27
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
28
28
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
29
29
  s.require_paths = ["lib"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.0
4
+ version: 1.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Sanford
@@ -16,7 +16,7 @@ authors:
16
16
  autorequire:
17
17
  bindir: bin
18
18
  cert_chain: []
19
- date: 2014-10-29 00:00:00.000000000 Z
19
+ date: 2015-01-27 00:00:00.000000000 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: multi_json
@@ -135,11 +135,11 @@ files:
135
135
  - ".gitignore"
136
136
  - ".gitmodules"
137
137
  - ".rspec"
138
- - ".travis.yml"
139
138
  - Gemfile
140
139
  - LICENSE
141
140
  - README.rdoc
142
141
  - Rakefile
142
+ - lib/assets/tld_lib.yml
143
143
  - lib/twitter-text.rb
144
144
  - lib/twitter-text/autolink.rb
145
145
  - lib/twitter-text/deprecation.rb
@@ -163,7 +163,6 @@ files:
163
163
  - spec/unicode_spec.rb
164
164
  - spec/validation_spec.rb
165
165
  - test/conformance_test.rb
166
- - test/twitter-text-conformance/tld_lib.yml
167
166
  - twitter-text.gemspec
168
167
  homepage: http://twitter.com
169
168
  licenses:
@@ -185,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
184
  version: '0'
186
185
  requirements: []
187
186
  rubyforge_project:
188
- rubygems_version: 2.2.2
187
+ rubygems_version: 2.4.4
189
188
  signing_key:
190
189
  specification_version: 4
191
190
  summary: Twitter text handling library
data/.travis.yml DELETED
@@ -1,9 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 1.8.7
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1.0
7
- before_install:
8
- - gem update --system
9
- - gem --version