rb_lib_text 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8f0aed61487891e9bd3d73f89aaa6f520f0a60ed
4
- data.tar.gz: 67b856b2eff50898d292f35ee3d3cb1da4c99765
3
+ metadata.gz: 31fba2668d0fe67e413f836abdc9a9d25047463b
4
+ data.tar.gz: 42475fec2a1e786f11f4fa816e4ecb547ef7b513
5
5
  SHA512:
6
- metadata.gz: 8e7bcd6115ccdebf3568d59c7f22ba412fb4847120936a6e7fceffb2a3b0dcab3283dfe9ddd6d3f30a54ce14fcba42dfc0cb5a7e587093b1598defcd5d10e898
7
- data.tar.gz: 955a9ba22a9cac716ee28622b45da8d44fcd64e389686c3fc04796a35a9d0752cc9c1abfc94c376f4e6ab51c2be1811923948c1b4a18288b76c244aba559af27
6
+ metadata.gz: a1087345a482f63dad742161c0aa6f3717de82a9e7d77082b112c15a574bd95cfea13c0a111722d497e2114fd4ff4875022e1353f7cd7743433d426ad7b0566f
7
+ data.tar.gz: da9b9e08ebab7b744c63b897e0fa42bd0503b3c15ab12d768b33f3285a8a9448d475fdf79cdde22f5b571045a21cee801e13f7658f3ff70232bed64e7576cd14
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # RbLibText
2
2
  A little text processing library for Ruby.
3
3
 
4
- [![Build Status](https://travis-ci.com/peoplepattern/rb-lib-text.svg?token=6qVa5jHEpFstFuG6QbBE)](https://travis-ci.com/peoplepattern/rb-lib-text)
4
+ [![Build Status](https://travis-ci.org/peoplepattern/rb-lib-text.svg)](https://travis-ci.org/peoplepattern/rb-lib-text)
5
5
 
6
6
  ## Overview
7
7
  The tokenization has been tuned to work well with text conventions commonly used in social media such as Twitter, and supports URLs, hashtags, emails and @-mentions cleanly.
@@ -14,23 +14,24 @@ module RbLibText
14
14
  tags_contractions: '[\w]+[\'‘’][\w]+', #don't split don't and can't and it's
15
15
  emails: '[\w\.\d]+@[\w\.\d]+\.[\w]+', #catch email addresses
16
16
  urls: 'https?://[-_/~%\w\d\.]*[_/~\w\d]', #Catch url addresses
17
- #sideways_text_emoji: '>?[:;=][\'\-D\)\]\(\[pPdoO/\*3\\]+',
17
+ # sideways_text_emoji: '>?[:;=][\'\-D\)\]\(\[pPdoO/\*3\\]+',
18
+ sideways_text_emoji: '>?[:;=8][\'\-D\)\(3DdPpOo\*\/]+',
18
19
  ellipses: '\.{3}',
19
20
  en_em_dash: '-{2,3}', #Catch en and em dashes
20
21
  slashes: '[\w]+(?:[/\-][\w]+)+', #Grammatical / -
21
22
  punct: '[\"“”‘’\'\\.\\?!…,:;»«\(\)]', #punctuation to split on
22
23
  tags_mentions: '[\w#@\d%$\u00B0]+', #Group all of these things together
24
+ hearts: '<+\/?3', # <3
23
25
  emoji_block0: '[\U00002600-\U000027BF]',
24
26
  emoji_block1: '[\U0001f300-\U0001f64F]',
25
27
  emoji_block2: '[\U0001f680-\U0001f6FF]',
26
- hearts: '<+/?3+', # <3
27
- other_punct: '[\u2014\u2013]',
28
+ other_punct: '[\u2014\u2013]',
28
29
  all_other: '[^\s]', #Split any other weird chars that may have been missed
29
30
  }
30
-
31
+
31
32
  return Regexp.union(patterns.values.map{|value| Regexp.new(value)})
32
33
  end
33
-
34
+
34
35
  def self.tokens(text)
35
36
  text = text.gsub("\u2026", "...")
36
37
  text = text.gsub(/\.{2,}/, "...")
@@ -1,3 +1,3 @@
1
1
  module RbLibText
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rb_lib_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - johnnytomcat
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-12-29 00:00:00.000000000 Z
11
+ date: 2016-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: 1.8.4
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.8.4
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '10.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  description: The tokenization has been tuned to work well with text conventions commonly
@@ -61,9 +61,9 @@ executables: []
61
61
  extensions: []
62
62
  extra_rdoc_files: []
63
63
  files:
64
- - ".gitignore"
65
- - ".rspec"
66
- - ".travis.yml"
64
+ - .gitignore
65
+ - .rspec
66
+ - .travis.yml
67
67
  - CODE_OF_CONDUCT.md
68
68
  - Gemfile
69
69
  - LICENSE.txt
@@ -85,17 +85,17 @@ require_paths:
85
85
  - lib
86
86
  required_ruby_version: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - ">="
88
+ - - '>='
89
89
  - !ruby/object:Gem::Version
90
90
  version: '0'
91
91
  required_rubygems_version: !ruby/object:Gem::Requirement
92
92
  requirements:
93
- - - ">="
93
+ - - '>='
94
94
  - !ruby/object:Gem::Version
95
95
  version: '0'
96
96
  requirements: []
97
97
  rubyforge_project:
98
- rubygems_version: 2.4.5
98
+ rubygems_version: 2.0.14
99
99
  signing_key:
100
100
  specification_version: 4
101
101
  summary: A little text processing library for Ruby.