commonregex 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/commonregex.rb +66 -104
- data/lib/commonregex/version.rb +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 02fda7030f78adab40ecec65c9c2ea02a9dec872
|
4
|
+
data.tar.gz: 0a909452d950d28317a23cbb1a5ffef194805632
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98417b983aea23f07e81581b870b4e411f53404e8041919e3c42e2a61318e781874154747d175ed508ac7092c79544b49911cc27cc783030dbce398088b6440b
|
7
|
+
data.tar.gz: 137d12c82a6fc9801b5355b41b5dd7ecf5cb3c7b66522a57e210ceff1ac8c1a62c044b9b262cea0ef4d5634bb54f6476dc0e69a3a32a711a86c60a5cadd5e365
|
data/lib/commonregex.rb
CHANGED
@@ -1,106 +1,68 @@
|
|
1
1
|
class CommonRegex
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
get_matches(text, @@ipv4_regex)
|
70
|
-
end
|
71
|
-
|
72
|
-
def get_ipv6(text = @text)
|
73
|
-
get_matches(text, @@ipv6_regex)
|
74
|
-
end
|
75
|
-
|
76
|
-
def get_hex_colors(text = @text)
|
77
|
-
get_matches(text, @@hex_colors_regex)
|
78
|
-
end
|
79
|
-
|
80
|
-
def get_acronyms(text = @text)
|
81
|
-
get_matches(text, @@acronyms_regex)
|
82
|
-
end
|
83
|
-
|
84
|
-
def get_money(text = @text)
|
85
|
-
get_matches(text, @@money_regex)
|
86
|
-
end
|
87
|
-
|
88
|
-
def get_percentages(text = @text)
|
89
|
-
get_matches(text, @@percentage_regex)
|
90
|
-
end
|
91
|
-
|
92
|
-
def get_credit_cards(text = @text)
|
93
|
-
get_matches(text, @@credit_card_regex)
|
94
|
-
end
|
95
|
-
|
96
|
-
def get_addresses(text = @text)
|
97
|
-
get_matches(text, @@address_regex)
|
98
|
-
end
|
99
|
-
|
100
|
-
private
|
101
|
-
|
102
|
-
def get_matches(text, regex)
|
103
|
-
text.scan(regex).collect{|x| x[0]}
|
104
|
-
end
|
105
|
-
|
106
|
-
end
|
3
|
+
# Helpers used to assemble @@dates_regex from smaller pattern fragments.

# Wraps a pattern fragment in an optional, non-capturing group.
#
# @param regex [String] regular-expression source fragment
# @return [String] the fragment rewritten as `(?:fragment)?`
def self.opt(regex)
  "(?:#{regex})?"
end
|
7
|
+
|
8
|
+
# Wraps a pattern fragment in a non-capturing group so that it can be
# combined with neighbouring fragments without changing precedence.
#
# @param regex [String] regular-expression source fragment
# @return [String] the fragment rewritten as `(?:fragment)`
def self.group(regex)
  "(?:#{regex})"
end
|
11
|
+
|
12
|
+
# Joins pattern fragments into a single alternation.
#
# The result is NOT wrapped in a group; callers that embed it in a larger
# pattern should pass it through a grouping helper first.
#
# @param regexes [Array<String>] regular-expression source fragments
# @return [String] the fragments separated by `|`
def self.any(regexes)
  regexes.join('|')
end
|
15
|
+
|
16
|
+
# Build @@dates_regex.
#
# The pattern accepts either a textual date -- "<day> [of] <month>" or
# "<month> <day>", optionally followed by a comma and a four-digit year --
# or a numeric date of the form d/m/y or d-m-y with a 2-4 digit year.
month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
year_regex = '\\d{4}'

@@dates_regex = Regexp.new(
  '(' + CommonRegex.group(
    CommonRegex.any(
      [
        day_regex + '\\s+(?:of\\s+)?' + month_regex,
        month_regex + '\\s+' + day_regex
      ]
    )
  ) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})',
  # Regexp options are a bitmask and must be combined with bitwise `|`.
  # A logical `||` would short-circuit to IGNORECASE and drop MULTILINE.
  Regexp::IGNORECASE | Regexp::MULTILINE
)
|
29
|
+
|
30
|
+
# Pattern table. In every pattern the FIRST capture group wraps the whole
# value of interest, because get_matches returns element 0 of each scan result.

# Dotted capitals ("U.S.A.") or two or more consecutive capitals ("NASA").
@@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m

# 1-4 digits, a space, up to 20 word/space characters, then either a full
# street-type word or a dotted abbreviation ("st.", "ave.", ...).
@@addresses_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im

# Sixteen digits, either contiguous or as four groups of four separated by
# "-" or a space, not followed by another digit.
@@credit_cards_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m

# local-part "@" dotted domain labels.
@@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im

# "#" followed by three or six hexadecimal digits.
@@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im

# Dotted quad whose four octets are each in the range 0-255.
@@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m

# IPv6 in full, "::"-compressed and IPv4-embedded forms.
@@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im

# URLs starting with http(s)://, "www." or a bare "domain.tld/", allowing
# balanced parentheses (up to two levels) inside the path.
@@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im

# Dollar amounts with an optional "U"/"US" prefix, optional thousands
# separators and up to two decimal places.
@@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m

# "100" (optionally "100.0...") or a one/two-digit number with optional
# decimals, followed by "%".
@@percentages_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m

# Seven digits (3 + 4) with an optional three-digit area code and an optional
# leading digit; separators between the groups are matched loosely.
@@phones_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im

# 12-hour times with an am/pm suffix, or 24-hour "HH:MM".
@@times_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
|
42
|
+
|
43
|
+
# For every pattern name, generate two extractors:
#
#   CommonRegex.get_<name>(text) -- class method, scans the given text
#   CommonRegex#get_<name>       -- instance method, scans the text the
#                                   object was constructed with
#
# Both delegate to get_matches with the matching @@<name>_regex.
%w{acronyms addresses credit_cards dates emails hex_colors ipv4 ipv6 links
   money percentages phones times}.each do |regex|
  # The heredoc body begins on the line after `class_eval`, hence
  # `__LINE__ + 1`: it keeps backtraces pointing at the right source line.
  class_eval <<-RUBY.gsub(/^\s{6}/, ''), __FILE__, __LINE__ + 1
      def self.get_#{regex}(text)
        get_matches(text, @@#{regex}_regex)
      end

      def get_#{regex}
        self.class.get_#{regex}(@text)
      end
  RUBY
end
|
55
|
+
|
56
|
+
# Stores the text that the instance-level get_* methods will scan.
#
# @param text [String] text to search; defaults to the empty string
def initialize(text = '')
  @text = text
end
|
59
|
+
|
60
|
+
private

# Collects the first capture group of every match of +regex+ in +text+.
#
# NOTE: the bare `private` above only affects instance methods; a method
# defined with `def self.` stays publicly callable. Visibility is left
# unchanged here so existing callers keep working.
#
# @param text [String] text to scan
# @param regex [Regexp] pattern whose first capture group is the value wanted
# @return [Array] element 0 of each String#scan result, in order of appearance
def self.get_matches(text, regex)
  text.scan(regex).map { |match| match[0] }
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
require "commonregex/version"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commonregex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Talysson Oliveira Cassiano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Find a lot of kinds of common information in a string. CommonRegex port
|
14
14
|
for Ruby.
|
@@ -18,6 +18,7 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/commonregex.rb
|
21
|
+
- lib/commonregex/version.rb
|
21
22
|
homepage: https://github.com/talyssonoc/CommonRegexRuby
|
22
23
|
licenses:
|
23
24
|
- MIT
|