commonregex 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/commonregex.rb +66 -104
- data/lib/commonregex/version.rb +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 02fda7030f78adab40ecec65c9c2ea02a9dec872
|
4
|
+
data.tar.gz: 0a909452d950d28317a23cbb1a5ffef194805632
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98417b983aea23f07e81581b870b4e411f53404e8041919e3c42e2a61318e781874154747d175ed508ac7092c79544b49911cc27cc783030dbce398088b6440b
|
7
|
+
data.tar.gz: 137d12c82a6fc9801b5355b41b5dd7ecf5cb3c7b66522a57e210ceff1ac8c1a62c044b9b262cea0ef4d5634bb54f6476dc0e69a3a32a711a86c60a5cadd5e365
|
data/lib/commonregex.rb
CHANGED
@@ -1,106 +1,68 @@
|
|
1
1
|
class CommonRegex
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
get_matches(text, @@ipv4_regex)
|
70
|
-
end
|
71
|
-
|
72
|
-
def get_ipv6(text = @text)
|
73
|
-
get_matches(text, @@ipv6_regex)
|
74
|
-
end
|
75
|
-
|
76
|
-
def get_hex_colors(text = @text)
|
77
|
-
get_matches(text, @@hex_colors_regex)
|
78
|
-
end
|
79
|
-
|
80
|
-
def get_acronyms(text = @text)
|
81
|
-
get_matches(text, @@acronyms_regex)
|
82
|
-
end
|
83
|
-
|
84
|
-
def get_money(text = @text)
|
85
|
-
get_matches(text, @@money_regex)
|
86
|
-
end
|
87
|
-
|
88
|
-
def get_percentages(text = @text)
|
89
|
-
get_matches(text, @@percentage_regex)
|
90
|
-
end
|
91
|
-
|
92
|
-
def get_credit_cards(text = @text)
|
93
|
-
get_matches(text, @@credit_card_regex)
|
94
|
-
end
|
95
|
-
|
96
|
-
def get_addresses(text = @text)
|
97
|
-
get_matches(text, @@address_regex)
|
98
|
-
end
|
99
|
-
|
100
|
-
private
|
101
|
-
|
102
|
-
def get_matches(text, regex)
|
103
|
-
text.scan(regex).collect{|x| x[0]}
|
104
|
-
end
|
105
|
-
|
106
|
-
end
|
3
|
+
# Helpers used to assemble the pattern source for @@dates_regex.

# Wraps a pattern fragment in an optional non-capturing group.
#
# @param regex [String] regular-expression source fragment
# @return [String] the fragment as "(?:<fragment>)?"
def self.opt(regex)
  "(?:#{regex})?"
end
|
7
|
+
|
8
|
+
# Wraps a pattern fragment in a non-capturing group.
#
# @param regex [String] regular-expression source fragment
# @return [String] the fragment as "(?:<fragment>)"
def self.group(regex)
  "(?:#{regex})"
end
|
11
|
+
|
12
|
+
# Joins pattern fragments into a single alternation.
#
# @param regexes [Array<String>] regular-expression source fragments
# @return [String] the fragments separated by "|"
def self.any(regexes)
  alternation_separator = '|'
  regexes.join(alternation_separator)
end
|
15
|
+
|
16
|
+
# Generate @date_regex
|
17
|
+
month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
|
18
|
+
day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
|
19
|
+
year_regex = '\\d{4}'
|
20
|
+
|
21
|
+
@@dates_regex = Regexp.new('(' + CommonRegex.group(
|
22
|
+
CommonRegex.any(
|
23
|
+
[
|
24
|
+
day_regex + '\\s+(?:of\\s+)?' + month_regex,
|
25
|
+
month_regex + '\\s+' + day_regex
|
26
|
+
]
|
27
|
+
)
|
28
|
+
) + '(?:\\,)?\\s*' + CommonRegex.opt(year_regex) + '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})', Regexp::IGNORECASE || Regexp::MULTILINE)
|
29
|
+
|
30
|
+
# Pattern table used by the get_* methods. Every pattern wraps the complete
# match in its first capture group, because get_matches returns the first
# capture of each scan result.

# Dotted initials ("U.S.A.") or runs of two or more capital letters.
@@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
# A 1-4 digit number, up to 20 word/space characters, then a street suffix
# (spelled out, or abbreviated with a trailing dot).
@@addresses_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.(?=\b)?))/im
# Sixteen digits, either contiguous or in four groups separated by "-" or " ".
@@credit_cards_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
@@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
# "#" followed by three or six hexadecimal digits.
@@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
# Four dot-separated octets, each limited to 0-255.
@@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
# Full, "::"-compressed and IPv4-suffixed IPv6 forms, one alternative each.
@@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
# URLs starting with http(s)://, "www.", or a bare "domain.tld/"; balanced
# parentheses are allowed inside the URL, trailing punctuation is excluded.
@@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im
# Dollar amounts with an optional "US"/"U" prefix, optional thousands
# separators and up to two decimal places.
@@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
# 0-100 with optional decimals, followed by "%".
@@percentages_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
@@phones_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
# 12-hour times with am/pm (minutes optional) or 24-hour HH:MM.
@@times_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
|
42
|
+
|
43
|
+
# Generate, for every pattern name, a class-level and an instance-level
# accessor:
#
#   CommonRegex.get_<name>(text)  # scans the given text with @@<name>_regex
#   instance.get_<name>           # scans the text stored in @text
#
# class_eval receives __LINE__ + 1 because the heredoc body starts on the line
# after the call; this keeps backtraces for the generated methods pointing at
# the correct source line.
%w[acronyms addresses credit_cards dates emails hex_colors ipv4 ipv6 links
   money percentages phones times].each do |regex|
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
    def self.get_#{regex}(text)
      get_matches(text, @@#{regex}_regex)
    end

    def get_#{regex}
      self.class.get_#{regex}(@text)
    end
  RUBY
end
|
55
|
+
|
56
|
+
# Stores the text that the instance-level get_* methods scan.
#
# @param text [String] text to search; defaults to an empty string
def initialize(text = '')
  @text = text
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
# Scans +text+ with +regex+ and returns one string per match.
#
# String#scan yields an Array of captures when the pattern contains capture
# groups and a plain String when it does not. Both shapes are handled: the
# first capture is returned for grouped patterns, the whole match otherwise.
#
# NOTE: the bare `private` preceding this definition only affects instance
# methods, so this `def self.` method remains publicly callable.
#
# @param text [String] text to search
# @param regex [Regexp] pattern to scan with
# @return [Array<String, nil>] first capture (or whole match) of every hit;
#   an element is nil when the first group did not participate in the match
def self.get_matches(text, regex)
  text.scan(regex).map { |match| Array(match).first }
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
require "commonregex/version"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: commonregex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Talysson Oliveira Cassiano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Find a lot of kinds of common information in a string. CommonRegex port
|
14
14
|
for Ruby.
|
@@ -18,6 +18,7 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/commonregex.rb
|
21
|
+
- lib/commonregex/version.rb
|
21
22
|
homepage: https://github.com/talyssonoc/CommonRegexRuby
|
22
23
|
licenses:
|
23
24
|
- MIT
|