commonregex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/commonregex.rb +105 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f521d3d2c69a9cbb89d93cddfc274286eb38da66
|
4
|
+
data.tar.gz: dfd92a777e9ec16d0f8d985bd2843d634af5f578
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 34388df37522ed929de93dcdc25ad1dc0141dce4b0928d0f91f7152f413c4fe64e91988269e3efb2dc68d39d902ddf66a95d7ef5e593ded0d6eb1cc76472d46e
|
7
|
+
data.tar.gz: 6ab28848a8cb12e6d132d0d1e278e9ef94f1e76f6442393f716030d2981b588d1b17e01e4353e5e486efc5c9bc50193e94bc99e99bdb962179c0b1545b259268
|
data/lib/commonregex.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# Extracts common kinds of information (dates, times, phone numbers, links,
# e-mail addresses, IP addresses, colours, money amounts, ...) from a string
# using a fixed set of regular expressions.
#
# A text may be bound at construction time and/or passed to each +get_*+
# method; every +get_*+ method returns an Array with the full text of each
# match, in order of appearance.
class CommonRegex
  # --- Helpers used to assemble the date pattern source -------------------

  # Wraps a pattern source in an optional non-capturing group.
  #
  # @param regex [String] regular-expression source
  # @return [String] "(?:<regex>)?"
  def self.opt(regex)
    "(?:#{regex})?"
  end

  # Wraps a pattern source in a non-capturing group.
  #
  # @param regex [String] regular-expression source
  # @return [String] "(?:<regex>)"
  def self.group(regex)
    "(?:#{regex})"
  end

  # Joins several pattern sources into one alternation.
  #
  # @param regexes [Array<String>] regular-expression sources
  # @return [String] the sources separated by "|"
  def self.any(regexes)
    regexes.join('|')
  end

  # --- Date pattern --------------------------------------------------------

  month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
  day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
  year_regex = '\\d{4}'

  # "<day> [of] <month>" or "<month> <day>", optionally followed by a comma
  # and a four-digit year.
  spelled_date = group(
    any(
      [
        "#{day_regex}\\s+(?:of\\s+)?#{month_regex}",
        "#{month_regex}\\s+#{day_regex}"
      ]
    )
  ) + '(?:\\,)?\\s*' + opt(year_regex)

  # Purely numeric form such as 1/2/2015 or 01-02-15.
  numeric_date = '[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4}'

  # Options are combined with bitwise OR; a logical `||` would short-circuit
  # and silently drop the second flag.
  @@date_regex = Regexp.new(
    "(#{spelled_date}|#{numeric_date})",
    Regexp::IGNORECASE | Regexp::MULTILINE
  )

  # --- Remaining patterns --------------------------------------------------
  # Each pattern keeps the complete match in capture group 1, which is what
  # get_matches returns.

  @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
  @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
  # The final group lets a link end with a balanced "(...)" segment.
  @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im
  @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
  @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
  @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
  @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
  @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
  @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
  @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
  @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
  # Full street-type words must end at a word boundary; abbreviations must be
  # followed by a literal period. (A trailing optional zero-width lookahead
  # was dropped: being optional, it could never change what matches.)
  @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.))/im

  # @param text [String] default text searched by the +get_*+ methods
  def initialize(text = '')
    @text = text
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] dates found in +text+
  def get_dates(text = @text)
    get_matches(text, @@date_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] times found in +text+
  def get_times(text = @text)
    get_matches(text, @@time_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] phone numbers found in +text+
  def get_phones(text = @text)
    get_matches(text, @@phone_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] links found in +text+
  def get_links(text = @text)
    get_matches(text, @@links_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] e-mail addresses found in +text+
  def get_emails(text = @text)
    get_matches(text, @@emails_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] IPv4 addresses found in +text+
  def get_ipv4(text = @text)
    get_matches(text, @@ipv4_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] IPv6 addresses found in +text+
  def get_ipv6(text = @text)
    get_matches(text, @@ipv6_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] hexadecimal colour codes found in +text+
  def get_hex_colors(text = @text)
    get_matches(text, @@hex_colors_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] acronyms found in +text+
  def get_acronyms(text = @text)
    get_matches(text, @@acronyms_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] dollar amounts found in +text+
  def get_money(text = @text)
    get_matches(text, @@money_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] percentages found in +text+
  def get_percentages(text = @text)
    get_matches(text, @@percentage_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] credit-card numbers found in +text+
  def get_credit_cards(text = @text)
    get_matches(text, @@credit_card_regex)
  end

  # @param text [String] text to search; defaults to the text given to +new+
  # @return [Array<String>] street addresses found in +text+
  def get_addresses(text = @text)
    get_matches(text, @@address_regex)
  end

  private

  # Scans +text+ with +regex+ and keeps capture group 1 of every match.
  # String#scan yields one array of captures per match when the pattern has
  # groups, so the first element is the outermost (whole-match) group.
  def get_matches(text, regex)
    text.scan(regex).map(&:first)
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: commonregex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Talysson Oliveira Cassiano
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-23 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Find a lot of kinds of common information in a string. CommonRegex port
|
14
|
+
for Ruby.
|
15
|
+
email: talyssonoc@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/commonregex.rb
|
21
|
+
homepage: https://github.com/talyssonoc/CommonRegexRuby
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.4.3
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: CommonRegex port for Ruby
|
45
|
+
test_files: []
|