commonregex 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/commonregex.rb +105 -0
- metadata +45 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: f521d3d2c69a9cbb89d93cddfc274286eb38da66
|
|
4
|
+
data.tar.gz: dfd92a777e9ec16d0f8d985bd2843d634af5f578
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 34388df37522ed929de93dcdc25ad1dc0141dce4b0928d0f91f7152f413c4fe64e91988269e3efb2dc68d39d902ddf66a95d7ef5e593ded0d6eb1cc76472d46e
|
|
7
|
+
data.tar.gz: 6ab28848a8cb12e6d132d0d1e278e9ef94f1e76f6442393f716030d2981b588d1b17e01e4353e5e486efc5c9bc50193e94bc99e99bdb962179c0b1545b259268
|
data/lib/commonregex.rb
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Extracts common kinds of information (dates, times, phone numbers, links,
# e-mail addresses, IP addresses, ...) from a string by scanning it with a
# fixed set of regular expressions.
#
# Every +get_*+ instance method scans the given text (by default the text
# handed to the constructor) and returns an Array holding the full text of
# each match, in order of appearance.
class CommonRegex
  # --- Helpers used to assemble @@date_regex from pattern fragments ---------

  # Wraps a pattern fragment in an optional non-capturing group.
  #
  # @param regex [String] regular-expression source fragment
  # @return [String] the fragment wrapped as "(?:...)?"
  def self.opt(regex)
    "(?:#{regex})?"
  end

  # Wraps a pattern fragment in a non-capturing group.
  #
  # @param regex [String] regular-expression source fragment
  # @return [String] the fragment wrapped as "(?:...)"
  def self.group(regex)
    "(?:#{regex})"
  end

  # Joins several pattern fragments into one alternation.
  #
  # @param regexes [Array<String>] regular-expression source fragments
  # @return [String] the fragments separated by "|"
  def self.any(regexes)
    regexes.join('|')
  end

  # --- Generate @@date_regex -------------------------------------------------

  month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
  day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
  year_regex = '\\d{4}'

  # "<day> [of] <month>" or "<month> <day>".
  day_and_month = group(
    any(
      [
        day_regex + '\\s+(?:of\\s+)?' + month_regex,
        month_regex + '\\s+' + day_regex
      ]
    )
  )
  # Purely numeric form with "-" or "/" separators.
  numeric_date = '[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4}'

  # The option flags are bit masks and must be combined with bitwise `|`;
  # logical `||` would evaluate to IGNORECASE alone and drop MULTILINE.
  @@date_regex = Regexp.new(
    "(#{day_and_month}(?:\\,)?\\s*#{opt(year_regex)}|#{numeric_date})",
    Regexp::IGNORECASE | Regexp::MULTILINE
  )

  # --- Remaining patterns ----------------------------------------------------
  # In every pattern the first capture group spans the whole match, because
  # get_matches returns only the first group of each scan result.

  @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
  @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
  # The final group lets a link end either with a balanced "(...)" section or
  # with a character that is not whitespace or trailing punctuation.
  @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im
  @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
  @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
  @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
  @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
  @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
  @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
  @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
  @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
  # A former trailing `(?=\b)?` was an optional zero-width lookahead, which can
  # never change what matches, so it is left out.
  @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.))/im

  # @param text [String] default text scanned by the +get_*+ methods
  def initialize(text = '')
    @text = text
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the date pattern
  def get_dates(text = @text)
    get_matches(text, @@date_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the time pattern
  def get_times(text = @text)
    get_matches(text, @@time_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the phone pattern
  def get_phones(text = @text)
    get_matches(text, @@phone_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the link pattern
  def get_links(text = @text)
    get_matches(text, @@links_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the e-mail pattern
  def get_emails(text = @text)
    get_matches(text, @@emails_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the IPv4 pattern
  def get_ipv4(text = @text)
    get_matches(text, @@ipv4_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the IPv6 pattern
  def get_ipv6(text = @text)
    get_matches(text, @@ipv6_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the hex-color pattern
  def get_hex_colors(text = @text)
    get_matches(text, @@hex_colors_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the acronym pattern
  def get_acronyms(text = @text)
    get_matches(text, @@acronyms_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the money pattern
  def get_money(text = @text)
    get_matches(text, @@money_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the percentage pattern
  def get_percentages(text = @text)
    get_matches(text, @@percentage_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the credit-card pattern
  def get_credit_cards(text = @text)
    get_matches(text, @@credit_card_regex)
  end

  # @param text [String] text to scan (defaults to the constructor text)
  # @return [Array<String>] substrings matched by the street-address pattern
  def get_addresses(text = @text)
    get_matches(text, @@address_regex)
  end

  private

  # Scans +text+ with +regex+ and returns the first capture group of every
  # match. A nil +text+ is treated as an empty string and yields [].
  #
  # @param text [String, nil] text to scan
  # @param regex [Regexp] pattern whose first capture group spans the match
  # @return [Array<String>]
  def get_matches(text, regex)
    text.to_s.scan(regex).map(&:first)
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: commonregex
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Talysson Oliveira Cassiano
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2015-01-23 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: Find a lot of kinds of common information in a string. CommonRegex port
|
|
14
|
+
for Ruby.
|
|
15
|
+
email: talyssonoc@gmail.com
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files: []
|
|
19
|
+
files:
|
|
20
|
+
- lib/commonregex.rb
|
|
21
|
+
homepage: https://github.com/talyssonoc/CommonRegexRuby
|
|
22
|
+
licenses:
|
|
23
|
+
- MIT
|
|
24
|
+
metadata: {}
|
|
25
|
+
post_install_message:
|
|
26
|
+
rdoc_options: []
|
|
27
|
+
require_paths:
|
|
28
|
+
- lib
|
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - ">="
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0'
|
|
39
|
+
requirements: []
|
|
40
|
+
rubyforge_project:
|
|
41
|
+
rubygems_version: 2.4.3
|
|
42
|
+
signing_key:
|
|
43
|
+
specification_version: 4
|
|
44
|
+
summary: CommonRegex port for Ruby
|
|
45
|
+
test_files: []
|