commonregex 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/commonregex.rb +105 -0
  3. metadata +45 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f521d3d2c69a9cbb89d93cddfc274286eb38da66
4
+ data.tar.gz: dfd92a777e9ec16d0f8d985bd2843d634af5f578
5
+ SHA512:
6
+ metadata.gz: 34388df37522ed929de93dcdc25ad1dc0141dce4b0928d0f91f7152f413c4fe64e91988269e3efb2dc68d39d902ddf66a95d7ef5e593ded0d6eb1cc76472d46e
7
+ data.tar.gz: 6ab28848a8cb12e6d132d0d1e278e9ef94f1e76f6442393f716030d2981b588d1b17e01e4353e5e486efc5c9bc50193e94bc99e99bdb962179c0b1545b259268
@@ -0,0 +1,105 @@
1
# Extracts common kinds of information (dates, times, phone numbers, links,
# e-mails, IP addresses, colours, money amounts, ...) from a string.
#
# Each +get_*+ method scans a text with one precompiled pattern and returns
# an Array with the full text of every match. When no text is passed, the
# text given to the constructor is used.
class CommonRegex
  # --- Helpers used to assemble the date pattern from string fragments ---

  # Wraps a pattern fragment in an optional non-capturing group.
  #
  # @param regex [String] pattern source fragment
  # @return [String] "(?:<regex>)?"
  def self.opt(regex)
    '(?:' + regex + ')?'
  end

  # Wraps a pattern fragment in a non-capturing group.
  #
  # @param regex [String] pattern source fragment
  # @return [String] "(?:<regex>)"
  def self.group(regex)
    '(?:' + regex + ')'
  end

  # Joins pattern fragments into an alternation.
  #
  # @param regexes [Array<String>] pattern source fragments
  # @return [String] the fragments separated by "|"
  def self.any(regexes)
    regexes.join('|')
  end

  # --- Date pattern -------------------------------------------------------

  month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
  day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
  year_regex = '\\d{4}'

  # "5th of January" / "January 5th", optionally followed by ", 2015".
  day_then_month = day_regex + '\\s+(?:of\\s+)?' + month_regex
  month_then_day = month_regex + '\\s+' + day_regex
  worded_date = CommonRegex.group(CommonRegex.any([day_then_month, month_then_day])) +
                '(?:\\,)?\\s*' + CommonRegex.opt(year_regex)

  # "01/23/2015" or "1-23-15".
  numeric_date = '[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4}'

  # Option flags are bit masks and must be combined with bitwise `|`;
  # a logical `||` would silently keep only the first flag.
  @@date_regex = Regexp.new(
    '(' + worded_date + '|' + numeric_date + ')',
    Regexp::IGNORECASE | Regexp::MULTILINE
  )

  # --- Remaining patterns -------------------------------------------------
  # In every pattern the first capture group spans the whole match;
  # get_matches relies on that.

  @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
  @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
  # The final alternative lets a URL end in a balanced "(...)" group.
  @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im
  @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
  @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
  @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
  @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
  @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
  @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
  @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
  @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
  # Full street-type words must end on a word boundary; abbreviations must be
  # followed by a literal dot. (An optional lookahead that used to trail the
  # dot was a no-op and has been dropped; quantified lookarounds may be
  # rejected by some regex engine versions.)
  @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.))/im

  # @param text [String] default text searched by the +get_*+ methods
  def initialize(text = '')
    @text = text
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] dates found in +text+
  def get_dates(text = @text)
    get_matches(text, @@date_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] times found in +text+
  def get_times(text = @text)
    get_matches(text, @@time_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] phone numbers found in +text+
  def get_phones(text = @text)
    get_matches(text, @@phone_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] links found in +text+
  def get_links(text = @text)
    get_matches(text, @@links_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] e-mail addresses found in +text+
  def get_emails(text = @text)
    get_matches(text, @@emails_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] IPv4 addresses found in +text+
  def get_ipv4(text = @text)
    get_matches(text, @@ipv4_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] IPv6 addresses found in +text+
  def get_ipv6(text = @text)
    get_matches(text, @@ipv6_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] hex colour codes found in +text+
  def get_hex_colors(text = @text)
    get_matches(text, @@hex_colors_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] acronyms found in +text+
  def get_acronyms(text = @text)
    get_matches(text, @@acronyms_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] money amounts found in +text+
  def get_money(text = @text)
    get_matches(text, @@money_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] percentages found in +text+
  def get_percentages(text = @text)
    get_matches(text, @@percentage_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] credit card numbers found in +text+
  def get_credit_cards(text = @text)
    get_matches(text, @@credit_card_regex)
  end

  # @param text [String] text to search (defaults to the constructor text)
  # @return [Array<String>] street addresses found in +text+
  def get_addresses(text = @text)
    get_matches(text, @@address_regex)
  end

  private

  # Scans +text+ with +regex+ and returns the first capture group of each
  # match (String#scan yields one array of groups per match when the
  # pattern contains groups).
  def get_matches(text, regex)
    text.scan(regex).map(&:first)
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: commonregex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Talysson Oliveira Cassiano
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Find a lot of kinds of common information in a string. CommonRegex port
14
+ for Ruby.
15
+ email: talyssonoc@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/commonregex.rb
21
+ homepage: https://github.com/talyssonoc/CommonRegexRuby
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.4.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: CommonRegex port for Ruby
45
+ test_files: []