commonregex 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +7 -0
  2. data/lib/commonregex.rb +105 -0
  3. metadata +45 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f521d3d2c69a9cbb89d93cddfc274286eb38da66
4
+ data.tar.gz: dfd92a777e9ec16d0f8d985bd2843d634af5f578
5
+ SHA512:
6
+ metadata.gz: 34388df37522ed929de93dcdc25ad1dc0141dce4b0928d0f91f7152f413c4fe64e91988269e3efb2dc68d39d902ddf66a95d7ef5e593ded0d6eb1cc76472d46e
7
+ data.tar.gz: 6ab28848a8cb12e6d132d0d1e278e9ef94f1e76f6442393f716030d2981b588d1b17e01e4353e5e486efc5c9bc50193e94bc99e99bdb962179c0b1545b259268
@@ -0,0 +1,105 @@
1
# Extracts common kinds of information (dates, times, phone numbers, links,
# e-mail addresses, IP addresses, ...) from a string using regular expressions.
#
# Every +get_*+ method scans either the text given to the constructor or the
# text passed explicitly, and returns an Array with the full text of each
# match (the first capture group of the corresponding pattern).
#
# The patterns are stored in class variables, as in the first release, so any
# code that reads them through +class_variable_get+ keeps working.
class CommonRegex
  # --- Helpers used to assemble the date pattern from string fragments ---

  # Wraps +regex+ (a String pattern fragment) in an optional non-capturing group.
  def self.opt(regex)
    "(?:#{regex})?"
  end

  # Wraps +regex+ (a String pattern fragment) in a non-capturing group.
  def self.group(regex)
    "(?:#{regex})"
  end

  # Joins an Array of String pattern fragments into an alternation.
  def self.any(regexes)
    regexes.join('|')
  end

  # --- Date pattern ---
  month_regex = '(?:jan\\.?|january|feb\\.?|february|mar\\.?|march|apr\\.?|april|may|jun\\.?|june|jul\\.?|july|aug\\.?|august|sep\\.?|september|oct\\.?|october|nov\\.?|november|dec\\.?|december)'
  day_regex = '[0-3]?\\d(?:st|nd|rd|th)?'
  year_regex = '\\d{4}'

  # "5th of January" / "January 5th" style dates.
  worded_date = group(
    any(
      [
        day_regex + '\\s+(?:of\\s+)?' + month_regex,
        month_regex + '\\s+' + day_regex
      ]
    )
  )

  # Regexp options are bit flags and must be combined with bitwise `|`.
  # A logical `||` would short-circuit to IGNORECASE and drop MULTILINE.
  @@date_regex = Regexp.new(
    '(' + worded_date + '(?:\\,)?\\s*' + opt(year_regex) +
      '|[0-3]?\\d[-/][0-3]?\\d[-/]\\d{2,4})',
    Regexp::IGNORECASE | Regexp::MULTILINE
  )

  # --- Literal patterns (the outermost capture group is the reported match) ---
  @@time_regex = /\b((0?[0-9]|1[0-2])(:[0-5][0-9])?(am|pm)|([01]?[0-9]|2[0-3]):[0-5][0-9])/im
  @@phone_regex = /(\d?[^\s\w]*(?:\(?\d{3}\)?\W*)?\d{3}\W*\d{4})/im
  # The trailing alternative accepts a balanced "(...)" suffix so that links
  # ending in a parenthesised segment are captured in full.
  @@links_regex = /((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))/im
  @@emails_regex = /([a-z0-9!#$%&'*+\/=?\^_`{|}~\-]+@([a-z0-9]+\.)+([a-z0-9]+))/im
  @@ipv4_regex = /\b(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\b/m
  @@ipv6_regex = /((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\b/im
  @@hex_colors_regex = /(#(?:[0-9a-fA-F]{3}){1,2})\b/im
  @@acronyms_regex = /\b(([A-Z]\.)+|([A-Z]){2,})/m
  @@money_regex = /(((^|\b)US?)?\$\s?[0-9]{1,3}((,[0-9]{3})+|([0-9]{3})+)?(\.[0-9]{1,2})?\b)/m
  @@percentage_regex = /((100(\.0+)?|[0-9]{1,2}(\.[0-9]+)?)%)/m
  @@credit_card_regex = /((?:(?:\d{4}[- ]){3}\d{4}|\d{16}))(?![\d])/m
  # Full street-type words must end at a word boundary; abbreviations must be
  # followed by a literal dot.
  @@address_regex = /(\d{1,4} [\w\s]{1,20}(?:(street|avenue|road|highway|square|trail|drive|court|parkway|boulevard)\b|(st|ave|rd|hwy|sq|trl|dr|ct|pkwy|blvd)\.))/im

  # @param text [String] default text scanned by the +get_*+ methods
  def initialize(text = '')
    @text = text
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] dates found in +text+
  def get_dates(text = @text)
    get_matches(text, @@date_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] times found in +text+
  def get_times(text = @text)
    get_matches(text, @@time_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] phone numbers found in +text+
  def get_phones(text = @text)
    get_matches(text, @@phone_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] links found in +text+
  def get_links(text = @text)
    get_matches(text, @@links_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] e-mail addresses found in +text+
  def get_emails(text = @text)
    get_matches(text, @@emails_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] IPv4 addresses found in +text+
  def get_ipv4(text = @text)
    get_matches(text, @@ipv4_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] IPv6 addresses found in +text+
  def get_ipv6(text = @text)
    get_matches(text, @@ipv6_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] hexadecimal colors (e.g. "#fff") found in +text+
  def get_hex_colors(text = @text)
    get_matches(text, @@hex_colors_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] acronyms found in +text+
  def get_acronyms(text = @text)
    get_matches(text, @@acronyms_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] dollar amounts found in +text+
  def get_money(text = @text)
    get_matches(text, @@money_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] percentages found in +text+
  def get_percentages(text = @text)
    get_matches(text, @@percentage_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] credit card numbers found in +text+
  def get_credit_cards(text = @text)
    get_matches(text, @@credit_card_regex)
  end

  # @param text [String] text to scan (defaults to the constructor's text)
  # @return [Array<String>] street addresses found in +text+
  def get_addresses(text = @text)
    get_matches(text, @@address_regex)
  end

  private

  # Scans +text+ with +regex+ and returns the first capture group of every
  # match. Every pattern above wraps the full match in its first group, so
  # String#scan yields one Array of groups per match.
  def get_matches(text, regex)
    text.scan(regex).map { |groups| groups[0] }
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: commonregex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Talysson Oliveira Cassiano
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-23 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Find a lot of kinds of common information in a string. CommonRegex port
14
+ for Ruby.
15
+ email: talyssonoc@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/commonregex.rb
21
+ homepage: https://github.com/talyssonoc/CommonRegexRuby
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.4.3
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: CommonRegex port for Ruby
45
+ test_files: []